Diffstat (limited to 'lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 231
1 file changed, 150 insertions, 81 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 4f8b90f..5096d9a 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -235,10 +235,16 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     // Setup Windows compiler runtime calls.
     setLibcallName(RTLIB::SDIV_I64, "_alldiv");
     setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
+    setLibcallName(RTLIB::SREM_I64, "_allrem");
+    setLibcallName(RTLIB::UREM_I64, "_aullrem");
+    setLibcallName(RTLIB::MUL_I64, "_allmul");
     setLibcallName(RTLIB::FPTOUINT_F64_I64, "_ftol2");
     setLibcallName(RTLIB::FPTOUINT_F32_I64, "_ftol2");
     setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
     setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
+    setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
+    setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
+    setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
     setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::C);
     setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::C);
   }
@@ -646,6 +652,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
   }
 
+  // We don't support FMA.
+  setOperationAction(ISD::FMA, MVT::f64, Expand);
+  setOperationAction(ISD::FMA, MVT::f32, Expand);
+
   // Long double always uses X87.
   if (!UseSoftFloat) {
     addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
@@ -670,6 +680,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
       setOperationAction(ISD::FSIN , MVT::f80 , Expand);
       setOperationAction(ISD::FCOS , MVT::f80 , Expand);
     }
+
+    setOperationAction(ISD::FMA, MVT::f80, Expand);
   }
 
   // Always use a library call for pow.
@@ -976,7 +988,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     addRegisterClass(MVT::v32i8, X86::VR256RegisterClass);
 
     setOperationAction(ISD::LOAD, MVT::v8f32, Legal);
-    setOperationAction(ISD::LOAD, MVT::v8i32, Legal);
     setOperationAction(ISD::LOAD, MVT::v4f64, Legal);
     setOperationAction(ISD::LOAD, MVT::v4i64, Legal);
 
@@ -994,63 +1005,58 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
     setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
 
-    // Custom lower build_vector, vector_shuffle, scalar_to_vector,
-    // insert_vector_elt extract_subvector and extract_vector_elt for
-    // 256-bit types.
-    for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
-         i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE;
-         ++i) {
-      MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
-      // Do not attempt to custom lower non-256-bit vectors
-      if (!isPowerOf2_32(MVT(VT).getVectorNumElements())
-          || (MVT(VT).getSizeInBits() < 256))
-        continue;
-      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
-      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
-      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
-      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
-      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
-    }
-    // Custom-lower insert_subvector and extract_subvector based on
-    // the result type.
+    // Custom lower several nodes for 256-bit types.
     for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
-         i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE;
-         ++i) {
-      MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
-      // Do not attempt to custom lower non-256-bit vectors
-      if (!isPowerOf2_32(MVT(VT).getVectorNumElements()))
+         i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
+      MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
+      EVT VT = SVT;
+
+      // Extract subvector is special because the value type
+      // (result) is 128-bit but the source is 256-bit wide.
+      if (VT.is128BitVector())
+        setOperationAction(ISD::EXTRACT_SUBVECTOR, SVT, Custom);
+
+      // Do not attempt to custom lower other non-256-bit vectors
+      if (!VT.is256BitVector())
        continue;
-      if (MVT(VT).getSizeInBits() == 128) {
-        setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
-      }
-      else if (MVT(VT).getSizeInBits() == 256) {
-        setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
-      }
+      setOperationAction(ISD::BUILD_VECTOR, SVT, Custom);
+      setOperationAction(ISD::VECTOR_SHUFFLE, SVT, Custom);
+      setOperationAction(ISD::INSERT_VECTOR_ELT, SVT, Custom);
+      setOperationAction(ISD::EXTRACT_VECTOR_ELT, SVT, Custom);
+      setOperationAction(ISD::SCALAR_TO_VECTOR, SVT, Custom);
+      setOperationAction(ISD::INSERT_SUBVECTOR, SVT, Custom);
     }
 
     // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
-    // Don't promote loads because we need them for VPERM vector index versions.
+    for (unsigned i = (unsigned)MVT::v32i8; i != (unsigned)MVT::v4i64; ++i) {
+      MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
+      EVT VT = SVT;
 
-    for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
-         VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE;
-         VT++) {
-      if (!isPowerOf2_32(MVT((MVT::SimpleValueType)VT).getVectorNumElements())
-          || (MVT((MVT::SimpleValueType)VT).getSizeInBits() < 256))
+      // Do not attempt to promote non-256-bit vectors
+      if (!VT.is256BitVector())
        continue;
-      setOperationAction(ISD::AND, (MVT::SimpleValueType)VT, Promote);
-      AddPromotedToType (ISD::AND, (MVT::SimpleValueType)VT, MVT::v4i64);
-      setOperationAction(ISD::OR, (MVT::SimpleValueType)VT, Promote);
-      AddPromotedToType (ISD::OR, (MVT::SimpleValueType)VT, MVT::v4i64);
-      setOperationAction(ISD::XOR, (MVT::SimpleValueType)VT, Promote);
-      AddPromotedToType (ISD::XOR, (MVT::SimpleValueType)VT, MVT::v4i64);
-      //setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Promote);
-      //AddPromotedToType (ISD::LOAD, (MVT::SimpleValueType)VT, MVT::v4i64);
-      setOperationAction(ISD::SELECT, (MVT::SimpleValueType)VT, Promote);
-      AddPromotedToType (ISD::SELECT, (MVT::SimpleValueType)VT, MVT::v4i64);
+
+      setOperationAction(ISD::AND, SVT, Promote);
+      AddPromotedToType (ISD::AND, SVT, MVT::v4i64);
+      setOperationAction(ISD::OR, SVT, Promote);
+      AddPromotedToType (ISD::OR, SVT, MVT::v4i64);
+      setOperationAction(ISD::XOR, SVT, Promote);
+      AddPromotedToType (ISD::XOR, SVT, MVT::v4i64);
+      setOperationAction(ISD::LOAD, SVT, Promote);
+      AddPromotedToType (ISD::LOAD, SVT, MVT::v4i64);
+      setOperationAction(ISD::SELECT, SVT, Promote);
+      AddPromotedToType (ISD::SELECT, SVT, MVT::v4i64);
     }
   }
 
+  // SIGN_EXTEND_INREGs are evaluated by the extend type. Handle the expansion
+  // of this type with custom code.
+  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+       VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE; VT++) {
+    setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT, Custom);
+  }
+
   // We want to custom lower some of our intrinsics.
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
@@ -3832,19 +3838,24 @@ static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG,
 }
 
 /// getOnesVector - Returns a vector of specified type with all bits set.
-///
+/// Always build ones vectors as <4 x i32> or <8 x i32> bitcasted to
+/// their original type, ensuring they get CSE'd.
 static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
   assert(VT.isVector() && "Expected a vector type");
+  assert((VT.is128BitVector() || VT.is256BitVector())
+         && "Expected a 128-bit or 256-bit vector type");
 
-  // Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their dest
-  // type. This ensures they get CSE'd.
   SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
+
   SDValue Vec;
-  Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+  if (VT.is256BitVector()) {
+    SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
+    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8);
+  } else
+    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
   return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
 }
 
-
 /// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
 /// that point to V2 points to its first element.
 static SDValue NormalizeMask(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
@@ -4459,17 +4470,17 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
     return ConcatVectors(Lower, Upper, DAG);
   }
 
-  // All zero's are handled with pxor in SSE2 and above, xorps in SSE1.
-  // All one's are handled with pcmpeqd. In AVX, zero's are handled with
-  // vpxor in 128-bit and xor{pd,ps} in 256-bit, but no 256 version of pcmpeqd
-  // is present, so AllOnes is ignored.
+  // All zero's:
+  // - pxor (SSE2), xorps (SSE1), vpxor (128 AVX), xorp[s|d] (256 AVX)
+  // All one's:
+  // - pcmpeqd (SSE2 and 128 AVX), fallback to constant pools (256 AVX)
   if (ISD::isBuildVectorAllZeros(Op.getNode()) ||
-      (Op.getValueType().getSizeInBits() != 256 &&
-       ISD::isBuildVectorAllOnes(Op.getNode()))) {
-    // Canonicalize this to <4 x i32> (SSE) to
+      ISD::isBuildVectorAllOnes(Op.getNode())) {
+    // Canonicalize this to <4 x i32> or <8 x 32> (SSE) to
     // 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are
     // eliminated on x86-32 hosts.
-    if (Op.getValueType() == MVT::v4i32)
+    if (Op.getValueType() == MVT::v4i32 ||
+        Op.getValueType() == MVT::v8i32)
       return Op;
 
     if (ISD::isBuildVectorAllOnes(Op.getNode()))
@@ -8916,8 +8927,8 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
   }
 
   // Lower SHL with variable shift amount.
-  // Cannot lower SHL without SSE4.1 or later.
-  if (!Subtarget->hasSSE41()) return SDValue();
+  // Cannot lower SHL without SSE2 or later.
+  if (!Subtarget->hasSSE2()) return SDValue();
 
   if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
     Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
@@ -9064,13 +9075,66 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
   return Sum;
 }
 
+SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const{
+  DebugLoc dl = Op.getDebugLoc();
+  SDNode* Node = Op.getNode();
+  EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+  EVT VT = Node->getValueType(0);
+
+  if (Subtarget->hasSSE2() && VT.isVector()) {
+    unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
+                        ExtraVT.getScalarType().getSizeInBits();
+    SDValue ShAmt = DAG.getConstant(BitsDiff, MVT::i32);
+
+    unsigned SHLIntrinsicsID = 0;
+    unsigned SRAIntrinsicsID = 0;
+    switch (VT.getSimpleVT().SimpleTy) {
+    default:
+      return SDValue();
+    case MVT::v2i64: {
+      SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_q;
+      SRAIntrinsicsID = 0;
+      break;
+    }
+    case MVT::v4i32: {
+      SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_d;
+      SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_d;
+      break;
+    }
+    case MVT::v8i16: {
+      SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_w;
+      SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_w;
+      break;
+    }
+    }
+
+    SDValue Tmp1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+                               DAG.getConstant(SHLIntrinsicsID, MVT::i32),
+                               Node->getOperand(0), ShAmt);
+
+    // In case of 1 bit sext, no need to shr
+    if (ExtraVT.getScalarType().getSizeInBits() == 1) return Tmp1;
+
+    if (SRAIntrinsicsID) {
+      Tmp1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+                         DAG.getConstant(SRAIntrinsicsID, MVT::i32),
+                         Tmp1, ShAmt);
+    }
+    return Tmp1;
+  }
+
+  return SDValue();
+}
+
+
 SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{
   DebugLoc dl = Op.getDebugLoc();
 
-  if (!Subtarget->hasSSE2()) {
+  // Go ahead and emit the fence on x86-64 even if we asked for no-sse2.
+  // There isn't any reason to disable it if the target processor supports it.
+  if (!Subtarget->hasSSE2() && !Subtarget->is64Bit()) {
    SDValue Chain = Op.getOperand(0);
-    SDValue Zero = DAG.getConstant(0,
-                                   Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
+    SDValue Zero = DAG.getConstant(0, MVT::i32);
    SDValue Ops[] = {
      DAG.getRegister(X86::ESP, MVT::i32), // Base
      DAG.getTargetConstant(1, MVT::i8),   // Scale
@@ -9225,6 +9289,7 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
 SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
   default: llvm_unreachable("Should not custom lower this!");
+  case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op,DAG);
   case ISD::MEMBARRIER:        return LowerMEMBARRIER(Op,DAG);
   case ISD::ATOMIC_CMP_SWAP:   return LowerCMP_SWAP(Op,DAG);
   case ISD::ATOMIC_LOAD_SUB:   return LowerLOAD_SUB(Op,DAG);
@@ -9323,6 +9388,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
   default:
     assert(false && "Do not know how to custom type legalize this operation!");
     return;
+  case ISD::SIGN_EXTEND_INREG:
   case ISD::ADDC:
   case ISD::ADDE:
   case ISD::SUBC:
@@ -9457,7 +9523,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::PINSRB: return "X86ISD::PINSRB";
   case X86ISD::PINSRW: return "X86ISD::PINSRW";
   case X86ISD::PSHUFB: return "X86ISD::PSHUFB";
-  case X86ISD::PANDN: return "X86ISD::PANDN";
+  case X86ISD::ANDNP: return "X86ISD::ANDNP";
   case X86ISD::PSIGNB: return "X86ISD::PSIGNB";
   case X86ISD::PSIGNW: return "X86ISD::PSIGNW";
   case X86ISD::PSIGND: return "X86ISD::PSIGND";
@@ -11808,10 +11874,12 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
   if (R.getNode())
     return R;
 
-  // Want to form PANDN nodes, in the hopes of then easily combining them with
-  // OR and AND nodes to form PBLEND/PSIGN.
+  // Want to form ANDNP nodes:
+  // 1) In the hopes of then easily combining them with OR and AND nodes
+  //    to form PBLEND/PSIGN.
+  // 2) To match ANDN packed intrinsics
   EVT VT = N->getValueType(0);
-  if (VT != MVT::v2i64)
+  if (VT != MVT::v2i64 && VT != MVT::v4i64)
     return SDValue();
 
   SDValue N0 = N->getOperand(0);
@@ -11821,12 +11889,12 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
   // Check LHS for vnot
   if (N0.getOpcode() == ISD::XOR &&
       ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode()))
-    return DAG.getNode(X86ISD::PANDN, DL, VT, N0.getOperand(0), N1);
+    return DAG.getNode(X86ISD::ANDNP, DL, VT, N0.getOperand(0), N1);
 
   // Check RHS for vnot
   if (N1.getOpcode() == ISD::XOR &&
      ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode()))
-    return DAG.getNode(X86ISD::PANDN, DL, VT, N1.getOperand(0), N0);
+    return DAG.getNode(X86ISD::ANDNP, DL, VT, N1.getOperand(0), N0);
 
   return SDValue();
 }
@@ -11852,10 +11920,10 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
   if (Subtarget->hasSSSE3()) {
     if (VT == MVT::v2i64) {
       // Canonicalize pandn to RHS
-      if (N0.getOpcode() == X86ISD::PANDN)
+      if (N0.getOpcode() == X86ISD::ANDNP)
        std::swap(N0, N1);
      // or (and (m, x), (pandn m, y))
-      if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::PANDN) {
+      if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::ANDNP) {
        SDValue Mask = N1.getOperand(0);
        SDValue X = N1.getOperand(1);
        SDValue Y;
@@ -11864,7 +11932,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
      if (N0.getOperand(1) == Mask)
        Y = N0.getOperand(0);
 
-      // Check to see if the mask appeared in both the AND and PANDN and
+      // Check to see if the mask appeared in both the AND and ANDNP and
      if (!Y.getNode())
        return SDValue();
@@ -12592,6 +12660,7 @@ X86TargetLowering::getConstraintType(const std::string &Constraint) const {
    case 'y':
    case 'x':
    case 'Y':
+    case 'l':
      return C_RegisterClass;
    case 'a':
    case 'b':
@@ -12889,30 +12958,30 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
      // in the normal allocation?
    case 'q':   // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
      if (Subtarget->is64Bit()) {
-        if (VT == MVT::i32)
+        if (VT == MVT::i32 || VT == MVT::f32)
          return std::make_pair(0U, X86::GR32RegisterClass);
        else if (VT == MVT::i16)
          return std::make_pair(0U, X86::GR16RegisterClass);
-        else if (VT == MVT::i8)
+        else if (VT == MVT::i8 || VT == MVT::i1)
          return std::make_pair(0U, X86::GR8RegisterClass);
-        else if (VT == MVT::i64)
+        else if (VT == MVT::i64 || VT == MVT::f64)
          return std::make_pair(0U, X86::GR64RegisterClass);
        break;
      }
      // 32-bit fallthrough
    case 'Q':   // Q_REGS
-      if (VT == MVT::i32)
+      if (VT == MVT::i32 || VT == MVT::f32)
        return std::make_pair(0U, X86::GR32_ABCDRegisterClass);
      else if (VT == MVT::i16)
        return std::make_pair(0U, X86::GR16_ABCDRegisterClass);
-      else if (VT == MVT::i8)
+      else if (VT == MVT::i8 || VT == MVT::i1)
        return std::make_pair(0U, X86::GR8_ABCD_LRegisterClass);
      else if (VT == MVT::i64)
        return std::make_pair(0U, X86::GR64_ABCDRegisterClass);
      break;
    case 'r':   // GENERAL_REGS
    case 'l':   // INDEX_REGS
-      if (VT == MVT::i8)
+      if (VT == MVT::i8 || VT == MVT::i1)
        return std::make_pair(0U, X86::GR8RegisterClass);
      if (VT == MVT::i16)
        return std::make_pair(0U, X86::GR16RegisterClass);
@@ -12920,7 +12989,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
        return std::make_pair(0U, X86::GR32RegisterClass);
      return std::make_pair(0U, X86::GR64RegisterClass);
    case 'R':   // LEGACY_REGS
-      if (VT == MVT::i8)
+      if (VT == MVT::i8 || VT == MVT::i1)
        return std::make_pair(0U, X86::GR8_NOREXRegisterClass);
      if (VT == MVT::i16)
        return std::make_pair(0U, X86::GR16_NOREXRegisterClass);
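
For reference, the new LowerSIGN_EXTEND_INREG hook above relies on the usual shift identity: sign-extending an N-bit value held in a wider W-bit lane is a left shift by W - N followed by an arithmetic right shift by the same amount, which is what the pslli/psrai intrinsic pairs perform per lane (BitsDiff in the patch is W - N). A minimal standalone sketch of that identity on a single 32-bit lane; the helper name sext_inreg32 and the test values are illustrative only and not part of the patch:

    #include <cassert>
    #include <cstdint>

    // Sign-extend the low SrcBits of 'x' into a full 32-bit value using the
    // same shift-left / arithmetic-shift-right pair the patch emits per lane.
    static int32_t sext_inreg32(uint32_t x, unsigned SrcBits) {
      const unsigned BitsDiff = 32u - SrcBits;
      // The right shift on int32_t is arithmetic on mainstream compilers
      // (guaranteed since C++20), matching what psrai does per lane.
      return static_cast<int32_t>(x << BitsDiff) >> BitsDiff;
    }

    int main() {
      assert(sext_inreg32(0xFFu, 8) == -1);      // 0xFF as a signed i8 is -1
      assert(sext_inreg32(0x7Fu, 8) == 127);     // positive values unchanged
      assert(sext_inreg32(0x8001u, 16) == -32767);
      return 0;
    }

The v2i64 case in the patch leaves SRAIntrinsicsID at 0 because SSE2 provides no 64-bit arithmetic right shift (there is no psraq), so only the shift-left half can be emitted for that type.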