aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target/X86/X86ISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp231
1 files changed, 150 insertions, 81 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 4f8b90f..5096d9a 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -235,10 +235,16 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// Setup Windows compiler runtime calls.
setLibcallName(RTLIB::SDIV_I64, "_alldiv");
setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
+ setLibcallName(RTLIB::SREM_I64, "_allrem");
+ setLibcallName(RTLIB::UREM_I64, "_aullrem");
+ setLibcallName(RTLIB::MUL_I64, "_allmul");
setLibcallName(RTLIB::FPTOUINT_F64_I64, "_ftol2");
setLibcallName(RTLIB::FPTOUINT_F32_I64, "_ftol2");
setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
+ setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
+ setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
+ setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::C);
setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::C);
}
@@ -646,6 +652,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
}
+ // We don't support FMA.
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+
// Long double always uses X87.
if (!UseSoftFloat) {
addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
@@ -670,6 +680,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FSIN , MVT::f80 , Expand);
setOperationAction(ISD::FCOS , MVT::f80 , Expand);
}
+
+ setOperationAction(ISD::FMA, MVT::f80, Expand);
}
// Always use a library call for pow.
@@ -976,7 +988,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
addRegisterClass(MVT::v32i8, X86::VR256RegisterClass);
setOperationAction(ISD::LOAD, MVT::v8f32, Legal);
- setOperationAction(ISD::LOAD, MVT::v8i32, Legal);
setOperationAction(ISD::LOAD, MVT::v4f64, Legal);
setOperationAction(ISD::LOAD, MVT::v4i64, Legal);
@@ -994,63 +1005,58 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
- // Custom lower build_vector, vector_shuffle, scalar_to_vector,
- // insert_vector_elt extract_subvector and extract_vector_elt for
- // 256-bit types.
- for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE;
- ++i) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
- // Do not attempt to custom lower non-256-bit vectors
- if (!isPowerOf2_32(MVT(VT).getVectorNumElements())
- || (MVT(VT).getSizeInBits() < 256))
- continue;
- setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
- }
- // Custom-lower insert_subvector and extract_subvector based on
- // the result type.
+ // Custom lower several nodes for 256-bit types.
for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE;
- ++i) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
- // Do not attempt to custom lower non-256-bit vectors
- if (!isPowerOf2_32(MVT(VT).getVectorNumElements()))
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
+ EVT VT = SVT;
+
+ // Extract subvector is special because the value type
+ // (result) is 128-bit but the source is 256-bit wide.
+ if (VT.is128BitVector())
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, SVT, Custom);
+
+ // Do not attempt to custom lower other non-256-bit vectors
+ if (!VT.is256BitVector())
continue;
- if (MVT(VT).getSizeInBits() == 128) {
- setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
- }
- else if (MVT(VT).getSizeInBits() == 256) {
- setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
- }
+ setOperationAction(ISD::BUILD_VECTOR, SVT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, SVT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, SVT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, SVT, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, SVT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, SVT, Custom);
}
// Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
- // Don't promote loads because we need them for VPERM vector index versions.
+ for (unsigned i = (unsigned)MVT::v32i8; i != (unsigned)MVT::v4i64; ++i) {
+ MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
+ EVT VT = SVT;
- for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE;
- VT++) {
- if (!isPowerOf2_32(MVT((MVT::SimpleValueType)VT).getVectorNumElements())
- || (MVT((MVT::SimpleValueType)VT).getSizeInBits() < 256))
+ // Do not attempt to promote non-256-bit vectors
+ if (!VT.is256BitVector())
continue;
- setOperationAction(ISD::AND, (MVT::SimpleValueType)VT, Promote);
- AddPromotedToType (ISD::AND, (MVT::SimpleValueType)VT, MVT::v4i64);
- setOperationAction(ISD::OR, (MVT::SimpleValueType)VT, Promote);
- AddPromotedToType (ISD::OR, (MVT::SimpleValueType)VT, MVT::v4i64);
- setOperationAction(ISD::XOR, (MVT::SimpleValueType)VT, Promote);
- AddPromotedToType (ISD::XOR, (MVT::SimpleValueType)VT, MVT::v4i64);
- //setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Promote);
- //AddPromotedToType (ISD::LOAD, (MVT::SimpleValueType)VT, MVT::v4i64);
- setOperationAction(ISD::SELECT, (MVT::SimpleValueType)VT, Promote);
- AddPromotedToType (ISD::SELECT, (MVT::SimpleValueType)VT, MVT::v4i64);
+
+ setOperationAction(ISD::AND, SVT, Promote);
+ AddPromotedToType (ISD::AND, SVT, MVT::v4i64);
+ setOperationAction(ISD::OR, SVT, Promote);
+ AddPromotedToType (ISD::OR, SVT, MVT::v4i64);
+ setOperationAction(ISD::XOR, SVT, Promote);
+ AddPromotedToType (ISD::XOR, SVT, MVT::v4i64);
+ setOperationAction(ISD::LOAD, SVT, Promote);
+ AddPromotedToType (ISD::LOAD, SVT, MVT::v4i64);
+ setOperationAction(ISD::SELECT, SVT, Promote);
+ AddPromotedToType (ISD::SELECT, SVT, MVT::v4i64);
}
}
+ // SIGN_EXTEND_INREGs are evaluated by the extend type. Handle the expansion
+ // of this type with custom code.
+ for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE; VT++) {
+ setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT, Custom);
+ }
+
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
@@ -3832,19 +3838,24 @@ static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG,
}
/// getOnesVector - Returns a vector of specified type with all bits set.
-///
+/// Always build ones vectors as <4 x i32> or <8 x i32> bitcasted to
+/// their original type, ensuring they get CSE'd.
static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
+ assert((VT.is128BitVector() || VT.is256BitVector())
+ && "Expected a 128-bit or 256-bit vector type");
- // Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their dest
- // type. This ensures they get CSE'd.
SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
+
SDValue Vec;
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+ if (VT.is256BitVector()) {
+ SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8);
+ } else
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
}
-
/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
/// that point to V2 points to its first element.
static SDValue NormalizeMask(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
@@ -4459,17 +4470,17 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return ConcatVectors(Lower, Upper, DAG);
}
- // All zero's are handled with pxor in SSE2 and above, xorps in SSE1.
- // All one's are handled with pcmpeqd. In AVX, zero's are handled with
- // vpxor in 128-bit and xor{pd,ps} in 256-bit, but no 256 version of pcmpeqd
- // is present, so AllOnes is ignored.
+ // All zero's:
+ // - pxor (SSE2), xorps (SSE1), vpxor (128 AVX), xorp[s|d] (256 AVX)
+ // All one's:
+ // - pcmpeqd (SSE2 and 128 AVX), fallback to constant pools (256 AVX)
if (ISD::isBuildVectorAllZeros(Op.getNode()) ||
- (Op.getValueType().getSizeInBits() != 256 &&
- ISD::isBuildVectorAllOnes(Op.getNode()))) {
- // Canonicalize this to <4 x i32> (SSE) to
+ ISD::isBuildVectorAllOnes(Op.getNode())) {
+ // Canonicalize this to <4 x i32> or <8 x 32> (SSE) to
// 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are
// eliminated on x86-32 hosts.
- if (Op.getValueType() == MVT::v4i32)
+ if (Op.getValueType() == MVT::v4i32 ||
+ Op.getValueType() == MVT::v8i32)
return Op;
if (ISD::isBuildVectorAllOnes(Op.getNode()))
@@ -8916,8 +8927,8 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
}
// Lower SHL with variable shift amount.
- // Cannot lower SHL without SSE4.1 or later.
- if (!Subtarget->hasSSE41()) return SDValue();
+ // Cannot lower SHL without SSE2 or later.
+ if (!Subtarget->hasSSE2()) return SDValue();
if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
@@ -9064,13 +9075,66 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
return Sum;
}
+SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const{
+ DebugLoc dl = Op.getDebugLoc();
+ SDNode* Node = Op.getNode();
+ EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ EVT VT = Node->getValueType(0);
+
+ if (Subtarget->hasSSE2() && VT.isVector()) {
+ unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
+ ExtraVT.getScalarType().getSizeInBits();
+ SDValue ShAmt = DAG.getConstant(BitsDiff, MVT::i32);
+
+ unsigned SHLIntrinsicsID = 0;
+ unsigned SRAIntrinsicsID = 0;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ return SDValue();
+ case MVT::v2i64: {
+ SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_q;
+ SRAIntrinsicsID = 0;
+ break;
+ }
+ case MVT::v4i32: {
+ SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_d;
+ SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_d;
+ break;
+ }
+ case MVT::v8i16: {
+ SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_w;
+ SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_w;
+ break;
+ }
+ }
+
+ SDValue Tmp1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(SHLIntrinsicsID, MVT::i32),
+ Node->getOperand(0), ShAmt);
+
+ // In case of 1 bit sext, no need to shr
+ if (ExtraVT.getScalarType().getSizeInBits() == 1) return Tmp1;
+
+ if (SRAIntrinsicsID) {
+ Tmp1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(SRAIntrinsicsID, MVT::i32),
+ Tmp1, ShAmt);
+ }
+ return Tmp1;
+ }
+
+ return SDValue();
+}
+
+
SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{
DebugLoc dl = Op.getDebugLoc();
- if (!Subtarget->hasSSE2()) {
+ // Go ahead and emit the fence on x86-64 even if we asked for no-sse2.
+ // There isn't any reason to disable it if the target processor supports it.
+ if (!Subtarget->hasSSE2() && !Subtarget->is64Bit()) {
SDValue Chain = Op.getOperand(0);
- SDValue Zero = DAG.getConstant(0,
- Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
+ SDValue Zero = DAG.getConstant(0, MVT::i32);
SDValue Ops[] = {
DAG.getRegister(X86::ESP, MVT::i32), // Base
DAG.getTargetConstant(1, MVT::i8), // Scale
@@ -9225,6 +9289,7 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Should not custom lower this!");
+ case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op,DAG);
case ISD::MEMBARRIER: return LowerMEMBARRIER(Op,DAG);
case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG);
case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG);
@@ -9323,6 +9388,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
default:
assert(false && "Do not know how to custom type legalize this operation!");
return;
+ case ISD::SIGN_EXTEND_INREG:
case ISD::ADDC:
case ISD::ADDE:
case ISD::SUBC:
@@ -9457,7 +9523,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::PINSRB: return "X86ISD::PINSRB";
case X86ISD::PINSRW: return "X86ISD::PINSRW";
case X86ISD::PSHUFB: return "X86ISD::PSHUFB";
- case X86ISD::PANDN: return "X86ISD::PANDN";
+ case X86ISD::ANDNP: return "X86ISD::ANDNP";
case X86ISD::PSIGNB: return "X86ISD::PSIGNB";
case X86ISD::PSIGNW: return "X86ISD::PSIGNW";
case X86ISD::PSIGND: return "X86ISD::PSIGND";
@@ -11808,10 +11874,12 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
if (R.getNode())
return R;
- // Want to form PANDN nodes, in the hopes of then easily combining them with
- // OR and AND nodes to form PBLEND/PSIGN.
+ // Want to form ANDNP nodes:
+ // 1) In the hopes of then easily combining them with OR and AND nodes
+ // to form PBLEND/PSIGN.
+ // 2) To match ANDN packed intrinsics
EVT VT = N->getValueType(0);
- if (VT != MVT::v2i64)
+ if (VT != MVT::v2i64 && VT != MVT::v4i64)
return SDValue();
SDValue N0 = N->getOperand(0);
@@ -11821,12 +11889,12 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
// Check LHS for vnot
if (N0.getOpcode() == ISD::XOR &&
ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode()))
- return DAG.getNode(X86ISD::PANDN, DL, VT, N0.getOperand(0), N1);
+ return DAG.getNode(X86ISD::ANDNP, DL, VT, N0.getOperand(0), N1);
// Check RHS for vnot
if (N1.getOpcode() == ISD::XOR &&
ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode()))
- return DAG.getNode(X86ISD::PANDN, DL, VT, N1.getOperand(0), N0);
+ return DAG.getNode(X86ISD::ANDNP, DL, VT, N1.getOperand(0), N0);
return SDValue();
}
@@ -11852,10 +11920,10 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
if (Subtarget->hasSSSE3()) {
if (VT == MVT::v2i64) {
// Canonicalize pandn to RHS
- if (N0.getOpcode() == X86ISD::PANDN)
+ if (N0.getOpcode() == X86ISD::ANDNP)
std::swap(N0, N1);
// or (and (m, x), (pandn m, y))
- if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::PANDN) {
+ if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::ANDNP) {
SDValue Mask = N1.getOperand(0);
SDValue X = N1.getOperand(1);
SDValue Y;
@@ -11864,7 +11932,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
if (N0.getOperand(1) == Mask)
Y = N0.getOperand(0);
- // Check to see if the mask appeared in both the AND and PANDN and
+ // Check to see if the mask appeared in both the AND and ANDNP and
if (!Y.getNode())
return SDValue();
@@ -12592,6 +12660,7 @@ X86TargetLowering::getConstraintType(const std::string &Constraint) const {
case 'y':
case 'x':
case 'Y':
+ case 'l':
return C_RegisterClass;
case 'a':
case 'b':
@@ -12889,30 +12958,30 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
// in the normal allocation?
case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
if (Subtarget->is64Bit()) {
- if (VT == MVT::i32)
+ if (VT == MVT::i32 || VT == MVT::f32)
return std::make_pair(0U, X86::GR32RegisterClass);
else if (VT == MVT::i16)
return std::make_pair(0U, X86::GR16RegisterClass);
- else if (VT == MVT::i8)
+ else if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, X86::GR8RegisterClass);
- else if (VT == MVT::i64)
+ else if (VT == MVT::i64 || VT == MVT::f64)
return std::make_pair(0U, X86::GR64RegisterClass);
break;
}
// 32-bit fallthrough
case 'Q': // Q_REGS
- if (VT == MVT::i32)
+ if (VT == MVT::i32 || VT == MVT::f32)
return std::make_pair(0U, X86::GR32_ABCDRegisterClass);
else if (VT == MVT::i16)
return std::make_pair(0U, X86::GR16_ABCDRegisterClass);
- else if (VT == MVT::i8)
+ else if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, X86::GR8_ABCD_LRegisterClass);
else if (VT == MVT::i64)
return std::make_pair(0U, X86::GR64_ABCDRegisterClass);
break;
case 'r': // GENERAL_REGS
case 'l': // INDEX_REGS
- if (VT == MVT::i8)
+ if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, X86::GR8RegisterClass);
if (VT == MVT::i16)
return std::make_pair(0U, X86::GR16RegisterClass);
@@ -12920,7 +12989,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
return std::make_pair(0U, X86::GR32RegisterClass);
return std::make_pair(0U, X86::GR64RegisterClass);
case 'R': // LEGACY_REGS
- if (VT == MVT::i8)
+ if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, X86::GR8_NOREXRegisterClass);
if (VT == MVT::i16)
return std::make_pair(0U, X86::GR16_NOREXRegisterClass);