aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target')
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.cpp10
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.td8
-rw-r--r--lib/Target/Mips/MipsISelLowering.cpp24
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp22
4 files changed, 62 insertions, 2 deletions
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 90a5e5e..9e59972 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -557,11 +557,21 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Promote);
+ // i8 and i16 vector elements also need promotion to i32 for v8i8 or v8i16
+ // -> v8f16 conversions.
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i8, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Promote);
// Similarly, there is no direct i32 -> f64 vector conversion instruction.
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
+ // Or, direct i32 -> f16 vector conversion. Set it so custom, so the
+ // conversion happens in two steps: v4i32 -> v4f32 -> v4f16
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
// AArch64 doesn't have MUL.2d:
setOperationAction(ISD::MUL, MVT::v2i64, Expand);
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index f7db50a..92d4460 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5128,22 +5128,26 @@ def : Pat<(trap), (BRK 1)>;
// Natural vector casts (64 bit)
def : Pat<(v8i8 (AArch64NvCast (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v4f16 (AArch64NvCast (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (v2i32 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2f32 (AArch64NvCast (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v8i8 (AArch64NvCast (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (v4i16 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v4f16 (AArch64NvCast (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v8i8 (AArch64NvCast (v8i8 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v4f16 (AArch64NvCast (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v8i8 (AArch64NvCast (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v4f16 (AArch64NvCast (f64 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2f32 (AArch64NvCast (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
@@ -5158,22 +5162,26 @@ def : Pat<(v1i64 (AArch64NvCast (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
// Natural vector casts (128 bit)
def : Pat<(v16i8 (AArch64NvCast (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v8f16 (AArch64NvCast (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v4i32 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4f32 (AArch64NvCast (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v16i8 (AArch64NvCast (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v8i16 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v8f16 (AArch64NvCast (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v16i8 (AArch64NvCast (v16i8 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v8f16 (AArch64NvCast (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v16i8 (AArch64NvCast (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v8f16 (AArch64NvCast (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v2i64 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v4f32 (AArch64NvCast (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index f37737d..c78b79f 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -221,9 +221,23 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
}
- // MIPS doesn't have extending float->double load/store
- for (MVT VT : MVT::fp_valuetypes())
+ // MIPS doesn't have extending float->double load/store. Set LoadExtAction
+ // for f32, f16
+ for (MVT VT : MVT::fp_valuetypes()) {
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
+ }
+
+ // Set LoadExtAction for f16 vectors to Expand
+ for (MVT VT : MVT::fp_vector_valuetypes()) {
+ MVT F16VT = MVT::getVectorVT(MVT::f16, VT.getVectorNumElements());
+ if (F16VT.isValid())
+ setLoadExtAction(ISD::EXTLOAD, VT, F16VT, Expand);
+ }
+
+ setTruncStoreAction(MVT::f32, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f16, Expand);
+
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
// Used by legalize types to correctly generate the setcc result.
@@ -339,6 +353,12 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
setOperationAction(ISD::FREM, MVT::f32, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
+ // Lower f16 conversion operations into library calls
+ setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
+ setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
+
setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
setOperationAction(ISD::VASTART, MVT::Other, Custom);
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index c32412a..a04eca1 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -746,6 +746,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// them legal.
if (VT.getVectorElementType() == MVT::i1)
setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
+
+ // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
+ // split/scalarized right now.
+ if (VT.getVectorElementType() == MVT::f16)
+ setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
}
}
@@ -17304,6 +17309,11 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::FP_TO_SINT:
+ // FP_TO_INT*_IN_MEM is not legal for f16 inputs. Do not convert
+ // (FP_TO_SINT (load f16)) to FP_TO_INT*.
+ if (N->getOperand(0).getValueType() == MVT::f16)
+ break;
+ // fallthrough
case ISD::FP_TO_UINT: {
bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
@@ -17349,6 +17359,13 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(V);
return;
}
+ case ISD::FP_EXTEND: {
+ // Right now, only MVT::v2f32 has OperationAction for FP_EXTEND.
+ // No other ValueType for FP_EXTEND should reach this point.
+ assert(N->getValueType(0) == MVT::v2f32 &&
+ "Do not know how to legalize this Node");
+ return;
+ }
case ISD::INTRINSIC_W_CHAIN: {
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
switch (IntNo) {
@@ -23642,6 +23659,11 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
if (Op0.getOpcode() == ISD::LOAD) {
LoadSDNode *Ld = cast<LoadSDNode>(Op0.getNode());
EVT VT = Ld->getValueType(0);
+
+ // This transformation is not supported if the result type is f16
+ if (N->getValueType(0) == MVT::f16)
+ return SDValue();
+
if (!Ld->isVolatile() && !N->getValueType(0).isVector() &&
ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() &&
!Subtarget->is64Bit() && VT == MVT::i64) {