aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/llvm/Target/TargetSelectionDAG.td45
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp36
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td111
3 files changed, 185 insertions, 7 deletions
diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td
index 3288dd4..f55cf0e 100644
--- a/include/llvm/Target/TargetSelectionDAG.td
+++ b/include/llvm/Target/TargetSelectionDAG.td
@@ -657,6 +657,51 @@ def zextloadi32 : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
}]>;
+def extloadvi1 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i1;
+}]>;
+def extloadvi8 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+def extloadvi16 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+}]>;
+def extloadvi32 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
+def extloadvf32 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::f32;
+}]>;
+def extloadvf64 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::f64;
+}]>;
+
+def sextloadvi1 : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i1;
+}]>;
+def sextloadvi8 : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+def sextloadvi16 : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+}]>;
+def sextloadvi32 : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
+
+def zextloadvi1 : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i1;
+}]>;
+def zextloadvi8 : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+def zextloadvi16 : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+}]>;
+def zextloadvi32 : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
+
// store fragments.
def unindexedstore : PatFrag<(ops node:$val, node:$ptr),
(st node:$val, node:$ptr), [{
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index de10afa..40e1e22 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -533,9 +533,13 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::SETCC, MVT::v1i64, Expand);
setOperationAction(ISD::SETCC, MVT::v2i64, Expand);
// Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
- // a destination type that is wider than the source.
+ // a destination type that is wider than the source, and nor does
+ // it have a FP_TO_[SU]INT instruction with a narrower destination than
+ // source.
setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
setTargetDAGCombine(ISD::INTRINSIC_VOID);
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
@@ -555,7 +559,15 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setTargetDAGCombine(ISD::FP_TO_UINT);
setTargetDAGCombine(ISD::FDIV);
- setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Expand);
+ // It is legal to extload from v4i8 to v4i16 or v4i32.
+ MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8,
+ MVT::v4i16, MVT::v2i16,
+ MVT::v2i32};
+ for (unsigned i = 0; i < 6; ++i) {
+ setLoadExtAction(ISD::EXTLOAD, Tys[i], Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, Tys[i], Legal);
+ setLoadExtAction(ISD::SEXTLOAD, Tys[i], Legal);
+ }
}
computeRegisterProperties();
@@ -3058,12 +3070,22 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
}
static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
- assert(Op.getValueType().getVectorElementType() == MVT::i32
- && "Unexpected custom lowering");
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
- if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
- return Op;
- return DAG.UnrollVectorOp(Op.getNode());
+ if (Op.getValueType().getVectorElementType() == MVT::i32) {
+ if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
+ return Op;
+ return DAG.UnrollVectorOp(Op.getNode());
+ }
+
+ assert(Op.getOperand(0).getValueType() == MVT::v4f32 &&
+ "Invalid type for custom lowering!");
+ if (VT != MVT::v4i16)
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
}
static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 154fb25..76aefe6 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -5624,6 +5624,117 @@ def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
+// Vector lengthening move with load, matching extending loads.
+
+// extload, zextload and sextload for a standard lengthening load. Example:
+// Lengthen_Single<"8", "i16", "i8"> = Pat<(v8i16 (extloadvi8 addrmode5:$addr))
+// (VMOVLuv8i16 (VLDRD addrmode5:$addr))>;
+multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
+ def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
+ (VLDRD addrmode5:$addr))>;
+ def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
+ (VLDRD addrmode5:$addr))>;
+ def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy)
+ (VLDRD addrmode5:$addr))>;
+}
+
+// extload, zextload and sextload for a lengthening load which only uses
+// half the lanes available. Example:
+// Lengthen_HalfSingle<"4", "i16", "8", "i16", "i8"> =
+// Pat<(v4i16 (extloadvi8 addrmode5:$addr))
+// (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+// (VLDRS addrmode5:$addr),
+// ssub_0)),
+// dsub_0)>;
+multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
+ string InsnLanes, string InsnTy> {
+ def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
+ dsub_0)>;
+ def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
+ dsub_0)>;
+ def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
+ dsub_0)>;
+}
+
+// extload, zextload and sextload for a lengthening load followed by another
+// lengthening load, to quadruple the initial length.
+// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32", qsub_0> =
+// Pat<(v4i32 (extloadvi8 addrmode5:$addr))
+// (EXTRACT_SUBREG (VMOVLuv4i32
+// (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+// (VLDRS addrmode5:$addr),
+// ssub_0)),
+// dsub_0)),
+// qsub_0)>;
+multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
+ string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
+ string Insn2Ty, SubRegIndex RegType> {
+ def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
+ ssub_0)), dsub_0)),
+ RegType)>;
+ def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
+ ssub_0)), dsub_0)),
+ RegType)>;
+ def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
+ ssub_0)), dsub_0)),
+ RegType)>;
+}
+
+defm : Lengthen_Single<"8", "i16", "i8">; // v8i8 -> v8i16
+defm : Lengthen_Single<"4", "i32", "i16">; // v4i16 -> v4i32
+defm : Lengthen_Single<"2", "i64", "i32">; // v2i32 -> v2i64
+
+defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
+defm : Lengthen_HalfSingle<"2", "i16", "i8", "8", "i16">; // v2i8 -> v2i16
+defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32
+
+// Double lengthening - v4i8 -> v4i16 -> v4i32
+defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32", qsub_0>;
+// v2i8 -> v2i16 -> v2i32
+defm : Lengthen_Double<"2", "i32", "i8", "8", "i16", "4", "i32", dsub_0>;
+// v2i16 -> v2i32 -> v2i64
+defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64", qsub_0>;
+
+// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
+def : Pat<(v2i64 (extloadvi8 addrmode5:$addr)),
+ (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
+ dsub_0)), dsub_0))>;
+def : Pat<(v2i64 (zextloadvi8 addrmode5:$addr)),
+ (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
+ dsub_0)), dsub_0))>;
+def : Pat<(v2i64 (sextloadvi8 addrmode5:$addr)),
+ (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
+ dsub_0)), dsub_0))>;
//===----------------------------------------------------------------------===//
// Assembler aliases