Clang and AArch64 backend patches to support shll/shl and vmovl instructions and ACLE functions

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188451 91177308-0d34-0410-b5e6-96231b3b80d8
author: Hao Liu <Hao.Liu@arm.com> 2013-08-15 08:26:11 +0000
committer: Hao Liu <Hao.Liu@arm.com> 2013-08-15 08:26:11 +0000
commit: d9767021f83879429e930b068d1d6aef22285b33 (patch)
tree: 93c99311855843ce9f66f9990626667bbc9be5ab /lib
parent: 46ceaf4ba64cdd0ac37578c0132cad39c9ea21c0 (diff)
download: external_llvm-d9767021f83879429e930b068d1d6aef22285b33.zip
external_llvm-d9767021f83879429e930b068d1d6aef22285b33.tar.gz
external_llvm-d9767021f83879429e930b068d1d6aef22285b33.tar.bz2
4 files changed, 220 insertions, 1 deletions
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index ec838fb..d12302e 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -77,6 +77,7 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
 
   setTargetDAGCombine(ISD::AND);
   setTargetDAGCombine(ISD::SRA);
+  setTargetDAGCombine(ISD::SHL);
 
   // AArch64 does not have i1 loads, or much of anything for i1 really.
   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
@@ -3235,6 +3236,56 @@ static SDValue PerformSRACombine(SDNode *N,
                      DAG.getConstant(LSB + Width - 1, MVT::i64));
 }
 
+/// Return true if Op, looking through bitcasts, is a build_vector whose
+/// elements form a constant splat no wider than ElementBits. On success the
+/// sign-extended splat value is stored in Cnt; on failure Cnt is untouched.
+static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
+  // Strip any chain of BITCAST nodes to reach the underlying operand.
+  while (Op.getOpcode() == ISD::BITCAST)
+    Op = Op.getOperand(0);
+  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
+  APInt SplatBits, SplatUndef;
+  unsigned SplatBitSize;
+  bool HasAnyUndefs;
+  if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
+                                      HasAnyUndefs, ElementBits) ||
+      SplatBitSize > ElementBits)
+    return false; // not a build_vector, not a constant splat, or too wide
+  Cnt = SplatBits.getSExtValue();
+  return true;
+}
+
+/// Return true if Op is a constant-splat shift amount usable as the immediate
+/// of a vector shift left on type VT, i.e. 0 <= Cnt < element size of VT in
+/// bits. Cnt may be overwritten even when the range check fails.
+static bool isVShiftLImm(SDValue Op, EVT VT, int64_t &Cnt) {
+  assert(VT.isVector() && "vector shift count is not a vector type");
+  unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
+  if (!getVShiftImm(Op, ElementBits, Cnt))
+    return false;
+  return (Cnt >= 0 && Cnt < ElementBits);
+}
+
+static SDValue PerformSHLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
+                                   const AArch64Subtarget *ST) { // ST: assert only
+  SelectionDAG &DAG = DCI.DAG;
+  EVT VT = N->getValueType(0);
+
+  // Only shifts whose result is a legal vector type are rewritten.
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (!VT.isVector() || !TLI.isTypeLegal(VT))
+    return SDValue();
+
+  assert(ST->hasNEON() && "unexpected vector shift");
+  int64_t Cnt; // splat shift amount, written by isVShiftLImm
+  if (isVShiftLImm(N->getOperand(1), VT, Cnt)) { // rebuild with NEON_DUPIMM count
+    SDValue RHS = DAG.getNode(AArch64ISD::NEON_DUPIMM, SDLoc(N->getOperand(0)),
+                              VT, DAG.getConstant(Cnt, MVT::i32));
+    return DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), RHS);
+  }
+
+  return SDValue(); // shift count is not an in-range constant splat
+}
 
 SDValue
 AArch64TargetLowering::PerformDAGCombine(SDNode *N,
@@ -3244,6 +3295,7 @@ AArch64TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::AND: return PerformANDCombine(N, DCI);
   case ISD::OR: return PerformORCombine(N, DCI, getSubtarget());
   case ISD::SRA: return PerformSRACombine(N, DCI);
+  case ISD::SHL: return PerformSHLCombine(N, DCI, getSubtarget());
   }
   return SDValue();
 }
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index 67a908e..c9795b2 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -132,7 +132,10 @@ namespace AArch64ISD {
     NEON_CMPZ,
 
     // Vector compare bitwise test
-    NEON_TST
+    NEON_TST,
+
+    // Operation for the immediate in vector shift
+    NEON_DUPIMM
   };
 }
 
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
index 09451fd..020ee6c 100644
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1050,5 +1050,24 @@ class NeonI_2VMisc<bit q, bit u, bits<2> size, bits<5> opcode,
   // Inherit Rd in 4-0
 }
 
+// Format AdvSIMD 2 vector 1 immediate shift: Rd, Rn and a 7-bit immediate.
+class NeonI_2VShiftImm<bit q, bit u, bits<5> opcode,
+                       dag outs, dag ins, string asmstr,
+                       list<dag> patterns, InstrItinClass itin>
+  : A64InstRdn<outs, ins, asmstr, patterns, itin>
+{
+  bits<7> Imm;                 // immediate operand, placed in Inst{22-16}
+  let Inst{31} = 0b0;
+  let Inst{30} = q;
+  let Inst{29} = u;
+  let Inst{28-23} = 0b011110;
+  let Inst{22-16} = Imm;       // users may pin the top bits of this field
+  let Inst{15-11} = opcode;
+  let Inst{10} = 0b1;
+  
+  // Inherit Rn in 9-5
+  // Inherit Rd in 4-0
+}
+
 }
 
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td
index 98b9e3e..175c3aa 100644
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -41,6 +41,9 @@ def Neon_cmpz : SDNode<"AArch64ISD::NEON_CMPZ", SDTypeProfile<1, 3,
 def Neon_tst : SDNode<"AArch64ISD::NEON_TST", SDTypeProfile<1, 2,
                  [SDTCisVec<0>,  SDTCisSameAs<1, 2>]>>;
 
+def Neon_dupImm : SDNode<"AArch64ISD::NEON_DUPIMM", SDTypeProfile<1, 1, 
+                    [SDTCisVec<0>, SDTCisVT<1, i32>]>>; // vector result, one i32 operand
+
 //===----------------------------------------------------------------------===//
 // Multiclasses
 //===----------------------------------------------------------------------===//
@@ -1409,6 +1412,148 @@ def FMOVvi_4S : NeonI_FMOV_impl<".4s", VPR128, v4f32, fmov32_operand, 0b1, 0b0>;
 def FMOVvi_2D : NeonI_FMOV_impl<".2d", VPR128, v2f64, fmov64_operand, 0b1, 0b1>;
 }
 
+// Vector Shift (Immediate) 
+
+def imm0_63 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 64; }]> {
+  let ParserMatchClass = uimm6_asmoperand;  // predicate accepts 0..63 inclusive
+}
+
+class N2VShiftLeft<bit q, bit u, bits<5> opcode, string asmop, string T,
+                   RegisterClass VPRC, ValueType Ty, Operand ImmTy>
+  : NeonI_2VShiftImm<q, u, opcode,
+                     (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
+                     asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
+                     [(set (Ty VPRC:$Rd),  // shl of $Rn by a NEON_DUPIMM of an i32 imm
+                        (Ty (shl (Ty VPRC:$Rn), 
+                          (Ty (Neon_dupImm (i32 imm:$Imm))))))],  // NOTE(review): plain imm, not ImmTy - confirm range is guaranteed by the producer
+                     NoItinerary>;
+
+multiclass NeonI_N2VShL<bit u, bits<5> opcode, string asmop> {
+  // 64-bit vector types (q = 0, VPR64 registers).
+  def _8B  : N2VShiftLeft<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3> {
+    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
+  }
+
+  def _4H  : N2VShiftLeft<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4> {
+    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
+  }
+
+  def _2S  : N2VShiftLeft<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5> {
+    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
+  }
+
+  // 128-bit vector types (q = 1, VPR128 registers).
+  def _16B : N2VShiftLeft<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3> {
+    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
+  }
+
+  def _8H : N2VShiftLeft<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4> {
+    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
+  }
+
+  def _4S : N2VShiftLeft<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5> {
+    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
+  }
+
+  def _2D : N2VShiftLeft<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63> {
+    let Inst{22} = 0b1;        // immh:immb = 1xxxxxx
+  }
+}
+
+def Neon_top16B : PatFrag<(ops node:$in), 
+                          (extract_subvector (v16i8 node:$in), (iPTR 8))>; // from element 8
+def Neon_top8H : PatFrag<(ops node:$in), 
+                         (extract_subvector (v8i16 node:$in), (iPTR 4))>;  // from element 4
+def Neon_top4S : PatFrag<(ops node:$in), 
+                         (extract_subvector (v4i32 node:$in), (iPTR 2))>;  // from element 2
+
+class N2VShiftLong<bit q, bit u, bits<5> opcode, string asmop, string DestT,
+                   string SrcT, ValueType DestTy, ValueType SrcTy,
+                   Operand ImmTy, SDPatternOperator ExtOp>
+  : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
+                     (ins VPR64:$Rn, ImmTy:$Imm),
+                     asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
+                     [(set (DestTy VPR128:$Rd),  // shl of the ExtOp-extended VPR64 source
+                        (DestTy (shl 
+                          (DestTy (ExtOp (SrcTy VPR64:$Rn))),
+                            (DestTy (Neon_dupImm (i32 imm:$Imm))))))],  // NOTE(review): any i32 imm matches; confirm counts too wide for ImmTy cannot reach here
+                     NoItinerary>;
+
+class N2VShiftLongHigh<bit q, bit u, bits<5> opcode, string asmop, string DestT,
+                       string SrcT, ValueType DestTy, ValueType SrcTy, 
+                       int StartIndex, Operand ImmTy,  // NOTE(review): StartIndex is not referenced in this class body
+                       SDPatternOperator ExtOp, PatFrag getTop>
+  : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
+                     (ins VPR128:$Rn, ImmTy:$Imm),
+                     asmop # "2\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",  // mnemonic gets a "2" suffix
+                     [(set (DestTy VPR128:$Rd), 
+                        (DestTy (shl
+                          (DestTy (ExtOp 
+                            (SrcTy (getTop VPR128:$Rn)))),  // getTop picks the subvector to extend
+                              (DestTy (Neon_dupImm (i32 imm:$Imm))))))],
+                     NoItinerary>;
+
+multiclass NeonI_N2VShLL<string prefix, bit u, bits<5> opcode, string asmop,
+                         SDNode ExtOp> {  // prefix: name used by the !cast lookups below
+  // 64-bit source vectors (q = 0): extend all of VPR64:$Rn, then shift.
+  def _8B : N2VShiftLong<0b0, u, opcode, asmop, "8h", "8b", v8i16, v8i8,
+                         uimm3, ExtOp>{
+    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
+  }
+
+  def _4H : N2VShiftLong<0b0, u, opcode, asmop, "4s", "4h", v4i32, v4i16,
+                         uimm4, ExtOp>{
+    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
+  }
+
+  def _2S : N2VShiftLong<0b0, u, opcode, asmop, "2d", "2s", v2i64, v2i32,
+                         uimm5, ExtOp>{
+    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
+  }
+
+  // 128-bit source vectors (q = 1): extend the subvector picked by Neon_top*.
+  def _16B : N2VShiftLongHigh<0b1, u, opcode, asmop, "8h", "16b",
+                              v8i16, v8i8, 8, uimm3, ExtOp, Neon_top16B>{
+    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
+  }
+
+  def _8H : N2VShiftLongHigh<0b1, u, opcode, asmop, "4s", "8h",
+                             v4i32, v4i16, 4, uimm4, ExtOp, Neon_top8H>{
+    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
+  }
+
+  def _4S : N2VShiftLongHigh<0b1, u, opcode, asmop, "2d", "4s",
+                             v2i64, v2i32, 2, uimm5, ExtOp, Neon_top4S>{
+    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
+  }
+
+  // A bare ExtOp with no shl node selects the same instruction with shift 0.
+  def : Pat<(v8i16 (ExtOp (v8i8 VPR64:$Rn))),
+            (!cast<Instruction>(prefix # "_8B") VPR64:$Rn, 0)>;
+
+  def : Pat<(v4i32 (ExtOp (v4i16 VPR64:$Rn))),
+            (!cast<Instruction>(prefix # "_4H") VPR64:$Rn, 0)>;
+
+  def : Pat<(v2i64 (ExtOp (v2i32 VPR64:$Rn))),
+            (!cast<Instruction>(prefix # "_2S") VPR64:$Rn, 0)>;
+
+  def : Pat<(v8i16 (ExtOp (v8i8 (Neon_top16B VPR128:$Rn)))),
+            (!cast<Instruction>(prefix # "_16B") VPR128:$Rn, 0)>;
+
+  def : Pat<(v4i32 (ExtOp (v4i16 (Neon_top8H VPR128:$Rn)))),
+            (!cast<Instruction>(prefix # "_8H") VPR128:$Rn, 0)>;
+
+  def : Pat<(v2i64 (ExtOp (v2i32 (Neon_top4S VPR128:$Rn)))),
+            (!cast<Instruction>(prefix # "_4S") VPR128:$Rn, 0)>;
+}
+
+// Shift left immediate (u = 0, opcode 0b01010)
+defm SHLvvi : NeonI_N2VShL<0b0, 0b01010, "shl">;
+
+// Shift left long immediate; the prefix string must equal the defm name (!cast)
+defm SSHLLvvi : NeonI_N2VShLL<"SSHLLvvi", 0b0, 0b10100, "sshll", sext>;
+defm USHLLvvi : NeonI_N2VShLL<"USHLLvvi", 0b1, 0b10100, "ushll", zext>;
+
 // Scalar Arithmetic
 
 class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop>
author	Hao Liu <Hao.Liu@arm.com>	2013-08-15 08:26:11 +0000
committer	Hao Liu <Hao.Liu@arm.com>	2013-08-15 08:26:11 +0000
commit	d9767021f83879429e930b068d1d6aef22285b33 (patch)
tree	93c99311855843ce9f66f9990626667bbc9be5ab /lib
parent	46ceaf4ba64cdd0ac37578c0132cad39c9ea21c0 (diff)
download	external_llvm-d9767021f83879429e930b068d1d6aef22285b33.zip external_llvm-d9767021f83879429e930b068d1d6aef22285b33.tar.gz external_llvm-d9767021f83879429e930b068d1d6aef22285b33.tar.bz2