1 files changed, 116 insertions, 146 deletions
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td
index b8840aa..f600d24 100644
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -215,8 +215,8 @@ defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul",
 // class NeonI_3VSame_Constraint_impl: NeonI_3VSame with no data type and
 // two operands constraints.
 class NeonI_3VSame_Constraint_impl<string asmop, string asmlane,
-  RegisterClass VPRC, ValueType OpTy, bit q, bit u, bits<2> size, bits<5> opcode,
-  SDPatternOperator opnode>
+  RegisterOperand VPRC, ValueType OpTy, bit q, bit u, bits<2> size, 
+  bits<5> opcode, SDPatternOperator opnode>
   : NeonI_3VSame<q, u, size, opcode,
     (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, VPRC:$Rm),
     asmop # "\t$Rd" # asmlane # ", $Rn" # asmlane # ", $Rm" # asmlane,
@@ -321,11 +321,13 @@ defm ORRvvv : NeonI_3VSame_B_sizes<0b0, 0b10, 0b00011, "orr", or, or, 1>;
 // ORR disassembled as MOV if Vn==Vm
 
 // Vector Move - register
-// Alias for ORR if Vn=Vm and it is the preferred syntax
+// Alias for ORR if Vn=Vm.
+// FIXME: This is actually the preferred syntax but TableGen can't deal with
+// custom printing of aliases.
 def : NeonInstAlias<"mov $Rd.8b, $Rn.8b",
-                    (ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn)>;
+                    (ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn), 0>;
 def : NeonInstAlias<"mov $Rd.16b, $Rn.16b",
-                    (ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn)>;
+                    (ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn), 0>;
 
 def Neon_immAllOnes: PatLeaf<(Neon_movi (i32 timm), (i32 imm)), [{
   ConstantSDNode *ImmConstVal = cast<ConstantSDNode>(N->getOperand(0));
@@ -571,7 +573,7 @@ def Neon_cmgt : PatFrag<(ops node:$lhs, node:$rhs),
 // NeonI_compare_aliases class: swaps register operands to implement
 // comparison aliases, e.g., CMLE is alias for CMGE with operands reversed.
 class NeonI_compare_aliases<string asmop, string asmlane,
-                            Instruction inst, RegisterClass VPRC>
+                            Instruction inst, RegisterOperand VPRC>
   : NeonInstAlias<asmop # "\t$Rd" # asmlane #", $Rn" # asmlane #
                     ", $Rm" # asmlane,
                   (inst VPRC:$Rd, VPRC:$Rm, VPRC:$Rn), 0b0>;
@@ -1324,7 +1326,7 @@ defm MVNIvi_msl : NeonI_mov_imm_msl_sizes<"mvni", 0b1, Neon_mvni>;
 }
 
 class NeonI_mov_imm_lsl_aliases<string asmop, string asmlane,
-                                Instruction inst, RegisterClass VPRC>
+                                Instruction inst, RegisterOperand VPRC>
   : NeonInstAlias<!strconcat(asmop, " $Rd," # asmlane # ", $Imm"),
                         (inst VPRC:$Rd, neon_uimm8:$Imm,  0), 0b0>;
 
@@ -1401,7 +1403,7 @@ def MOVIdi : NeonI_1VModImm<0b0, 0b1,
 
 // Vector Floating Point Move Immediate
 
-class NeonI_FMOV_impl<string asmlane, RegisterClass VPRC, ValueType OpTy,
+class NeonI_FMOV_impl<string asmlane, RegisterOperand VPRC, ValueType OpTy,
                       Operand immOpType, bit q, bit op>
   : NeonI_1VModImm<q, op,
                    (outs VPRC:$Rd), (ins immOpType:$Imm),
@@ -1456,7 +1458,7 @@ def shr_imm32 : shr_imm<"32">;
 def shr_imm64 : shr_imm<"64">;
 
 class N2VShift<bit q, bit u, bits<5> opcode, string asmop, string T,
-               RegisterClass VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode>
+               RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode>
   : NeonI_2VShiftImm<q, u, opcode,
                      (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
                      asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
@@ -1634,7 +1636,7 @@ defm USHLLvvi : NeonI_N2VShLL<"USHLLvvi", 0b1, 0b10100, "ushll", zext>;
 
 // Rounding/Saturating shift
 class N2VShift_RQ<bit q, bit u, bits<5> opcode, string asmop, string T,
-                  RegisterClass VPRC, ValueType Ty, Operand ImmTy,
+                  RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
                   SDPatternOperator OpNode>
   : NeonI_2VShiftImm<q, u, opcode,
                      (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
@@ -1736,7 +1738,7 @@ defm SQSHLvvi : NeonI_N2VShL_Q<0b0, 0b01110, "sqshl", Neon_sqrshlImm>;
 defm UQSHLvvi : NeonI_N2VShL_Q<0b1, 0b01110, "uqshl", Neon_uqrshlImm>;
 
 class N2VShiftAdd<bit q, bit u, bits<5> opcode, string asmop, string T,
-                  RegisterClass VPRC, ValueType Ty, Operand ImmTy,
+                  RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
                   SDNode OpNode>
   : NeonI_2VShiftImm<q, u, opcode,
            (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
@@ -1792,7 +1794,7 @@ defm USRAvvi    : NeonI_N2VShRAdd<1, 0b00010, "usra", srl>;
 
 // Rounding shift accumulate
 class N2VShiftAdd_R<bit q, bit u, bits<5> opcode, string asmop, string T,
-                    RegisterClass VPRC, ValueType Ty, Operand ImmTy,
+                    RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
                     SDPatternOperator OpNode>
   : NeonI_2VShiftImm<q, u, opcode,
                      (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
@@ -1847,7 +1849,7 @@ defm URSRAvvi : NeonI_N2VShRAdd_R<1, 0b00110, "ursra", int_aarch64_neon_vurshr>;
 
 // Shift insert by immediate
 class N2VShiftIns<bit q, bit u, bits<5> opcode, string asmop, string T,
-                  RegisterClass VPRC, ValueType Ty, Operand ImmTy,
+                  RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
                   SDPatternOperator OpNode>
     : NeonI_2VShiftImm<q, u, opcode,
            (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
@@ -1953,7 +1955,7 @@ class N2VShR_Narrow<bit q, bit u, bits<5> opcode, string asmop, string DestT,
 class N2VShR_Narrow_Hi<bit q, bit u, bits<5> opcode, string asmop, string DestT,
                        string SrcT, Operand ImmTy>
   : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
-                     (ins VPR64:$src, VPR128:$Rn, ImmTy:$Imm),
+                     (ins VPR128:$src, VPR128:$Rn, ImmTy:$Imm),
                      asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
                      [], NoItinerary> {
   let Constraints = "$src = $Rd";
@@ -2040,15 +2042,18 @@ multiclass Neon_shiftNarrow_patterns<string shr> {
   def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert
               (v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H")
                 VPR128:$Rn, imm:$Imm)))))),
-            (SHRNvvi_16B VPR64:$src, VPR128:$Rn, imm:$Imm)>;
+            (SHRNvvi_16B (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
+                         VPR128:$Rn, imm:$Imm)>;
   def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert
               (v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S")
                 VPR128:$Rn, imm:$Imm)))))),
-            (SHRNvvi_8H VPR64:$src, VPR128:$Rn, imm:$Imm)>;
+            (SHRNvvi_8H (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
+                        VPR128:$Rn, imm:$Imm)>;
   def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert
               (v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D")
                 VPR128:$Rn, imm:$Imm)))))),
-            (SHRNvvi_4S VPR64:$src, VPR128:$Rn, imm:$Imm)>;
+            (SHRNvvi_4S (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
+                        VPR128:$Rn, imm:$Imm)>;
 }
 
 multiclass Neon_shiftNarrow_QR_patterns<SDPatternOperator op, string prefix> {
@@ -2060,17 +2065,20 @@ multiclass Neon_shiftNarrow_QR_patterns<SDPatternOperator op, string prefix> {
             (!cast<Instruction>(prefix # "_2S") VPR128:$Rn, imm:$Imm)>;
 
   def : Pat<(Neon_combine (v1i64 VPR64:$src),
-              (v1i64 (bitconvert (v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm))))),
+                (v1i64 (bitconvert (v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm))))),
             (!cast<Instruction>(prefix # "_16B")
-              VPR64:$src, VPR128:$Rn, imm:$Imm)>;
+                (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
+                VPR128:$Rn, imm:$Imm)>;
   def : Pat<(Neon_combine (v1i64 VPR64:$src),
-              (v1i64 (bitconvert (v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm))))),
+                (v1i64 (bitconvert (v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm))))),
             (!cast<Instruction>(prefix # "_8H")
-              VPR64:$src, VPR128:$Rn, imm:$Imm)>;
+                (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
+                VPR128:$Rn, imm:$Imm)>;
   def : Pat<(Neon_combine (v1i64 VPR64:$src),
-              (v1i64 (bitconvert (v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm))))),
+                (v1i64 (bitconvert (v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm))))),
             (!cast<Instruction>(prefix # "_4S")
-              VPR64:$src, VPR128:$Rn, imm:$Imm)>;
+                  (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
+                  VPR128:$Rn, imm:$Imm)>;
 }
 
 defm : Neon_shiftNarrow_patterns<"lshr">;
@@ -2086,7 +2094,7 @@ defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqrshrn, "UQRSHRNvvi">;
 
 // Convert fix-point and float-pointing
 class N2VCvt_Fx<bit q, bit u, bits<5> opcode, string asmop, string T,
-                RegisterClass VPRC, ValueType DestTy, ValueType SrcTy,
+                RegisterOperand VPRC, ValueType DestTy, ValueType SrcTy,
                 Operand ImmTy, SDPatternOperator IntOp>
   : NeonI_2VShiftImm<q, u, opcode,
                      (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
@@ -2162,7 +2170,7 @@ defm NI_zext_high : Neon_sshll2_0<zext>;
 class NeonI_3VDL<bit q, bit u, bits<2> size, bits<4> opcode,
                  string asmop, string ResS, string OpS,
                  SDPatternOperator opnode, SDPatternOperator ext,
-                 RegisterClass OpVPR,
+                 RegisterOperand OpVPR,
                  ValueType ResTy, ValueType OpTy>
   : NeonI_3VDiff<q, u, size, opcode,
                  (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
@@ -2244,7 +2252,7 @@ defm USUBL2vvv :  NeonI_3VDL2_u<0b1, 0b0010, "usubl2", sub, 0>;
 class NeonI_3VDW<bit q, bit u, bits<2> size, bits<4> opcode,
                  string asmop, string ResS, string OpS,
                  SDPatternOperator opnode, SDPatternOperator ext,
-                 RegisterClass OpVPR,
+                 RegisterOperand OpVPR,
                  ValueType ResTy, ValueType OpTy>
   : NeonI_3VDiff<q, u, size, opcode,
                  (outs VPR128:$Rd), (ins VPR128:$Rn, OpVPR:$Rm),
@@ -2325,7 +2333,7 @@ multiclass NeonI_get_high
 }
 
 defm NI_get_hi : NeonI_get_high;
-                                 
+
 // pattern for addhn/subhn with 2 operands
 class NeonI_3VDN_addhn_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
                            string asmop, string ResS, string OpS,
@@ -2361,7 +2369,7 @@ defm SUBHNvvv  : NeonI_3VDN_addhn_2Op<0b0, 0b0110, "subhn", sub, 0>;
 class NeonI_3VD_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
                     string asmop, string ResS, string OpS,
                     SDPatternOperator opnode,
-                    RegisterClass ResVPR, RegisterClass OpVPR,
+                    RegisterOperand ResVPR, RegisterOperand OpVPR,
                     ValueType ResTy, ValueType OpTy>
   : NeonI_3VDiff<q, u, size, opcode,
                  (outs ResVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
@@ -2389,78 +2397,70 @@ defm RADDHNvvv : NeonI_3VDN_2Op<0b1, 0b0100, "raddhn", int_arm_neon_vraddhn, 1>;
 defm RSUBHNvvv : NeonI_3VDN_2Op<0b1, 0b0110, "rsubhn", int_arm_neon_vrsubhn, 0>;
 
 // pattern for acle intrinsic with 3 operands
-class NeonI_3VDN_addhn2_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
-                            string asmop, string ResS, string OpS,
-                            SDPatternOperator opnode, SDPatternOperator get_hi,
-                            ValueType OpTy, ValueType OpSTy>
-  : NeonI_3VDiff<q, u, size, opcode,
-                 (outs VPR128:$Rd), (ins VPR64:$src, VPR128:$Rn, VPR128:$Rm),
-                 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
-                 [(set (v2i64 VPR128:$Rd),
-                    (Neon_combine
-                      (v1i64 VPR64:$src),
-                      (v1i64 (bitconvert
-                        (OpSTy (get_hi
-                          (OpTy (opnode (OpTy VPR128:$Rn),
-                                        (OpTy VPR128:$Rm)))))))))],
-                 NoItinerary> {
-  let Constraints = "$src = $Rd";
-}
-
-multiclass NeonI_3VDN_addhn2_3Op_v1<bit u, bits<4> opcode,
-                                    string asmop, 
-                                    SDPatternOperator opnode>
-{
-  def _16b8h : NeonI_3VDN_addhn2_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h",
-                                     opnode, NI_get_hi_8h, v8i16, v8i8>;
-  def _8h4s : NeonI_3VDN_addhn2_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s",
-                                    opnode, NI_get_hi_4s, v4i32, v4i16>;
-  def _4s2d : NeonI_3VDN_addhn2_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d",
-                                    opnode, NI_get_hi_2d, v2i64, v2i32>;
-}
-
-defm ADDHN2vvv  : NeonI_3VDN_addhn2_3Op_v1<0b0, 0b0100, "addhn2", add>;
-defm SUBHN2vvv  : NeonI_3VDN_addhn2_3Op_v1<0b0, 0b0110, "subhn2", sub>;
-
-// pattern for acle intrinsic with 3 operands
 class NeonI_3VDN_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
-                     string asmop, string ResS, string OpS,
-                     SDPatternOperator opnode,
-                     ValueType OpTy, ValueType OpSTy>
+                     string asmop, string ResS, string OpS>
   : NeonI_3VDiff<q, u, size, opcode,
-                 (outs VPR128:$Rd), (ins VPR64:$src, VPR128:$Rn, VPR128:$Rm),
+                 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
-                 [(set (v2i64 VPR128:$Rd),
-                    (Neon_combine (v1i64 VPR64:$src),
-                                  (v1i64 (bitconvert 
-                                     (OpSTy (opnode (OpTy VPR128:$Rn),
-                                                    (OpTy VPR128:$Rm)))))))],
-                 NoItinerary> {
+                 [], NoItinerary> {
   let Constraints = "$src = $Rd";
+  let neverHasSideEffects = 1;
 }
 
 multiclass NeonI_3VDN_3Op_v1<bit u, bits<4> opcode,
-                             string asmop, 
-                             SDPatternOperator opnode>
-{
-  def _16b8h : NeonI_3VDN_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h",
-                              opnode, v8i16, v8i8>;
-  def _8h4s : NeonI_3VDN_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s",
-                             opnode, v4i32, v4i16>;
-  def _4s2d : NeonI_3VDN_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d",
-                             opnode, v2i64, v2i32>;
-}
-
-defm RADDHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0100, "raddhn2",
-                                    int_arm_neon_vraddhn>;
-defm RSUBHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0110, "rsubhn2",
-                                    int_arm_neon_vrsubhn>;
+                             string asmop> {
+  def _16b8h : NeonI_3VDN_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h">;
+  def _8h4s : NeonI_3VDN_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s">;
+  def _4s2d : NeonI_3VDN_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d">;
+}
+
+defm ADDHN2vvv  : NeonI_3VDN_3Op_v1<0b0, 0b0100, "addhn2">;
+defm SUBHN2vvv  : NeonI_3VDN_3Op_v1<0b0, 0b0110, "subhn2">;
+
+defm RADDHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0100, "raddhn2">;
+defm RSUBHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0110, "rsubhn2">;
+
+// Standalone Pats (not inline instruction patterns) because the output has to
+// wrap the 64-bit $src in SUBREG_TO_REG (sub_64) before passing it to INST.
+class NarrowHighHalfPat<Instruction INST, ValueType DstTy, ValueType SrcTy,
+                        SDPatternOperator coreop>
+  : Pat<(Neon_combine (v1i64 VPR64:$src),
+                      (v1i64 (bitconvert (DstTy (coreop (SrcTy VPR128:$Rn),
+                                                        (SrcTy VPR128:$Rm)))))),
+        (INST (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
+              VPR128:$Rn, VPR128:$Rm)>;
+
+// addhn2: core op is NI_get_hi_* applied to (add $LHS, $RHS).
+def : NarrowHighHalfPat<ADDHN2vvv_16b8h, v8i8,  v8i16,
+          BinOpFrag<(NI_get_hi_8h (add node:$LHS, node:$RHS))>>;
+def : NarrowHighHalfPat<ADDHN2vvv_8h4s,  v4i16, v4i32,
+          BinOpFrag<(NI_get_hi_4s (add node:$LHS, node:$RHS))>>;
+def : NarrowHighHalfPat<ADDHN2vvv_4s2d,  v2i32, v2i64,
+          BinOpFrag<(NI_get_hi_2d (add node:$LHS, node:$RHS))>>;
+
+// subhn2: core op is NI_get_hi_* applied to (sub $LHS, $RHS).
+def : NarrowHighHalfPat<SUBHN2vvv_16b8h, v8i8,  v8i16,
+          BinOpFrag<(NI_get_hi_8h (sub node:$LHS, node:$RHS))>>;
+def : NarrowHighHalfPat<SUBHN2vvv_8h4s,  v4i16, v4i32,
+          BinOpFrag<(NI_get_hi_4s (sub node:$LHS, node:$RHS))>>;
+def : NarrowHighHalfPat<SUBHN2vvv_4s2d,  v2i32, v2i64,
+          BinOpFrag<(NI_get_hi_2d (sub node:$LHS, node:$RHS))>>;
+
+// raddhn2: core op is the int_arm_neon_vraddhn intrinsic.
+def : NarrowHighHalfPat<RADDHN2vvv_16b8h, v8i8,  v8i16, int_arm_neon_vraddhn>;
+def : NarrowHighHalfPat<RADDHN2vvv_8h4s,  v4i16, v4i32, int_arm_neon_vraddhn>;
+def : NarrowHighHalfPat<RADDHN2vvv_4s2d,  v2i32, v2i64, int_arm_neon_vraddhn>;
+
+// rsubhn2: core op is the int_arm_neon_vrsubhn intrinsic.
+def : NarrowHighHalfPat<RSUBHN2vvv_16b8h, v8i8,  v8i16, int_arm_neon_vrsubhn>;
+def : NarrowHighHalfPat<RSUBHN2vvv_8h4s,  v4i16, v4i32, int_arm_neon_vrsubhn>;
+def : NarrowHighHalfPat<RSUBHN2vvv_4s2d,  v2i32, v2i64, int_arm_neon_vrsubhn>;
 
 // pattern that need to extend result
 class NeonI_3VDL_Ext<bit q, bit u, bits<2> size, bits<4> opcode,
                      string asmop, string ResS, string OpS,
                      SDPatternOperator opnode,
-                     RegisterClass OpVPR,
+                     RegisterOperand OpVPR,
                      ValueType ResTy, ValueType OpTy, ValueType OpSTy>
   : NeonI_3VDiff<q, u, size, opcode,
                  (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
@@ -2528,7 +2528,7 @@ defm UABDL2vvv : NeonI_3VDL_Abd_u<0b1, 0b0111, "uabdl2", "NI_uabdl_hi", 1>;
 class NeonI_3VDL_Aba<bit q, bit u, bits<2> size, bits<4> opcode,
                      string asmop, string ResS, string OpS, 
                      SDPatternOperator opnode, SDPatternOperator subop,
-                     RegisterClass OpVPR,
+                     RegisterOperand OpVPR,
                      ValueType ResTy, ValueType OpTy, ValueType OpSTy>
   : NeonI_3VDiff<q, u, size, opcode,
                  (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
@@ -2684,7 +2684,7 @@ defm UMLSLvvv :  NeonI_3VDL_3Op_v1<0b1, 0b1010, "umlsl", Neon_umlsl>;
 class NeonI_3VDL2_3Op_mlas<bit q, bit u, bits<2> size, bits<4> opcode,
                            string asmop, string ResS, string OpS,
                            SDPatternOperator subop, SDPatternOperator opnode,
-                           RegisterClass OpVPR,
+                           RegisterOperand OpVPR,
                            ValueType ResTy, ValueType OpTy>
   : NeonI_3VDiff<q, u, size, opcode,
                (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
@@ -2856,11 +2856,7 @@ multiclass NeonI_Scalar3Same_BHSD_sizes<bit u, bits<5> opcode,
 
 class Neon_Scalar_D_size_patterns<SDPatternOperator opnode, Instruction INSTD>
   : Pat<(v1i64 (opnode (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))),
-        (SUBREG_TO_REG (i64 0),
-              (INSTD (EXTRACT_SUBREG VPR64:$Rn, sub_64),
-             (EXTRACT_SUBREG VPR64:$Rm, sub_64)),
-          sub_64)>;
-
+        (INSTD VPR64:$Rn, VPR64:$Rm)>;
 
 // Scalar Integer Add
 let isCommutable = 1 in {
@@ -2994,54 +2990,28 @@ def : Pat<(v8i8  (bitconvert (v1i64  VPR64:$src))), (v8i8 VPR64:$src)>;
 def : Pat<(v4i16 (bitconvert (v1i64  VPR64:$src))), (v4i16 VPR64:$src)>;
 def : Pat<(v2i32 (bitconvert (v1i64  VPR64:$src))), (v2i32 VPR64:$src)>;
 
-def : Pat<(f64   (bitconvert (v8i8  VPR64:$src))),
-                 (f64 (EXTRACT_SUBREG (v8i8  VPR64:$src), sub_64))>;
-def : Pat<(f64   (bitconvert (v4i16  VPR64:$src))),
-                 (f64 (EXTRACT_SUBREG (v4i16  VPR64:$src), sub_64))>;
-def : Pat<(f64   (bitconvert (v2i32  VPR64:$src))),
-                 (f64 (EXTRACT_SUBREG (v2i32  VPR64:$src), sub_64))>;
-def : Pat<(f64   (bitconvert (v2f32  VPR64:$src))),
-                 (f64 (EXTRACT_SUBREG (v2f32  VPR64:$src), sub_64))>;
-def : Pat<(f64   (bitconvert (v1i64  VPR64:$src))),
-                 (f64 (EXTRACT_SUBREG (v1i64  VPR64:$src), sub_64))>;
-def : Pat<(f128  (bitconvert (v16i8  VPR128:$src))),
-                 (f128 (EXTRACT_SUBREG (v16i8  VPR128:$src), sub_alias))>;
-def : Pat<(f128  (bitconvert (v8i16  VPR128:$src))),
-                 (f128 (EXTRACT_SUBREG (v8i16  VPR128:$src), sub_alias))>;
-def : Pat<(f128  (bitconvert (v4i32  VPR128:$src))),
-                 (f128 (EXTRACT_SUBREG (v4i32  VPR128:$src), sub_alias))>;
-def : Pat<(f128  (bitconvert (v2i64  VPR128:$src))),
-                 (f128 (EXTRACT_SUBREG (v2i64  VPR128:$src), sub_alias))>;
-def : Pat<(f128  (bitconvert (v4f32  VPR128:$src))),
-                 (f128 (EXTRACT_SUBREG (v4f32  VPR128:$src), sub_alias))>;
-def : Pat<(f128  (bitconvert (v2f64  VPR128:$src))),
-                 (f128 (EXTRACT_SUBREG (v2f64  VPR128:$src), sub_alias))>;
-
-def : Pat<(v8i8   (bitconvert (f64   FPR64:$src))),
-                  (v8i8 (SUBREG_TO_REG (i64 0), (f64  FPR64:$src), sub_64))>;
-def : Pat<(v4i16  (bitconvert (f64   FPR64:$src))),
-                  (v4i16 (SUBREG_TO_REG (i64 0), (f64  FPR64:$src), sub_64))>;
-def : Pat<(v2i32  (bitconvert (f64   FPR64:$src))),
-                  (v2i32 (SUBREG_TO_REG (i64 0), (f64  FPR64:$src), sub_64))>;
-def : Pat<(v2f32  (bitconvert (f64   FPR64:$src))),
-                  (v2f32 (SUBREG_TO_REG (i64 0), (f64  FPR64:$src), sub_64))>;
-def : Pat<(v1i64  (bitconvert (f64   FPR64:$src))),
-                  (v1i64 (SUBREG_TO_REG (i64 0), (f64  FPR64:$src), sub_64))>;
-def : Pat<(v16i8  (bitconvert (f128   FPR128:$src))),
-                  (v16i8 (SUBREG_TO_REG (i128 0), (f128  FPR128:$src),
-                  sub_alias))>;
-def : Pat<(v8i16  (bitconvert (f128   FPR128:$src))),
-                  (v8i16 (SUBREG_TO_REG (i128 0), (f128  FPR128:$src),
-                  sub_alias))>;
-def : Pat<(v4i32  (bitconvert (f128   FPR128:$src))),
-                  (v4i32 (SUBREG_TO_REG (i128 0), (f128  FPR128:$src),
-                  sub_alias))>;
-def : Pat<(v2i64  (bitconvert (f128   FPR128:$src))),
-                  (v2i64 (SUBREG_TO_REG (i128 0), (f128  FPR128:$src),
-                  sub_alias))>;
-def : Pat<(v4f32  (bitconvert (f128   FPR128:$src))),
-                  (v4f32 (SUBREG_TO_REG (i128 0), (f128  FPR128:$src),
-                  sub_alias))>;
-def : Pat<(v2f64  (bitconvert (f128   FPR128:$src))),
-                  (v2f64 (SUBREG_TO_REG (i128 0), (f128  FPR128:$src),
-                  sub_alias))>;
+def : Pat<(f64   (bitconvert (v8i8  VPR64:$src))), (f64 VPR64:$src)>;
+def : Pat<(f64   (bitconvert (v4i16  VPR64:$src))), (f64 VPR64:$src)>;
+def : Pat<(f64   (bitconvert (v2i32  VPR64:$src))), (f64 VPR64:$src)>;
+def : Pat<(f64   (bitconvert (v2f32  VPR64:$src))), (f64 VPR64:$src)>;
+def : Pat<(f64   (bitconvert (v1i64  VPR64:$src))), (f64 VPR64:$src)>;
+
+def : Pat<(f128  (bitconvert (v16i8  VPR128:$src))), (f128 VPR128:$src)>;
+def : Pat<(f128  (bitconvert (v8i16  VPR128:$src))), (f128 VPR128:$src)>;
+def : Pat<(f128  (bitconvert (v4i32  VPR128:$src))), (f128 VPR128:$src)>;
+def : Pat<(f128  (bitconvert (v2i64  VPR128:$src))), (f128 VPR128:$src)>;
+def : Pat<(f128  (bitconvert (v4f32  VPR128:$src))), (f128 VPR128:$src)>;
+def : Pat<(f128  (bitconvert (v2f64  VPR128:$src))), (f128 VPR128:$src)>;
+
+def : Pat<(v8i8   (bitconvert (f64   FPR64:$src))), (v8i8 FPR64:$src)>;
+def : Pat<(v4i16  (bitconvert (f64   FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v2i32  (bitconvert (f64   FPR64:$src))), (v2i32 FPR64:$src)>;
+def : Pat<(v2f32  (bitconvert (f64   FPR64:$src))), (v2f32 FPR64:$src)>;
+def : Pat<(v1i64  (bitconvert (f64   FPR64:$src))), (v1i64 FPR64:$src)>;
+
+def : Pat<(v16i8  (bitconvert (f128   FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v8i16  (bitconvert (f128   FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v4i32  (bitconvert (f128   FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v2i64  (bitconvert (f128   FPR128:$src))), (v2i64 FPR128:$src)>;
+def : Pat<(v4f32  (bitconvert (f128   FPR128:$src))), (v4f32 FPR128:$src)>;
+def : Pat<(v2f64  (bitconvert (f128   FPR128:$src))), (v2f64 FPR128:$src)>;