aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td8
-rw-r--r--test/MC/ARM/neon-mul-accum-encoding.ll24
2 files changed, 28 insertions, 4 deletions
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 8070482..691ef03 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -1495,10 +1495,10 @@ class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, Intrinsic IntOp>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
- OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
- [(set QPR:$dst,
- (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2), (TyD DPR:$src3))))]>;
+ (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd,
+ (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
diff --git a/test/MC/ARM/neon-mul-accum-encoding.ll b/test/MC/ARM/neon-mul-accum-encoding.ll
index 891e76f..ff794e2 100644
--- a/test/MC/ARM/neon-mul-accum-encoding.ll
+++ b/test/MC/ARM/neon-mul-accum-encoding.ll
@@ -165,3 +165,27 @@ define <2 x i64> @vmlalu_2xi32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) noun
%tmp7 = add <2 x i64> %tmp1, %tmp6
ret <2 x i64> %tmp7
}
+
+declare <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
+
+; CHECK: vqdmlal_4xi16
+define <4 x i32> @vqdmlal_4xi16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+; CHECK: vqdmlal.s16 q8, d19, d18 @ encoding: [0xa2,0x09,0xd3,0xf2
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+; CHECK: vqdmlal_2xi32
+define <2 x i64> @vqdmlal_2xi32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+; CHECK: vqdmlal.s32 q8, d19, d18 @ encoding: [0xa2,0x09,0xe3,0xf2]
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+