diff options
-rw-r--r-- | include/llvm/IR/IntrinsicsARM.td | 6 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrNEON.td | 14 | ||||
-rw-r--r-- | test/CodeGen/ARM/vadd.ll | 31 | ||||
-rw-r--r-- | test/CodeGen/ARM/vqdmul.ll | 82 | ||||
-rw-r--r-- | test/CodeGen/ARM/vsub.ll | 31 |
5 files changed, 6 insertions, 158 deletions
diff --git a/include/llvm/IR/IntrinsicsARM.td b/include/llvm/IR/IntrinsicsARM.td index 3c5d5ff..e29a01b 100644 --- a/include/llvm/IR/IntrinsicsARM.td +++ b/include/llvm/IR/IntrinsicsARM.td @@ -163,7 +163,6 @@ let Properties = [IntrNoMem, Commutative] in { def int_arm_neon_vrhaddu : Neon_2Arg_Intrinsic; def int_arm_neon_vqadds : Neon_2Arg_Intrinsic; def int_arm_neon_vqaddu : Neon_2Arg_Intrinsic; - def int_arm_neon_vaddhn : Neon_2Arg_Narrow_Intrinsic; def int_arm_neon_vraddhn : Neon_2Arg_Narrow_Intrinsic; // Vector Multiply. @@ -175,10 +174,6 @@ let Properties = [IntrNoMem, Commutative] in { def int_arm_neon_vmullp : Neon_2Arg_Long_Intrinsic; def int_arm_neon_vqdmull : Neon_2Arg_Long_Intrinsic; - // Vector Multiply and Accumulate/Subtract. - def int_arm_neon_vqdmlal : Neon_3Arg_Long_Intrinsic; - def int_arm_neon_vqdmlsl : Neon_3Arg_Long_Intrinsic; - // Vector Maximum. def int_arm_neon_vmaxs : Neon_2Arg_Intrinsic; def int_arm_neon_vmaxu : Neon_2Arg_Intrinsic; @@ -201,7 +196,6 @@ def int_arm_neon_vhsubs : Neon_2Arg_Intrinsic; def int_arm_neon_vhsubu : Neon_2Arg_Intrinsic; def int_arm_neon_vqsubs : Neon_2Arg_Intrinsic; def int_arm_neon_vqsubu : Neon_2Arg_Intrinsic; -def int_arm_neon_vsubhn : Neon_2Arg_Narrow_Intrinsic; def int_arm_neon_vrsubhn : Neon_2Arg_Narrow_Intrinsic; // Vector Absolute Compare. diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index cc455ad..49ae334 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -3973,8 +3973,7 @@ defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vqadd", "u", int_arm_neon_vqaddu, 1>; // VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q) -defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", - int_arm_neon_vaddhn, 1>; +defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>; // VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q) defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i", int_arm_neon_vraddhn, 1>; @@ -4140,8 +4139,8 @@ defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>; // VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D) defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, - "vqdmlal", "s", int_arm_neon_vqdmlal>; -defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>; + "vqdmlal", "s", null_frag>; +defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>; def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1), (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), @@ -4216,8 +4215,8 @@ defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>; // VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D) defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D, - "vqdmlsl", "s", int_arm_neon_vqdmlsl>; -defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>; + "vqdmlsl", "s", null_frag>; +defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", null_frag>; def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1), (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), @@ -4301,8 +4300,7 @@ defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vqsub", "u", int_arm_neon_vqsubu, 0>; // VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q) -defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", - int_arm_neon_vsubhn, 0>; +defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>; // VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q) defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i", int_arm_neon_vrsubhn, 0>; diff --git a/test/CodeGen/ARM/vadd.ll b/test/CodeGen/ARM/vadd.ll index c2c16aa..fcb5408 100644 --- a/test/CodeGen/ARM/vadd.ll +++ b/test/CodeGen/ARM/vadd.ll @@ -90,37 +90,6 @@ define <4 x float> @vaddQf32(<4 x float>* %A, <4 x float>* %B) nounwind { ret <4 x float> %tmp3 } -define <8 x i8> @vaddhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind { -;CHECK-LABEL: vaddhni16: -;CHECK: vaddhn.i16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i8> @llvm.arm.neon.vaddhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2) - ret <8 x i8> %tmp3 -} - -define <4 x i16> @vaddhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind { -;CHECK-LABEL: vaddhni32: -;CHECK: vaddhn.i32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i16> @llvm.arm.neon.vaddhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2) - ret <4 x i16> %tmp3 -} - -define <2 x i32> @vaddhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind { -;CHECK-LABEL: vaddhni64: -;CHECK: vaddhn.i64 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i32> @llvm.arm.neon.vaddhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2) - ret <2 x i32> %tmp3 -} - -declare <8 x i8> @llvm.arm.neon.vaddhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i16> @llvm.arm.neon.vaddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i32> @llvm.arm.neon.vaddhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone - define <8 x i8> @vraddhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vraddhni16: ;CHECK: vraddhn.i16 diff --git a/test/CodeGen/ARM/vqdmul.ll b/test/CodeGen/ARM/vqdmul.ll index 01bf1a4..d298167 100644 --- a/test/CodeGen/ARM/vqdmul.ll +++ b/test/CodeGen/ARM/vqdmul.ll @@ -197,47 +197,6 @@ entry: declare <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone declare <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone -define <4 x i32> @vqdmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { -;CHECK-LABEL: vqdmlals16: -;CHECK: vqdmlal.s16 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i16>* %B - %tmp3 = load <4 x i16>* %C - %tmp4 = call <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3) - ret <4 x i32> %tmp4 -} - -define <2 x i64> @vqdmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { -;CHECK-LABEL: vqdmlals32: -;CHECK: vqdmlal.s32 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i32>* %B - %tmp3 = load <2 x i32>* %C - %tmp4 = call <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3) - ret <2 x i64> %tmp4 -} - -define arm_aapcs_vfpcc <4 x i32> @test_vqdmlal_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone { -entry: -; CHECK: test_vqdmlal_lanes16 -; CHECK: vqdmlal.s16 q0, d2, d3[1] - %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1] - %1 = tail call <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1] - ret <4 x i32> %1 -} - -define arm_aapcs_vfpcc <2 x i64> @test_vqdmlal_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone { -entry: -; CHECK: test_vqdmlal_lanes32 -; CHECK: vqdmlal.s32 q0, d2, d3[1] - %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1] - %1 = tail call <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1] - ret <2 x i64> %1 -} - -declare <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone - define <4 x i32> @vqdmlals16_natural(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { ;CHECK-LABEL: vqdmlals16_natural: ;CHECK: vqdmlal.s16 @@ -283,47 +242,6 @@ entry: declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>) nounwind readnone -define <4 x i32> @vqdmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { -;CHECK-LABEL: vqdmlsls16: -;CHECK: vqdmlsl.s16 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i16>* %B - %tmp3 = load <4 x i16>* %C - %tmp4 = call <4 x i32> @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3) - ret <4 x i32> %tmp4 -} - -define <2 x i64> @vqdmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { -;CHECK-LABEL: vqdmlsls32: -;CHECK: vqdmlsl.s32 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i32>* %B - %tmp3 = load <2 x i32>* %C - %tmp4 = call <2 x i64> @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3) - ret <2 x i64> %tmp4 -} - -define arm_aapcs_vfpcc <4 x i32> @test_vqdmlsl_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone { -entry: -; CHECK: test_vqdmlsl_lanes16 -; CHECK: vqdmlsl.s16 q0, d2, d3[1] - %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1] - %1 = tail call <4 x i32> @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1] - ret <4 x i32> %1 -} - -define arm_aapcs_vfpcc <2 x i64> @test_vqdmlsl_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone { -entry: -; CHECK: test_vqdmlsl_lanes32 -; CHECK: vqdmlsl.s32 q0, d2, d3[1] - %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1] - %1 = tail call <2 x i64> @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1] - ret <2 x i64> %1 -} - -declare <4 x i32> @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone - define <4 x i32> @vqdmlsls16_natural(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { ;CHECK-LABEL: vqdmlsls16_natural: ;CHECK: vqdmlsl.s16 diff --git a/test/CodeGen/ARM/vsub.ll b/test/CodeGen/ARM/vsub.ll index 8ed8d42..6b95b97 100644 --- a/test/CodeGen/ARM/vsub.ll +++ b/test/CodeGen/ARM/vsub.ll @@ -90,37 +90,6 @@ define <4 x float> @vsubQf32(<4 x float>* %A, <4 x float>* %B) nounwind { ret <4 x float> %tmp3 } -define <8 x i8> @vsubhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind { -;CHECK-LABEL: vsubhni16: -;CHECK: vsubhn.i16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i8> @llvm.arm.neon.vsubhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2) - ret <8 x i8> %tmp3 -} - -define <4 x i16> @vsubhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind { -;CHECK-LABEL: vsubhni32: -;CHECK: vsubhn.i32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i16> @llvm.arm.neon.vsubhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2) - ret <4 x i16> %tmp3 -} - -define <2 x i32> @vsubhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind { -;CHECK-LABEL: vsubhni64: -;CHECK: vsubhn.i64 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i32> @llvm.arm.neon.vsubhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2) - ret <2 x i32> %tmp3 -} - -declare <8 x i8> @llvm.arm.neon.vsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i16> @llvm.arm.neon.vsubhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i32> @llvm.arm.neon.vsubhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone - define <8 x i8> @vsubhni16_natural(<8 x i16> %A, <8 x i16> %B) nounwind { ; CHECK-LABEL: vsubhni16_natural: ; CHECK: vsubhn.i16 |