diff options
-rw-r--r-- | include/llvm/IntrinsicsARM.td | 1 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrNEON.td | 30 | ||||
-rw-r--r-- | lib/VMCore/AutoUpgrade.cpp | 7 | ||||
-rw-r--r-- | test/Bitcode/neon-intrinsics.ll | 14 | ||||
-rw-r--r-- | test/Bitcode/neon-intrinsics.ll.bc | bin | 3796 -> 4116 bytes | |||
-rw-r--r-- | test/CodeGen/ARM/vmov.ll | 10 |
6 files changed, 51 insertions, 11 deletions
diff --git a/include/llvm/IntrinsicsARM.td b/include/llvm/IntrinsicsARM.td index 2528eaa..fc302e6 100644 --- a/include/llvm/IntrinsicsARM.td +++ b/include/llvm/IntrinsicsARM.td @@ -303,7 +303,6 @@ def int_arm_neon_vcvtfxs2fp : Neon_CvtFxToFP_Intrinsic; def int_arm_neon_vcvtfxu2fp : Neon_CvtFxToFP_Intrinsic; // Narrowing and Lengthening Vector Moves. -def int_arm_neon_vmovn : Neon_1Arg_Narrow_Intrinsic; def int_arm_neon_vqmovns : Neon_1Arg_Narrow_Intrinsic; def int_arm_neon_vqmovnu : Neon_1Arg_Narrow_Intrinsic; def int_arm_neon_vqmovnsu : Neon_1Arg_Narrow_Intrinsic; diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 741df7e..1132911 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -956,6 +956,15 @@ class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "", [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>; +// Narrow 2-register operations. +class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, + bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyD, ValueType TyQ, SDNode OpNode> + : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$dst), + (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "", + [(set DPR:$dst, (TyD (OpNode (TyQ QPR:$src))))]>; + // Narrow 2-register intrinsics. class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, @@ -1579,6 +1588,23 @@ multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, } +// Neon Narrowing 2-register vector operations, +// source operand element sizes of 16, 32 and 64 bits: +multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, + bits<5> op11_7, bit op6, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + SDNode OpNode> { + def v8i8 : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4, + itin, OpcodeStr, !strconcat(Dt, "16"), + v8i8, v8i16, OpNode>; + def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4, + itin, OpcodeStr, !strconcat(Dt, "32"), + v4i16, v4i32, OpNode>; + def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4, + itin, OpcodeStr, !strconcat(Dt, "64"), + v2i32, v2i64, OpNode>; +} + // Neon Narrowing 2-register vector intrinsics, // source operand element sizes of 16, 32 and 64 bits: multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, @@ -3221,8 +3247,8 @@ def VDUPfqf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 1, 0, [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>; // VMOVN : Vector Narrowing Move -defm VMOVN : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD, - "vmovn", "i", int_arm_neon_vmovn>; +defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD, + "vmovn", "i", trunc>; // VQMOVN : Vector Saturating Narrowing Move defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD, "vqmovn", "s", int_arm_neon_vqmovns>; diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp index 62a4625..c994339 100644 --- a/lib/VMCore/AutoUpgrade.cpp +++ b/lib/VMCore/AutoUpgrade.cpp @@ -88,7 +88,9 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { ((Name.compare(14, 5, "vaddw", 5) == 0 || Name.compare(14, 5, "vsubw", 5) == 0) && (Name.compare(19, 2, "s.", 2) == 0 || - Name.compare(19, 2, "u.", 2) == 0))) { + Name.compare(19, 2, "u.", 2) == 0)) || + + (Name.compare(14, 6, "vmovn.", 6) == 0)) { // Calls to these are transformed into IR without intrinsics. NewFn = 0; @@ -401,6 +403,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { else NewI = BinaryOperator::CreateSub(V0, V1,"upgraded."+CI->getName(),CI); + } else if (Name.compare(14, 6, "vmovn.", 6) == 0) { + NewI = new TruncInst(CI->getArgOperand(0), CI->getType(), + "upgraded." + CI->getName(), CI); } else { llvm_unreachable("Unknown arm.neon function for CallInst upgrade."); } diff --git a/test/Bitcode/neon-intrinsics.ll b/test/Bitcode/neon-intrinsics.ll index d6d03d0..eafa94b 100644 --- a/test/Bitcode/neon-intrinsics.ll +++ b/test/Bitcode/neon-intrinsics.ll @@ -76,6 +76,20 @@ ; CHECK: zext <4 x i16> ; CHECK-NEXT: sub <4 x i32> +; vmovn should be auto-upgraded to trunc + +; CHECK: vmovni16 +; CHECK-NOT: arm.neon.vmovn.v8i8 +; CHECK: trunc <8 x i16> + +; CHECK: vmovni32 +; CHECK-NOT: arm.neon.vmovn.v4i16 +; CHECK: trunc <4 x i32> + +; CHECK: vmovni64 +; CHECK-NOT: arm.neon.vmovn.v2i32 +; CHECK: trunc <2 x i64> + ; vld* and vst* intrinsic calls need an alignment argument (defaulted to 1) ; CHECK: vld1i8 diff --git a/test/Bitcode/neon-intrinsics.ll.bc b/test/Bitcode/neon-intrinsics.ll.bc Binary files differindex bc01eb5..0441aa1 100644 --- a/test/Bitcode/neon-intrinsics.ll.bc +++ b/test/Bitcode/neon-intrinsics.ll.bc diff --git a/test/CodeGen/ARM/vmov.ll b/test/CodeGen/ARM/vmov.ll index f863bf8..8cd9457 100644 --- a/test/CodeGen/ARM/vmov.ll +++ b/test/CodeGen/ARM/vmov.ll @@ -240,7 +240,7 @@ define <8 x i8> @vmovni16(<8 x i16>* %A) nounwind { ;CHECK: vmovni16: ;CHECK: vmovn.i16 %tmp1 = load <8 x i16>* %A - %tmp2 = call <8 x i8> @llvm.arm.neon.vmovn.v8i8(<8 x i16> %tmp1) + %tmp2 = trunc <8 x i16> %tmp1 to <8 x i8> ret <8 x i8> %tmp2 } @@ -248,7 +248,7 @@ define <4 x i16> @vmovni32(<4 x i32>* %A) nounwind { ;CHECK: vmovni32: ;CHECK: vmovn.i32 %tmp1 = load <4 x i32>* %A - %tmp2 = call <4 x i16> @llvm.arm.neon.vmovn.v4i16(<4 x i32> %tmp1) + %tmp2 = trunc <4 x i32> %tmp1 to <4 x i16> ret <4 x i16> %tmp2 } @@ -256,14 +256,10 @@ define <2 x i32> @vmovni64(<2 x i64>* %A) nounwind { ;CHECK: vmovni64: ;CHECK: vmovn.i64 %tmp1 = load <2 x i64>* %A - %tmp2 = call <2 x i32> @llvm.arm.neon.vmovn.v2i32(<2 x i64> %tmp1) + %tmp2 = trunc <2 x i64> %tmp1 to <2 x i32> ret <2 x i32> %tmp2 } -declare <8 x i8> @llvm.arm.neon.vmovn.v8i8(<8 x i16>) nounwind readnone -declare <4 x i16> @llvm.arm.neon.vmovn.v4i16(<4 x i32>) nounwind readnone -declare <2 x i32> @llvm.arm.neon.vmovn.v2i32(<2 x i64>) nounwind readnone - define <8 x i8> @vqmovns16(<8 x i16>* %A) nounwind { ;CHECK: vqmovns16: ;CHECK: vqmovn.s16 |