diff options
author | Chad Rosier <mcrosier@codeaurora.org> | 2013-10-18 14:03:24 +0000 |
---|---|---|
committer | Chad Rosier <mcrosier@codeaurora.org> | 2013-10-18 14:03:24 +0000 |
commit | c439c205ba304c7ed1c88fb85c2009e49cfbd0c3 (patch) | |
tree | e3baa66e0147a7198ec2a73997997f1de06b1b7e | |
parent | e1bc6ddc0bf671826a9b7230e321a42af75734f2 (diff) | |
download | external_llvm-c439c205ba304c7ed1c88fb85c2009e49cfbd0c3.zip external_llvm-c439c205ba304c7ed1c88fb85c2009e49cfbd0c3.tar.gz external_llvm-c439c205ba304c7ed1c88fb85c2009e49cfbd0c3.tar.bz2 |
[AArch64] Add support for NEON scalar extract narrow instructions.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192970 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/AArch64/AArch64InstrNEON.td | 48 |
-rw-r--r-- | test/CodeGen/AArch64/neon-scalar-extract-narrow.ll | 104 |
-rw-r--r-- | test/MC/AArch64/neon-diagnostics.s | 55 |
-rw-r--r-- | test/MC/AArch64/neon-scalar-extract-narrow.s | 40 |
-rw-r--r-- | test/MC/Disassembler/AArch64/neon-instructions.txt | 30 |
5 files changed, 277 insertions, 0 deletions
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td index 701250d..361909a 100644 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ b/lib/Target/AArch64/AArch64InstrNEON.td @@ -3292,6 +3292,22 @@ multiclass NeonI_Scalar2SameMisc_BHSD_size<bit u, bits<5> opcode, string asmop> [], NoItinerary>; } +multiclass NeonI_Scalar2SameMisc_narrow_HSD_size<bit u, bits<5> opcode, + string asmop> { + def bh : NeonI_Scalar2SameMisc<u, 0b00, opcode, + (outs FPR8:$Rd), (ins FPR16:$Rn), + !strconcat(asmop, " $Rd, $Rn"), + [], NoItinerary>; + def hs : NeonI_Scalar2SameMisc<u, 0b01, opcode, + (outs FPR16:$Rd), (ins FPR32:$Rn), + !strconcat(asmop, " $Rd, $Rn"), + [], NoItinerary>; + def sd : NeonI_Scalar2SameMisc<u, 0b10, opcode, + (outs FPR32:$Rd), (ins FPR64:$Rn), + !strconcat(asmop, " $Rd, $Rn"), + [], NoItinerary>; +} + multiclass NeonI_Scalar2SameMisc_accum_BHSD_size<bit u, bits<5> opcode, string asmop> { @@ -3366,6 +3382,20 @@ multiclass Neon_Scalar2SameMisc_BHSD_size_patterns<SDPatternOperator opnode, (INSTS FPR32:$Rn)>; } +multiclass Neon_Scalar2SameMisc_narrow_HSD_size_patterns< + SDPatternOperator opnode, + Instruction INSTH, + Instruction INSTS, + Instruction INSTD> { + def : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn))), + (INSTH FPR16:$Rn)>; + def : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn))), + (INSTS FPR32:$Rn)>; + def : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn))), + (INSTD FPR64:$Rn)>; + +} + multiclass Neon_Scalar2SameMisc_accum_BHSD_size_patterns< SDPatternOperator opnode, Instruction INSTB, @@ -3645,6 +3675,24 @@ defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vsqadd, USQADDbb, USQADDhh, USQADDss, USQADDdd>; +// Scalar Signed Saturating Extract Unsigned Narrow +defm SQXTUN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10010, "sqxtun">; +defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnsu, + SQXTUNbh, SQXTUNhs, + SQXTUNsd>; + +// Scalar Signed Saturating Extract Narrow +defm SQXTN : 
NeonI_Scalar2SameMisc_narrow_HSD_size<0b0, 0b10100, "sqxtn">; +defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovns, + SQXTNbh, SQXTNhs, + SQXTNsd>; + +// Scalar Unsigned Saturating Extract Narrow +defm UQXTN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10100, "uqxtn">; +defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnu, + UQXTNbh, UQXTNhs, + UQXTNsd>; + // Scalar Reduce Pairwise multiclass NeonI_ScalarPair_D_sizes<bit u, bit size, bits<5> opcode, diff --git a/test/CodeGen/AArch64/neon-scalar-extract-narrow.ll b/test/CodeGen/AArch64/neon-scalar-extract-narrow.ll new file mode 100644 index 0000000..faf521b --- /dev/null +++ b/test/CodeGen/AArch64/neon-scalar-extract-narrow.ll @@ -0,0 +1,104 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s + +define i8 @test_vqmovunh_s16(i16 %a) { +; CHECK: test_vqmovunh_s16 +; CHECK: sqxtun {{b[0-9]+}}, {{h[0-9]+}} +entry: + %vqmovun.i = insertelement <1 x i16> undef, i16 %a, i32 0 + %vqmovun1.i = call <1 x i8> @llvm.arm.neon.vqmovnsu.v1i8(<1 x i16> %vqmovun.i) + %0 = extractelement <1 x i8> %vqmovun1.i, i32 0 + ret i8 %0 +} + +define i16 @test_vqmovuns_s32(i32 %a) { +; CHECK: test_vqmovuns_s32 +; CHECK: sqxtun {{h[0-9]+}}, {{s[0-9]+}} +entry: + %vqmovun.i = insertelement <1 x i32> undef, i32 %a, i32 0 + %vqmovun1.i = call <1 x i16> @llvm.arm.neon.vqmovnsu.v1i16(<1 x i32> %vqmovun.i) + %0 = extractelement <1 x i16> %vqmovun1.i, i32 0 + ret i16 %0 +} + +define i32 @test_vqmovund_s64(i64 %a) { +; CHECK: test_vqmovund_s64 +; CHECK: sqxtun {{s[0-9]+}}, {{d[0-9]+}} +entry: + %vqmovun.i = insertelement <1 x i64> undef, i64 %a, i32 0 + %vqmovun1.i = call <1 x i32> @llvm.arm.neon.vqmovnsu.v1i32(<1 x i64> %vqmovun.i) + %0 = extractelement <1 x i32> %vqmovun1.i, i32 0 + ret i32 %0 +} + +declare <1 x i8> @llvm.arm.neon.vqmovnsu.v1i8(<1 x i16>) +declare <1 x i16> @llvm.arm.neon.vqmovnsu.v1i16(<1 x i32>) +declare <1 x i32> @llvm.arm.neon.vqmovnsu.v1i32(<1 x i64>) + 
+define i8 @test_vqmovnh_s16(i16 %a) { +; CHECK: test_vqmovnh_s16 +; CHECK: sqxtn {{b[0-9]+}}, {{h[0-9]+}} +entry: + %vqmovn.i = insertelement <1 x i16> undef, i16 %a, i32 0 + %vqmovn1.i = call <1 x i8> @llvm.arm.neon.vqmovns.v1i8(<1 x i16> %vqmovn.i) + %0 = extractelement <1 x i8> %vqmovn1.i, i32 0 + ret i8 %0 +} + +define i16 @test_vqmovns_s32(i32 %a) { +; CHECK: test_vqmovns_s32 +; CHECK: sqxtn {{h[0-9]+}}, {{s[0-9]+}} +entry: + %vqmovn.i = insertelement <1 x i32> undef, i32 %a, i32 0 + %vqmovn1.i = call <1 x i16> @llvm.arm.neon.vqmovns.v1i16(<1 x i32> %vqmovn.i) + %0 = extractelement <1 x i16> %vqmovn1.i, i32 0 + ret i16 %0 +} + +define i32 @test_vqmovnd_s64(i64 %a) { +; CHECK: test_vqmovnd_s64 +; CHECK: sqxtn {{s[0-9]+}}, {{d[0-9]+}} +entry: + %vqmovn.i = insertelement <1 x i64> undef, i64 %a, i32 0 + %vqmovn1.i = call <1 x i32> @llvm.arm.neon.vqmovns.v1i32(<1 x i64> %vqmovn.i) + %0 = extractelement <1 x i32> %vqmovn1.i, i32 0 + ret i32 %0 +} + +declare <1 x i8> @llvm.arm.neon.vqmovns.v1i8(<1 x i16>) +declare <1 x i16> @llvm.arm.neon.vqmovns.v1i16(<1 x i32>) +declare <1 x i32> @llvm.arm.neon.vqmovns.v1i32(<1 x i64>) + +define i8 @test_vqmovnh_u16(i16 %a) { +; CHECK: test_vqmovnh_u16 +; CHECK: uqxtn {{b[0-9]+}}, {{h[0-9]+}} +entry: + %vqmovn.i = insertelement <1 x i16> undef, i16 %a, i32 0 + %vqmovn1.i = call <1 x i8> @llvm.arm.neon.vqmovnu.v1i8(<1 x i16> %vqmovn.i) + %0 = extractelement <1 x i8> %vqmovn1.i, i32 0 + ret i8 %0 +} + + +define i16 @test_vqmovns_u32(i32 %a) { +; CHECK: test_vqmovns_u32 +; CHECK: uqxtn {{h[0-9]+}}, {{s[0-9]+}} +entry: + %vqmovn.i = insertelement <1 x i32> undef, i32 %a, i32 0 + %vqmovn1.i = call <1 x i16> @llvm.arm.neon.vqmovnu.v1i16(<1 x i32> %vqmovn.i) + %0 = extractelement <1 x i16> %vqmovn1.i, i32 0 + ret i16 %0 +} + +define i32 @test_vqmovnd_u64(i64 %a) { +; CHECK: test_vqmovnd_u64 +; CHECK: uqxtn {{s[0-9]+}}, {{d[0-9]+}} +entry: + %vqmovn.i = insertelement <1 x i64> undef, i64 %a, i32 0 + %vqmovn1.i = call <1 x i32> 
@llvm.arm.neon.vqmovnu.v1i32(<1 x i64> %vqmovn.i) + %0 = extractelement <1 x i32> %vqmovn1.i, i32 0 + ret i32 %0 +} + +declare <1 x i8> @llvm.arm.neon.vqmovnu.v1i8(<1 x i16>) +declare <1 x i16> @llvm.arm.neon.vqmovnu.v1i16(<1 x i32>) +declare <1 x i32> @llvm.arm.neon.vqmovnu.v1i32(<1 x i64>) diff --git a/test/MC/AArch64/neon-diagnostics.s b/test/MC/AArch64/neon-diagnostics.s index 44e4561..e0675e2 100644 --- a/test/MC/AArch64/neon-diagnostics.s +++ b/test/MC/AArch64/neon-diagnostics.s @@ -4502,3 +4502,58 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmull d15, s22, d12 // CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Signed Saturating Extract Unsigned Narrow +//---------------------------------------------------------------------- + + sqxtun b19, b14 + sqxtun h21, h15 + sqxtun s20, s12 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqxtun b19, b14 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqxtun h21, h15 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqxtun s20, s12 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Signed Saturating Extract Signed Narrow +//---------------------------------------------------------------------- + + sqxtn b18, b18 + sqxtn h20, h17 + sqxtn s19, s14 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqxtn b18, b18 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqxtn h20, h17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqxtn s19, s14 +// CHECK-ERROR: ^ + + +//---------------------------------------------------------------------- +// Scalar Unsigned Saturating Extract Narrow +//---------------------------------------------------------------------- + + uqxtn b18, 
b18 + uqxtn h20, h17 + uqxtn s19, s14 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqxtn b18, b18 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqxtn h20, h17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqxtn s19, s14 +// CHECK-ERROR: ^ diff --git a/test/MC/AArch64/neon-scalar-extract-narrow.s b/test/MC/AArch64/neon-scalar-extract-narrow.s new file mode 100644 index 0000000..e25224e --- /dev/null +++ b/test/MC/AArch64/neon-scalar-extract-narrow.s @@ -0,0 +1,40 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + +// Check that the assembler can handle the documented syntax for AArch64 + +//---------------------------------------------------------------------- +// Scalar Signed Saturating Extract Unsigned Narrow +//---------------------------------------------------------------------- + + sqxtun b19, h14 + sqxtun h21, s15 + sqxtun s20, d12 + +// CHECK: sqxtun b19, h14 // encoding: [0xd3,0x29,0x21,0x7e] +// CHECK: sqxtun h21, s15 // encoding: [0xf5,0x29,0x61,0x7e] +// CHECK: sqxtun s20, d12 // encoding: [0x94,0x29,0xa1,0x7e] + +//---------------------------------------------------------------------- +// Scalar Signed Saturating Extract Signed Narrow +//---------------------------------------------------------------------- + + sqxtn b18, h18 + sqxtn h20, s17 + sqxtn s19, d14 + +// CHECK: sqxtn b18, h18 // encoding: [0x52,0x4a,0x21,0x5e] +// CHECK: sqxtn h20, s17 // encoding: [0x34,0x4a,0x61,0x5e] +// CHECK: sqxtn s19, d14 // encoding: [0xd3,0x49,0xa1,0x5e] + + +//---------------------------------------------------------------------- +// Scalar Unsigned Saturating Extract Narrow +//---------------------------------------------------------------------- + + uqxtn b18, h18 + uqxtn h20, s17 + uqxtn s19, d14 + +// CHECK: uqxtn b18, h18 // encoding: [0x52,0x4a,0x21,0x7e] +// CHECK: uqxtn h20, s17 // 
encoding: [0x34,0x4a,0x61,0x7e] +// CHECK: uqxtn s19, d14 // encoding: [0xd3,0x49,0xa1,0x7e] diff --git a/test/MC/Disassembler/AArch64/neon-instructions.txt b/test/MC/Disassembler/AArch64/neon-instructions.txt index 364259f..fa34c37 100644 --- a/test/MC/Disassembler/AArch64/neon-instructions.txt +++ b/test/MC/Disassembler/AArch64/neon-instructions.txt @@ -1683,3 +1683,33 @@ # CHECK: sqdmull d15, s22, s12 0xcc,0xd2,0x6c,0x5e 0xcf,0xd2,0xac,0x5e + +#---------------------------------------------------------------------- +# Scalar Signed Saturating Extract Unsigned Narrow +#---------------------------------------------------------------------- +# CHECK: sqxtun b19, h14 +# CHECK: sqxtun h21, s15 +# CHECK: sqxtun s20, d12 +0xd3,0x29,0x21,0x7e +0xf5,0x29,0x61,0x7e +0x94,0x29,0xa1,0x7e + +#---------------------------------------------------------------------- +# Scalar Signed Saturating Extract Signed Narrow +#---------------------------------------------------------------------- +# CHECK: sqxtn b18, h18 +# CHECK: sqxtn h20, s17 +# CHECK: sqxtn s19, d14 +0x52,0x4a,0x21,0x5e +0x34,0x4a,0x61,0x5e +0xd3,0x49,0xa1,0x5e + +#---------------------------------------------------------------------- +# Scalar Unsigned Saturating Extract Narrow +#---------------------------------------------------------------------- +# CHECK: uqxtn b18, h18 +# CHECK: uqxtn h20, s17 +# CHECK: uqxtn s19, d14 +0x52,0x4a,0x21,0x7e +0x34,0x4a,0x61,0x7e +0xd3,0x49,0xa1,0x7e |