aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChad Rosier <mcrosier@codeaurora.org>2013-10-18 14:03:24 +0000
committerChad Rosier <mcrosier@codeaurora.org>2013-10-18 14:03:24 +0000
commitc439c205ba304c7ed1c88fb85c2009e49cfbd0c3 (patch)
treee3baa66e0147a7198ec2a73997997f1de06b1b7e
parente1bc6ddc0bf671826a9b7230e321a42af75734f2 (diff)
downloadexternal_llvm-c439c205ba304c7ed1c88fb85c2009e49cfbd0c3.zip
external_llvm-c439c205ba304c7ed1c88fb85c2009e49cfbd0c3.tar.gz
external_llvm-c439c205ba304c7ed1c88fb85c2009e49cfbd0c3.tar.bz2
[AArch64] Add support for NEON scalar extract narrow instructions.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192970 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/AArch64/AArch64InstrNEON.td48
-rw-r--r--test/CodeGen/AArch64/neon-scalar-extract-narrow.ll104
-rw-r--r--test/MC/AArch64/neon-diagnostics.s55
-rw-r--r--test/MC/AArch64/neon-scalar-extract-narrow.s40
-rw-r--r--test/MC/Disassembler/AArch64/neon-instructions.txt30
5 files changed, 277 insertions, 0 deletions
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td
index 701250d..361909a 100644
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -3292,6 +3292,22 @@ multiclass NeonI_Scalar2SameMisc_BHSD_size<bit u, bits<5> opcode, string asmop>
[], NoItinerary>;
}
+multiclass NeonI_Scalar2SameMisc_narrow_HSD_size<bit u, bits<5> opcode,
+ string asmop> {
+ def bh : NeonI_Scalar2SameMisc<u, 0b00, opcode,
+ (outs FPR8:$Rd), (ins FPR16:$Rn),
+ !strconcat(asmop, " $Rd, $Rn"),
+ [], NoItinerary>;
+ def hs : NeonI_Scalar2SameMisc<u, 0b01, opcode,
+ (outs FPR16:$Rd), (ins FPR32:$Rn),
+ !strconcat(asmop, " $Rd, $Rn"),
+ [], NoItinerary>;
+ def sd : NeonI_Scalar2SameMisc<u, 0b10, opcode,
+ (outs FPR32:$Rd), (ins FPR64:$Rn),
+ !strconcat(asmop, " $Rd, $Rn"),
+ [], NoItinerary>;
+}
+
multiclass NeonI_Scalar2SameMisc_accum_BHSD_size<bit u, bits<5> opcode,
string asmop> {
@@ -3366,6 +3382,20 @@ multiclass Neon_Scalar2SameMisc_BHSD_size_patterns<SDPatternOperator opnode,
(INSTS FPR32:$Rn)>;
}
+multiclass Neon_Scalar2SameMisc_narrow_HSD_size_patterns<
+ SDPatternOperator opnode,
+ Instruction INSTH,
+ Instruction INSTS,
+ Instruction INSTD> {
+ def : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn))),
+ (INSTH FPR16:$Rn)>;
+ def : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn))),
+ (INSTS FPR32:$Rn)>;
+ def : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn))),
+ (INSTD FPR64:$Rn)>;
+
+}
+
multiclass Neon_Scalar2SameMisc_accum_BHSD_size_patterns<
SDPatternOperator opnode,
Instruction INSTB,
@@ -3645,6 +3675,24 @@ defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vsqadd,
USQADDbb, USQADDhh,
USQADDss, USQADDdd>;
+// Scalar Signed Saturating Extract Unsigned Narrow
+defm SQXTUN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10010, "sqxtun">;
+defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnsu,
+ SQXTUNbh, SQXTUNhs,
+ SQXTUNsd>;
+
+// Scalar Signed Saturating Extract Narrow
+defm SQXTN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b0, 0b10100, "sqxtn">;
+defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovns,
+ SQXTNbh, SQXTNhs,
+ SQXTNsd>;
+
+// Scalar Unsigned Saturating Extract Narrow
+defm UQXTN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10100, "uqxtn">;
+defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnu,
+ UQXTNbh, UQXTNhs,
+ UQXTNsd>;
+
// Scalar Reduce Pairwise
multiclass NeonI_ScalarPair_D_sizes<bit u, bit size, bits<5> opcode,
diff --git a/test/CodeGen/AArch64/neon-scalar-extract-narrow.ll b/test/CodeGen/AArch64/neon-scalar-extract-narrow.ll
new file mode 100644
index 0000000..faf521b
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-extract-narrow.ll
@@ -0,0 +1,104 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+define i8 @test_vqmovunh_s16(i16 %a) {
+; CHECK: test_vqmovunh_s16
+; CHECK: sqxtun {{b[0-9]+}}, {{h[0-9]+}}
+entry:
+ %vqmovun.i = insertelement <1 x i16> undef, i16 %a, i32 0
+ %vqmovun1.i = call <1 x i8> @llvm.arm.neon.vqmovnsu.v1i8(<1 x i16> %vqmovun.i)
+ %0 = extractelement <1 x i8> %vqmovun1.i, i32 0
+ ret i8 %0
+}
+
+define i16 @test_vqmovuns_s32(i32 %a) {
+; CHECK: test_vqmovuns_s32
+; CHECK: sqxtun {{h[0-9]+}}, {{s[0-9]+}}
+entry:
+ %vqmovun.i = insertelement <1 x i32> undef, i32 %a, i32 0
+ %vqmovun1.i = call <1 x i16> @llvm.arm.neon.vqmovnsu.v1i16(<1 x i32> %vqmovun.i)
+ %0 = extractelement <1 x i16> %vqmovun1.i, i32 0
+ ret i16 %0
+}
+
+define i32 @test_vqmovund_s64(i64 %a) {
+; CHECK: test_vqmovund_s64
+; CHECK: sqxtun {{s[0-9]+}}, {{d[0-9]+}}
+entry:
+ %vqmovun.i = insertelement <1 x i64> undef, i64 %a, i32 0
+ %vqmovun1.i = call <1 x i32> @llvm.arm.neon.vqmovnsu.v1i32(<1 x i64> %vqmovun.i)
+ %0 = extractelement <1 x i32> %vqmovun1.i, i32 0
+ ret i32 %0
+}
+
+declare <1 x i8> @llvm.arm.neon.vqmovnsu.v1i8(<1 x i16>)
+declare <1 x i16> @llvm.arm.neon.vqmovnsu.v1i16(<1 x i32>)
+declare <1 x i32> @llvm.arm.neon.vqmovnsu.v1i32(<1 x i64>)
+
+define i8 @test_vqmovnh_s16(i16 %a) {
+; CHECK: test_vqmovnh_s16
+; CHECK: sqxtn {{b[0-9]+}}, {{h[0-9]+}}
+entry:
+ %vqmovn.i = insertelement <1 x i16> undef, i16 %a, i32 0
+ %vqmovn1.i = call <1 x i8> @llvm.arm.neon.vqmovns.v1i8(<1 x i16> %vqmovn.i)
+ %0 = extractelement <1 x i8> %vqmovn1.i, i32 0
+ ret i8 %0
+}
+
+define i16 @test_vqmovns_s32(i32 %a) {
+; CHECK: test_vqmovns_s32
+; CHECK: sqxtn {{h[0-9]+}}, {{s[0-9]+}}
+entry:
+ %vqmovn.i = insertelement <1 x i32> undef, i32 %a, i32 0
+ %vqmovn1.i = call <1 x i16> @llvm.arm.neon.vqmovns.v1i16(<1 x i32> %vqmovn.i)
+ %0 = extractelement <1 x i16> %vqmovn1.i, i32 0
+ ret i16 %0
+}
+
+define i32 @test_vqmovnd_s64(i64 %a) {
+; CHECK: test_vqmovnd_s64
+; CHECK: sqxtn {{s[0-9]+}}, {{d[0-9]+}}
+entry:
+ %vqmovn.i = insertelement <1 x i64> undef, i64 %a, i32 0
+ %vqmovn1.i = call <1 x i32> @llvm.arm.neon.vqmovns.v1i32(<1 x i64> %vqmovn.i)
+ %0 = extractelement <1 x i32> %vqmovn1.i, i32 0
+ ret i32 %0
+}
+
+declare <1 x i8> @llvm.arm.neon.vqmovns.v1i8(<1 x i16>)
+declare <1 x i16> @llvm.arm.neon.vqmovns.v1i16(<1 x i32>)
+declare <1 x i32> @llvm.arm.neon.vqmovns.v1i32(<1 x i64>)
+
+define i8 @test_vqmovnh_u16(i16 %a) {
+; CHECK: test_vqmovnh_u16
+; CHECK: uqxtn {{b[0-9]+}}, {{h[0-9]+}}
+entry:
+ %vqmovn.i = insertelement <1 x i16> undef, i16 %a, i32 0
+ %vqmovn1.i = call <1 x i8> @llvm.arm.neon.vqmovnu.v1i8(<1 x i16> %vqmovn.i)
+ %0 = extractelement <1 x i8> %vqmovn1.i, i32 0
+ ret i8 %0
+}
+
+
+define i16 @test_vqmovns_u32(i32 %a) {
+; CHECK: test_vqmovns_u32
+; CHECK: uqxtn {{h[0-9]+}}, {{s[0-9]+}}
+entry:
+ %vqmovn.i = insertelement <1 x i32> undef, i32 %a, i32 0
+ %vqmovn1.i = call <1 x i16> @llvm.arm.neon.vqmovnu.v1i16(<1 x i32> %vqmovn.i)
+ %0 = extractelement <1 x i16> %vqmovn1.i, i32 0
+ ret i16 %0
+}
+
+define i32 @test_vqmovnd_u64(i64 %a) {
+; CHECK: test_vqmovnd_u64
+; CHECK: uqxtn {{s[0-9]+}}, {{d[0-9]+}}
+entry:
+ %vqmovn.i = insertelement <1 x i64> undef, i64 %a, i32 0
+ %vqmovn1.i = call <1 x i32> @llvm.arm.neon.vqmovnu.v1i32(<1 x i64> %vqmovn.i)
+ %0 = extractelement <1 x i32> %vqmovn1.i, i32 0
+ ret i32 %0
+}
+
+declare <1 x i8> @llvm.arm.neon.vqmovnu.v1i8(<1 x i16>)
+declare <1 x i16> @llvm.arm.neon.vqmovnu.v1i16(<1 x i32>)
+declare <1 x i32> @llvm.arm.neon.vqmovnu.v1i32(<1 x i64>)
diff --git a/test/MC/AArch64/neon-diagnostics.s b/test/MC/AArch64/neon-diagnostics.s
index 44e4561..e0675e2 100644
--- a/test/MC/AArch64/neon-diagnostics.s
+++ b/test/MC/AArch64/neon-diagnostics.s
@@ -4502,3 +4502,58 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmull d15, s22, d12
// CHECK-ERROR: ^
+
+//----------------------------------------------------------------------
+// Scalar Signed Saturating Extract Unsigned Narrow
+//----------------------------------------------------------------------
+
+ sqxtun b19, b14
+ sqxtun h21, h15
+ sqxtun s20, s12
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqxtun b19, b14
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqxtun h21, h15
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqxtun s20, s12
+// CHECK-ERROR: ^
+
+//----------------------------------------------------------------------
+// Scalar Signed Saturating Extract Signed Narrow
+//----------------------------------------------------------------------
+
+ sqxtn b18, b18
+ sqxtn h20, h17
+ sqxtn s19, s14
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqxtn b18, b18
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqxtn h20, h17
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqxtn s19, s14
+// CHECK-ERROR: ^
+
+
+//----------------------------------------------------------------------
+// Scalar Unsigned Saturating Extract Narrow
+//----------------------------------------------------------------------
+
+ uqxtn b18, b18
+ uqxtn h20, h17
+ uqxtn s19, s14
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uqxtn b18, b18
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uqxtn h20, h17
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uqxtn s19, s14
+// CHECK-ERROR: ^
diff --git a/test/MC/AArch64/neon-scalar-extract-narrow.s b/test/MC/AArch64/neon-scalar-extract-narrow.s
new file mode 100644
index 0000000..e25224e
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-extract-narrow.s
@@ -0,0 +1,40 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+//----------------------------------------------------------------------
+// Scalar Signed Saturating Extract Unsigned Narrow
+//----------------------------------------------------------------------
+
+ sqxtun b19, h14
+ sqxtun h21, s15
+ sqxtun s20, d12
+
+// CHECK: sqxtun b19, h14 // encoding: [0xd3,0x29,0x21,0x7e]
+// CHECK: sqxtun h21, s15 // encoding: [0xf5,0x29,0x61,0x7e]
+// CHECK: sqxtun s20, d12 // encoding: [0x94,0x29,0xa1,0x7e]
+
+//----------------------------------------------------------------------
+// Scalar Signed Saturating Extract Signed Narrow
+//----------------------------------------------------------------------
+
+ sqxtn b18, h18
+ sqxtn h20, s17
+ sqxtn s19, d14
+
+// CHECK: sqxtn b18, h18 // encoding: [0x52,0x4a,0x21,0x5e]
+// CHECK: sqxtn h20, s17 // encoding: [0x34,0x4a,0x61,0x5e]
+// CHECK: sqxtn s19, d14 // encoding: [0xd3,0x49,0xa1,0x5e]
+
+
+//----------------------------------------------------------------------
+// Scalar Unsigned Saturating Extract Narrow
+//----------------------------------------------------------------------
+
+ uqxtn b18, h18
+ uqxtn h20, s17
+ uqxtn s19, d14
+
+// CHECK: uqxtn b18, h18 // encoding: [0x52,0x4a,0x21,0x7e]
+// CHECK: uqxtn h20, s17 // encoding: [0x34,0x4a,0x61,0x7e]
+// CHECK: uqxtn s19, d14 // encoding: [0xd3,0x49,0xa1,0x7e]
diff --git a/test/MC/Disassembler/AArch64/neon-instructions.txt b/test/MC/Disassembler/AArch64/neon-instructions.txt
index 364259f..fa34c37 100644
--- a/test/MC/Disassembler/AArch64/neon-instructions.txt
+++ b/test/MC/Disassembler/AArch64/neon-instructions.txt
@@ -1683,3 +1683,33 @@
# CHECK: sqdmull d15, s22, s12
0xcc,0xd2,0x6c,0x5e
0xcf,0xd2,0xac,0x5e
+
+#----------------------------------------------------------------------
+# Scalar Signed Saturating Extract Unsigned Narrow
+#----------------------------------------------------------------------
+# CHECK: sqxtun b19, h14
+# CHECK: sqxtun h21, s15
+# CHECK: sqxtun s20, d12
+0xd3,0x29,0x21,0x7e
+0xf5,0x29,0x61,0x7e
+0x94,0x29,0xa1,0x7e
+
+#----------------------------------------------------------------------
+# Scalar Signed Saturating Extract Signed Narrow
+#----------------------------------------------------------------------
+# CHECK: sqxtn b18, h18
+# CHECK: sqxtn h20, s17
+# CHECK: sqxtn s19, d14
+0x52,0x4a,0x21,0x5e
+0x34,0x4a,0x61,0x5e
+0xd3,0x49,0xa1,0x5e
+
+#----------------------------------------------------------------------
+# Scalar Unsigned Saturating Extract Narrow
+#----------------------------------------------------------------------
+# CHECK: uqxtn b18, h18
+# CHECK: uqxtn h20, s17
+# CHECK: uqxtn s19, d14
+0x52,0x4a,0x21,0x7e
+0x34,0x4a,0x61,0x7e
+0xd3,0x49,0xa1,0x7e