aboutsummaryrefslogtreecommitdiffstats
path: root/test/CodeGen/AArch64
diff options
context:
space:
mode:
author: Bill Wendling <isanbard@gmail.com> 2013-12-08 00:07:48 +0000
committer: Bill Wendling <isanbard@gmail.com> 2013-12-08 00:07:48 +0000
commit: f04a4d74b86733b853b7445ab6d5a3bde025a30d (patch)
tree: dde98b9a0e589c10e128ca9b58a336e934cd85d9 /test/CodeGen/AArch64
parent: 488aab6df3723d49e256042b99e5ef2f5a9cf46b (diff)
download: external_llvm-f04a4d74b86733b853b7445ab6d5a3bde025a30d.zip
external_llvm-f04a4d74b86733b853b7445ab6d5a3bde025a30d.tar.gz
external_llvm-f04a4d74b86733b853b7445ab6d5a3bde025a30d.tar.bz2
Merging r196456:
------------------------------------------------------------------------
r196456 | jiangning | 2013-12-04 18:12:01 -0800 (Wed, 04 Dec 2013) | 2 lines

For AArch64, add missing register cost calculation for big value types like v4i64 and v8i64.
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@196700 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/AArch64')
-rw-r--r-- test/CodeGen/AArch64/neon-simd-ldst.ll 164
1 files changed, 164 insertions, 0 deletions
diff --git a/test/CodeGen/AArch64/neon-simd-ldst.ll b/test/CodeGen/AArch64/neon-simd-ldst.ll
new file mode 100644
index 0000000..afc0901
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-simd-ldst.ll
@@ -0,0 +1,164 @@
+; RUN: llc < %s -O2 -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+define void @test_ldstq_4v(i8* noalias %io, i32 %count) { ; loop: vld4 four <16 x i8> vectors from %io, vst4 them back to %io
+; CHECK-LABEL: test_ldstq_4v
+; CHECK: ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0]
+; CHECK: st4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0]
+entry: ; expected output above: one ld4 and one st4 using the same v0-v3 registers and base x0
+ %tobool62 = icmp eq i32 %count, 0
+ br i1 %tobool62, label %while.end, label %while.body ; skip the loop entirely when %count is 0
+
+while.body: ; preds = %entry, %while.body
+ %count.addr.063 = phi i32 [ %dec, %while.body ], [ %count, %entry ] ; iterations still to run
+ %dec = add i32 %count.addr.063, -1
+ %vld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8* %io, i32 1) ; NOTE(review): trailing i32 1 is presumably the alignment operand - confirm
+ %vld4.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 0 ; split the aggregate result into its four vectors
+ %vld4.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 1
+ %vld4.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 2
+ %vld4.fca.3.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 3
+ tail call void @llvm.arm.neon.vst4.v16i8(i8* %io, <16 x i8> %vld4.fca.0.extract, <16 x i8> %vld4.fca.1.extract, <16 x i8> %vld4.fca.2.extract, <16 x i8> %vld4.fca.3.extract, i32 1) ; store the four vectors back unchanged, in the same order
+ %tobool = icmp eq i32 %dec, 0
+ br i1 %tobool, label %while.end, label %while.body ; leave the loop once the decremented counter reaches 0
+
+while.end: ; preds = %while.body, %entry
+ ret void
+}
+
+declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8*, i32) ; load intrinsic called by @test_ldstq_4v
+
+declare void @llvm.arm.neon.vst4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32) ; store intrinsic called by @test_ldstq_4v
+
+define void @test_ldstq_3v(i8* noalias %io, i32 %count) { ; loop: vld3 three <16 x i8> vectors from %io, vst3 them back to %io
+; CHECK-LABEL: test_ldstq_3v
+; CHECK: ld3 {v0.16b, v1.16b, v2.16b}, [x0]
+; CHECK: st3 {v0.16b, v1.16b, v2.16b}, [x0]
+entry: ; expected output above: one ld3 and one st3 using the same v0-v2 registers and base x0
+ %tobool47 = icmp eq i32 %count, 0
+ br i1 %tobool47, label %while.end, label %while.body ; skip the loop entirely when %count is 0
+
+while.body: ; preds = %entry, %while.body
+ %count.addr.048 = phi i32 [ %dec, %while.body ], [ %count, %entry ] ; iterations still to run
+ %dec = add i32 %count.addr.048, -1
+ %vld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8* %io, i32 1) ; NOTE(review): trailing i32 1 is presumably the alignment operand - confirm
+ %vld3.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 0 ; split the aggregate result into its three vectors
+ %vld3.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 1
+ %vld3.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 2
+ tail call void @llvm.arm.neon.vst3.v16i8(i8* %io, <16 x i8> %vld3.fca.0.extract, <16 x i8> %vld3.fca.1.extract, <16 x i8> %vld3.fca.2.extract, i32 1) ; store the three vectors back unchanged, in the same order
+ %tobool = icmp eq i32 %dec, 0
+ br i1 %tobool, label %while.end, label %while.body ; leave the loop once the decremented counter reaches 0
+
+while.end: ; preds = %while.body, %entry
+ ret void
+}
+
+declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8*, i32) ; load intrinsic called by @test_ldstq_3v
+
+declare void @llvm.arm.neon.vst3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32) ; store intrinsic called by @test_ldstq_3v
+
+define void @test_ldstq_2v(i8* noalias %io, i32 %count) { ; loop: vld2 two <16 x i8> vectors from %io, vst2 them back to %io
+; CHECK-LABEL: test_ldstq_2v
+; CHECK: ld2 {v0.16b, v1.16b}, [x0]
+; CHECK: st2 {v0.16b, v1.16b}, [x0]
+entry: ; expected output above: one ld2 and one st2 using the same v0-v1 registers and base x0
+ %tobool22 = icmp eq i32 %count, 0
+ br i1 %tobool22, label %while.end, label %while.body ; skip the loop entirely when %count is 0
+
+while.body: ; preds = %entry, %while.body
+ %count.addr.023 = phi i32 [ %dec, %while.body ], [ %count, %entry ] ; iterations still to run
+ %dec = add i32 %count.addr.023, -1
+ %vld2 = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8* %io, i32 1) ; NOTE(review): trailing i32 1 is presumably the alignment operand - confirm
+ %vld2.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2, 0 ; split the aggregate result into its two vectors
+ %vld2.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2, 1
+ tail call void @llvm.arm.neon.vst2.v16i8(i8* %io, <16 x i8> %vld2.fca.0.extract, <16 x i8> %vld2.fca.1.extract, i32 1) ; store the two vectors back unchanged, in the same order
+ %tobool = icmp eq i32 %dec, 0
+ br i1 %tobool, label %while.end, label %while.body ; leave the loop once the decremented counter reaches 0
+
+while.end: ; preds = %while.body, %entry
+ ret void
+}
+
+declare { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8*, i32) ; load intrinsic called by @test_ldstq_2v
+
+declare void @llvm.arm.neon.vst2.v16i8(i8*, <16 x i8>, <16 x i8>, i32) ; store intrinsic called by @test_ldstq_2v
+
+define void @test_ldst_4v(i8* noalias %io, i32 %count) { ; loop: vld4 four <8 x i8> vectors from %io, vst4 them back to %io
+; CHECK-LABEL: test_ldst_4v
+; CHECK: ld4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0]
+; CHECK: st4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0]
+entry: ; expected output above: one ld4 and one st4 using the same v0-v3 registers and base x0
+ %tobool42 = icmp eq i32 %count, 0
+ br i1 %tobool42, label %while.end, label %while.body ; skip the loop entirely when %count is 0
+
+while.body: ; preds = %entry, %while.body
+ %count.addr.043 = phi i32 [ %dec, %while.body ], [ %count, %entry ] ; iterations still to run
+ %dec = add i32 %count.addr.043, -1
+ %vld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8* %io, i32 1) ; NOTE(review): trailing i32 1 is presumably the alignment operand - confirm
+ %vld4.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 0 ; split the aggregate result into its four vectors
+ %vld4.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 1
+ %vld4.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 2
+ %vld4.fca.3.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 3
+ tail call void @llvm.arm.neon.vst4.v8i8(i8* %io, <8 x i8> %vld4.fca.0.extract, <8 x i8> %vld4.fca.1.extract, <8 x i8> %vld4.fca.2.extract, <8 x i8> %vld4.fca.3.extract, i32 1) ; store the four vectors back unchanged, in the same order
+ %tobool = icmp eq i32 %dec, 0
+ br i1 %tobool, label %while.end, label %while.body ; leave the loop once the decremented counter reaches 0
+
+while.end: ; preds = %while.body, %entry
+ ret void
+}
+
+declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8*, i32) ; load intrinsic called by @test_ldst_4v
+
+declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) ; store intrinsic called by @test_ldst_4v
+
+define void @test_ldst_3v(i8* noalias %io, i32 %count) { ; loop: vld3 three <8 x i8> vectors from %io, vst3 them back to %io
+; CHECK-LABEL: test_ldst_3v
+; CHECK: ld3 {v0.8b, v1.8b, v2.8b}, [x0]
+; CHECK: st3 {v0.8b, v1.8b, v2.8b}, [x0]
+entry: ; expected output above: one ld3 and one st3 using the same v0-v2 registers and base x0
+ %tobool32 = icmp eq i32 %count, 0
+ br i1 %tobool32, label %while.end, label %while.body ; skip the loop entirely when %count is 0
+
+while.body: ; preds = %entry, %while.body
+ %count.addr.033 = phi i32 [ %dec, %while.body ], [ %count, %entry ] ; iterations still to run
+ %dec = add i32 %count.addr.033, -1
+ %vld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8(i8* %io, i32 1) ; NOTE(review): trailing i32 1 is presumably the alignment operand - confirm
+ %vld3.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 0 ; split the aggregate result into its three vectors
+ %vld3.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 1
+ %vld3.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 2
+ tail call void @llvm.arm.neon.vst3.v8i8(i8* %io, <8 x i8> %vld3.fca.0.extract, <8 x i8> %vld3.fca.1.extract, <8 x i8> %vld3.fca.2.extract, i32 1) ; store the three vectors back unchanged, in the same order
+ %tobool = icmp eq i32 %dec, 0
+ br i1 %tobool, label %while.end, label %while.body ; leave the loop once the decremented counter reaches 0
+
+while.end: ; preds = %while.body, %entry
+ ret void
+}
+
+declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8(i8*, i32) ; load intrinsic called by @test_ldst_3v
+
+declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) ; store intrinsic called by @test_ldst_3v
+
+define void @test_ldst_2v(i8* noalias %io, i32 %count) { ; loop: vld2 two <8 x i8> vectors from %io, vst2 them back to %io
+; CHECK-LABEL: test_ldst_2v
+; CHECK: ld2 {v0.8b, v1.8b}, [x0]
+; CHECK: st2 {v0.8b, v1.8b}, [x0]
+entry: ; expected output above: one ld2 and one st2 using the same v0-v1 registers and base x0
+ %tobool22 = icmp eq i32 %count, 0
+ br i1 %tobool22, label %while.end, label %while.body ; skip the loop entirely when %count is 0
+
+while.body: ; preds = %entry, %while.body
+ %count.addr.023 = phi i32 [ %dec, %while.body ], [ %count, %entry ] ; iterations still to run
+ %dec = add i32 %count.addr.023, -1
+ %vld2 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8(i8* %io, i32 1) ; NOTE(review): trailing i32 1 is presumably the alignment operand - confirm
+ %vld2.fca.0.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2, 0 ; split the aggregate result into its two vectors
+ %vld2.fca.1.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2, 1
+ tail call void @llvm.arm.neon.vst2.v8i8(i8* %io, <8 x i8> %vld2.fca.0.extract, <8 x i8> %vld2.fca.1.extract, i32 1) ; store the two vectors back unchanged, in the same order
+ %tobool = icmp eq i32 %dec, 0
+ br i1 %tobool, label %while.end, label %while.body ; leave the loop once the decremented counter reaches 0
+
+while.end: ; preds = %while.body, %entry
+ ret void
+}
+
+declare { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8(i8*, i32) ; load intrinsic called by @test_ldst_2v
+
+declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) ; store intrinsic called by @test_ldst_2v
+