diff options
author | Kevin Qin <Kevin.Qin@arm.com> | 2013-09-17 02:21:02 +0000 |
---|---|---|
committer | Kevin Qin <Kevin.Qin@arm.com> | 2013-09-17 02:21:02 +0000 |
commit | e54360be01d1eaccd5ef27f510634927aaa887a4 (patch) | |
tree | de8361fd5b80233388e09af2757d380623f52f59 /test | |
parent | 24e1b39a24ca7b8866a636498173f3959b561058 (diff) | |
download | external_llvm-e54360be01d1eaccd5ef27f510634927aaa887a4.zip external_llvm-e54360be01d1eaccd5ef27f510634927aaa887a4.tar.gz external_llvm-e54360be01d1eaccd5ef27f510634927aaa887a4.tar.bz2 |
Implement 3 AArch64 neon instructions : umov smov ins.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190839 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r-- | test/CodeGen/AArch64/neon-copy.ll | 232 | ||||
-rw-r--r-- | test/MC/AArch64/neon-simd-copy.s | 71 |
2 files changed, 303 insertions, 0 deletions
diff --git a/test/CodeGen/AArch64/neon-copy.ll b/test/CodeGen/AArch64/neon-copy.ll
new file mode 100644
index 0000000..c2854ed
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-copy.ll
@@ -0,0 +1,232 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
+; Register operands are matched with [0-9]+ (any decimal register number); a class like [0-31] would only accept the digits 0-3.
+
+define <16 x i8> @ins16bw(<16 x i8> %tmp1, i8 %tmp2) {
+;CHECK: ins {{v[0-9]+}}.b[15], {{w[0-9]+}}
+  %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 15
+  ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @ins8hw(<8 x i16> %tmp1, i16 %tmp2) {
+;CHECK: ins {{v[0-9]+}}.h[6], {{w[0-9]+}}
+  %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 6
+  ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @ins4sw(<4 x i32> %tmp1, i32 %tmp2) {
+;CHECK: ins {{v[0-9]+}}.s[2], {{w[0-9]+}}
+  %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 2
+  ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @ins2dw(<2 x i64> %tmp1, i64 %tmp2) {
+;CHECK: ins {{v[0-9]+}}.d[1], {{x[0-9]+}}
+  %tmp3 = insertelement <2 x i64> %tmp1, i64 %tmp2, i32 1
+  ret <2 x i64> %tmp3
+}
+
+define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) {
+;CHECK: ins {{v[0-9]+}}.b[5], {{w[0-9]+}}
+  %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 5
+  ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) {
+;CHECK: ins {{v[0-9]+}}.h[3], {{w[0-9]+}}
+  %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 3
+  ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) {
+;CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}}
+  %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
+  ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @ins16b16(<16 x i8> %tmp1, <16 x i8> %tmp2) {
+;CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
+  %tmp3 = extractelement <16 x i8> %tmp1, i32 2
+  %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
+  ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @ins8h8(<8 x i16> %tmp1, <8 x i16> %tmp2) {
+;CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
+  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
+  %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
+  ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @ins4s4(<4 x i32> %tmp1, <4 x i32> %tmp2) {
+;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
+  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
+  %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
+  ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @ins2d2(<2 x i64> %tmp1, <2 x i64> %tmp2) {
+;CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+  %tmp3 = extractelement <2 x i64> %tmp1, i32 0
+  %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
+  ret <2 x i64> %tmp4
+}
+
+define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) {
+;CHECK: ins {{v[0-9]+}}.b[4], {{v[0-9]+}}.b[2]
+  %tmp3 = extractelement <8 x i8> %tmp1, i32 2
+  %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4
+  ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) {
+;CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
+  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
+  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
+  ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) {
+;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+  %tmp3 = extractelement <2 x i32> %tmp1, i32 0
+  %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
+  ret <2 x i32> %tmp4
+}
+
+define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) {
+;CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
+  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
+  %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
+  ret <1 x i64> %tmp4
+}
+
+define i32 @umovw16b(<16 x i8> %tmp1) {
+;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
+  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
+  %tmp4 = zext i8 %tmp3 to i32
+  ret i32 %tmp4
+}
+
+define i32 @umovw8h(<8 x i16> %tmp1) {
+;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
+  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
+  %tmp4 = zext i16 %tmp3 to i32
+  ret i32 %tmp4
+}
+
+define i32 @umovw4s(<4 x i32> %tmp1) {
+;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.s[2]
+  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
+  ret i32 %tmp3
+}
+
+define i64 @umovx2d(<2 x i64> %tmp1) {
+;CHECK: umov {{x[0-9]+}}, {{v[0-9]+}}.d[0]
+  %tmp3 = extractelement <2 x i64> %tmp1, i32 0
+  ret i64 %tmp3
+}
+
+define i32 @umovw8b(<8 x i8> %tmp1) {
+;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[7]
+  %tmp3 = extractelement <8 x i8> %tmp1, i32 7
+  %tmp4 = zext i8 %tmp3 to i32
+  ret i32 %tmp4
+}
+
+define i32 @umovw4h(<4 x i16> %tmp1) {
+;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
+  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
+  %tmp4 = zext i16 %tmp3 to i32
+  ret i32 %tmp4
+}
+
+define i32 @umovw2s(<2 x i32> %tmp1) {
+;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.s[1]
+  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
+  ret i32 %tmp3
+}
+
+define i64 @umovx1d(<1 x i64> %tmp1) {
+;CHECK: umov {{x[0-9]+}}, {{v[0-9]+}}.d[0]
+  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
+  ret i64 %tmp3
+}
+
+define i32 @smovw16b(<16 x i8> %tmp1) {
+;CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
+  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
+  %tmp4 = sext i8 %tmp3 to i32
+  %tmp5 = add i32 5, %tmp4
+  ret i32 %tmp5
+}
+
+define i32 @smovw8h(<8 x i16> %tmp1) {
+;CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
+  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
+  %tmp4 = sext i16 %tmp3 to i32
+  %tmp5 = add i32 5, %tmp4
+  ret i32 %tmp5
+}
+
+define i32 @smovx16b(<16 x i8> %tmp1) {
+;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[8]
+  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
+  %tmp4 = sext i8 %tmp3 to i32
+  ret i32 %tmp4
+}
+
+define i32 @smovx8h(<8 x i16> %tmp1) {
+;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2]
+  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
+  %tmp4 = sext i16 %tmp3 to i32
+  ret i32 %tmp4
+}
+
+define i64 @smovx4s(<4 x i32> %tmp1) {
+;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[2]
+  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
+  %tmp4 = sext i32 %tmp3 to i64
+  ret i64 %tmp4
+}
+
+define i32 @smovw8b(<8 x i8> %tmp1) {
+;CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[4]
+  %tmp3 = extractelement <8 x i8> %tmp1, i32 4
+  %tmp4 = sext i8 %tmp3 to i32
+  %tmp5 = add i32 5, %tmp4
+  ret i32 %tmp5
+}
+
+define i32 @smovw4h(<4 x i16> %tmp1) {
+;CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
+  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
+  %tmp4 = sext i16 %tmp3 to i32
+  %tmp5 = add i32 5, %tmp4
+  ret i32 %tmp5
+}
+
+define i32 @smovx8b(<8 x i8> %tmp1) {
+;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[6]
+  %tmp3 = extractelement <8 x i8> %tmp1, i32 6
+  %tmp4 = sext i8 %tmp3 to i32
+  ret i32 %tmp4
+}
+
+define i32 @smovx4h(<4 x i16> %tmp1) {
+;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2]
+  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
+  %tmp4 = sext i16 %tmp3 to i32
+  ret i32 %tmp4
+}
+
+define i64 @smovx2s(<2 x i32> %tmp1) {
+;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[1]
+  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
+  %tmp4 = sext i32 %tmp3 to i64
+  ret i64 %tmp4
+}
+
+
+
+
+
+
diff --git a/test/MC/AArch64/neon-simd-copy.s b/test/MC/AArch64/neon-simd-copy.s
new file mode 100644
index 0000000..44b5027
--- /dev/null
+++ b/test/MC/AArch64/neon-simd-copy.s
@@ -0,0 +1,71 @@
+// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+
+//------------------------------------------------------------------------------
+// Insert element (vector, from main)
+//------------------------------------------------------------------------------
+        ins v2.b[2], w1
+        ins v7.h[7], w14
+        ins v20.s[0], w30
+        ins v1.d[1], x7
+
+// CHECK: ins v2.b[2], w1 // encoding: [0x22,0x1c,0x05,0x4e]
+// CHECK: ins v7.h[7], w14 // encoding: [0xc7,0x1d,0x1e,0x4e]
+// CHECK: ins v20.s[0], w30 // encoding: [0xd4,0x1f,0x04,0x4e]
+// CHECK: ins v1.d[1], x7 // encoding: [0xe1,0x1c,0x18,0x4e]
+
+
+//------------------------------------------------------------------------------
+// Signed integer move (main, from element)
+//------------------------------------------------------------------------------
+        smov w1, v0.b[15]
+        smov w14, v6.h[4]
+        smov x1, v0.b[15]
+        smov x14, v6.h[4]
+        smov x20, v9.s[2]
+
+// CHECK: smov w1, v0.b[15] // encoding: [0x01,0x2c,0x1f,0x0e]
+// CHECK: smov w14, v6.h[4] // encoding: [0xce,0x2c,0x12,0x0e]
+// CHECK: smov x1, v0.b[15] // encoding: [0x01,0x2c,0x1f,0x4e]
+// CHECK: smov x14, v6.h[4] // encoding: [0xce,0x2c,0x12,0x4e]
+// CHECK: smov x20, v9.s[2] // encoding: [0x34,0x2d,0x14,0x4e]
+
+
+//------------------------------------------------------------------------------
+// Unsigned integer move (main, from element)
+//------------------------------------------------------------------------------
+        umov w1, v0.b[15]
+        umov w14, v6.h[4]
+        umov w20, v9.s[2]
+        umov x7, v18.d[1]
+
+// CHECK: umov w1, v0.b[15] // encoding: [0x01,0x3c,0x1f,0x0e]
+// CHECK: umov w14, v6.h[4] // encoding: [0xce,0x3c,0x12,0x0e]
+// CHECK: umov w20, v9.s[2] // encoding: [0x34,0x3d,0x14,0x0e]
+// CHECK: umov x7, v18.d[1] // encoding: [0x47,0x3e,0x18,0x4e]
+
+//------------------------------------------------------------------------------
+// Insert element (vector, from element)
+//------------------------------------------------------------------------------
+
+        Ins v1.b[14], v3.b[6]
+        Ins v6.h[7], v7.h[5]
+        Ins v15.s[3], v22.s[2]
+        Ins v0.d[0], v4.d[1]
+
+// CHECK: ins v1.b[14], v3.b[6] // encoding: [0x61,0x34,0x1d,0x6e]
+// CHECK: ins v6.h[7], v7.h[5] // encoding: [0xe6,0x54,0x1e,0x6e]
+// CHECK: ins v15.s[3], v22.s[2] // encoding: [0xcf,0x5e,0x1c,0x6e]
+// CHECK: ins v0.d[0], v4.d[1] // encoding: [0x80,0x44,0x08,0x6e]
+
+
+
+
+
+
+
+
+
+