From 477fc628b3c9ce1c970d4a678dd5607b15242cc8 Mon Sep 17 00:00:00 2001 From: Jiangning Liu Date: Tue, 24 Sep 2013 02:47:27 +0000 Subject: Initial support for Neon scalar instructions. Patch by Ana Pazos. 1.Added support for v1ix and v1fx types. 2.Added Scalar Pairwise Reduce instructions. 3.Added initial implementation of Scalar Arithmetic instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191263 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/AArch64/neon-add-sub.ll | 12 -- test/CodeGen/AArch64/neon-copy.ll | 2 +- test/CodeGen/AArch64/neon-rounding-shift.ll | 17 -- test/CodeGen/AArch64/neon-saturating-add-sub.ll | 33 ---- .../AArch64/neon-saturating-rounding-shift.ll | 17 -- test/CodeGen/AArch64/neon-saturating-shift.ll | 17 -- test/CodeGen/AArch64/neon-scalar-add-sub.ll | 50 ++++++ .../CodeGen/AArch64/neon-scalar-reduce-pairwise.ll | 103 +++++++++++++ test/CodeGen/AArch64/neon-scalar-rounding-shift.ll | 39 +++++ .../AArch64/neon-scalar-saturating-add-sub.ll | 171 +++++++++++++++++++++ .../neon-scalar-saturating-rounding-shift.ll | 94 +++++++++++ .../AArch64/neon-scalar-saturating-shift.ll | 88 +++++++++++ test/CodeGen/AArch64/neon-scalar-shift.ll | 38 +++++ test/CodeGen/AArch64/neon-shift.ll | 17 -- 14 files changed, 584 insertions(+), 114 deletions(-) create mode 100644 test/CodeGen/AArch64/neon-scalar-add-sub.ll create mode 100644 test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll create mode 100644 test/CodeGen/AArch64/neon-scalar-rounding-shift.ll create mode 100644 test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll create mode 100644 test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll create mode 100644 test/CodeGen/AArch64/neon-scalar-saturating-shift.ll create mode 100644 test/CodeGen/AArch64/neon-scalar-shift.ll (limited to 'test/CodeGen/AArch64') diff --git a/test/CodeGen/AArch64/neon-add-sub.ll b/test/CodeGen/AArch64/neon-add-sub.ll index 65ec8a2..566e029 100644 --- a/test/CodeGen/AArch64/neon-add-sub.ll +++ b/test/CodeGen/AArch64/neon-add-sub.ll @@ -118,15 +118,3 @@ define <2 x double> @sub2xdouble(<2 x double> %A, <2 x double> %B) { ret <2 x double> %tmp3 } -define <1 x i64> @add1xi64(<1 x i64> %A, <1 x i64> %B) { -;CHECK: add {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} - %tmp3 = add <1 x i64> %A, %B; - ret <1 x i64> %tmp3 -} - -define <1 x i64> @sub1xi64(<1 x i64> %A, <1 x i64> %B) { -;CHECK: sub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} - %tmp3 = sub <1 x i64> %A, %B; - ret <1 x i64> %tmp3 -} - diff --git a/test/CodeGen/AArch64/neon-copy.ll b/test/CodeGen/AArch64/neon-copy.ll index c2854ed..2c50059 100644 --- a/test/CodeGen/AArch64/neon-copy.ll +++ b/test/CodeGen/AArch64/neon-copy.ll @@ -146,7 +146,7 @@ define i32 @umovw2s(<2 x i32> %tmp1) { } define i64 @umovx1d(<1 x i64> %tmp1) { -;CHECK: umov {{x[0-31]+}}, {{v[0-31]+}}.d[0] +;CHECK: fmov {{x[0-31]+}}, {{d[0-31]+}} %tmp3 = extractelement <1 x i64> %tmp1, i32 0 ret i64 %tmp3 } diff --git a/test/CodeGen/AArch64/neon-rounding-shift.ll b/test/CodeGen/AArch64/neon-rounding-shift.ll index 404e491..5b4ec28 100644 --- a/test/CodeGen/AArch64/neon-rounding-shift.ll +++ b/test/CodeGen/AArch64/neon-rounding-shift.ll @@ -102,23 +102,6 @@ define <4 x i32> @test_srshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ret <4 x i32> %tmp1 } -declare <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_urshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_urshl_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: urshl d0, d0, d1 - ret <1 x i64> %tmp1 -} - -define <1 x i64> @test_srshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_srshl_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: srshl d0, d0, d1 - ret <1 x i64> %tmp1 -} - declare <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64>, <2 x i64>) declare <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64>, <2 x i64>) diff --git a/test/CodeGen/AArch64/neon-saturating-add-sub.ll b/test/CodeGen/AArch64/neon-saturating-add-sub.ll index b2fac1f..fc60d90 100644 --- a/test/CodeGen/AArch64/neon-saturating-add-sub.ll +++ b/test/CodeGen/AArch64/neon-saturating-add-sub.ll @@ -102,22 +102,7 @@ define <4 x i32> @test_sqadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ret <4 x i32> %tmp1 } -declare <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_uqadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_uqadd_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: uqadd d0, d0, d1 - ret <1 x i64> %tmp1 -} -define <1 x i64> @test_sqadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_sqadd_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: sqadd d0, d0, d1 - ret <1 x i64> %tmp1 -} declare <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64>, <2 x i64>) declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>) @@ -254,21 +239,3 @@ define <2 x i64> @test_sqsub_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { ; CHECK: sqsub v0.2d, v0.2d, v1.2d ret <2 x i64> %tmp1 } - -declare <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_uqsub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_uqsub_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: uqsub d0, d0, d1 - ret <1 x i64> %tmp1 -} - -define <1 x i64> @test_sqsub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_sqsub_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: sqsub d0, d0, d1 - ret <1 x i64> %tmp1 -} - diff --git a/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll b/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll index 05d8dfe..d89262c 100644 --- a/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll +++ b/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll @@ -102,23 +102,6 @@ define <4 x i32> @test_sqrshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ret <4 x i32> %tmp1 } -declare <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_uqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_uqrshl_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: uqrshl d0, d0, d1 - ret <1 x i64> %tmp1 -} - -define <1 x i64> @test_sqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_sqrshl_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: sqrshl d0, d0, d1 - ret <1 x i64> %tmp1 -} - declare <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64>, <2 x i64>) declare <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64>, <2 x i64>) diff --git a/test/CodeGen/AArch64/neon-saturating-shift.ll b/test/CodeGen/AArch64/neon-saturating-shift.ll index 3b7f78c..11009fb 100644 --- a/test/CodeGen/AArch64/neon-saturating-shift.ll +++ b/test/CodeGen/AArch64/neon-saturating-shift.ll @@ -102,23 +102,6 @@ define <4 x i32> @test_sqshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ret <4 x i32> %tmp1 } -declare <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_uqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_uqshl_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: uqshl d0, d0, d1 - ret <1 x i64> %tmp1 -} - -define <1 x i64> @test_sqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_sqshl_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: sqshl d0, d0, d1 - ret <1 x i64> %tmp1 -} - declare <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64>, <2 x i64>) declare <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64>, <2 x i64>) diff --git a/test/CodeGen/AArch64/neon-scalar-add-sub.ll b/test/CodeGen/AArch64/neon-scalar-add-sub.ll new file mode 100644 index 0000000..09ca880 --- /dev/null +++ b/test/CodeGen/AArch64/neon-scalar-add-sub.ll @@ -0,0 +1,50 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +define <1 x i64> @add1xi64(<1 x i64> %A, <1 x i64> %B) { +;CHECK: add {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + %tmp3 = add <1 x i64> %A, %B; + ret <1 x i64> %tmp3 +} + +define <1 x i64> @sub1xi64(<1 x i64> %A, <1 x i64> %B) { +;CHECK: sub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + %tmp3 = sub <1 x i64> %A, %B; + ret <1 x i64> %tmp3 +} + +declare <1 x i64> @llvm.aarch64.neon.vaddds(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.aarch64.neon.vadddu(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_add_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_add_v1i64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vaddds(<1 x i64> %lhs, <1 x i64> %rhs) +; CHECK: add {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_uadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_uadd_v1i64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vadddu(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: add {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +declare <1 x i64> @llvm.aarch64.neon.vsubds(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.aarch64.neon.vsubdu(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_sub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_sub_v1i64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vsubds(<1 x i64> %lhs, <1 x i64> %rhs) +; CHECK: sub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_usub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_usub_v1i64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vsubdu(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: sub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + + + diff --git a/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll b/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll new file mode 100644 index 0000000..309997b --- /dev/null +++ b/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll @@ -0,0 +1,103 @@ +; RUN: llc -march=aarch64 -mattr=+neon < %s | FileCheck %s + +declare <1 x i64> @llvm.aarch64.neon.vpadd(<2 x i64>) + +define <1 x i64> @test_addp_v1i64(<2 x i64> %a) { +; CHECK: test_addp_v1i64: + %val = call <1 x i64> @llvm.aarch64.neon.vpadd(<2 x i64> %a) +; CHECK: addp d0, v0.2d + ret <1 x i64> %val +} + +declare <1 x float> @llvm.aarch64.neon.vpfadd(<2 x float>) + +define <1 x float> @test_faddp_v1f32(<2 x float> %a) { +; CHECK: test_faddp_v1f32: + %val = call <1 x float> @llvm.aarch64.neon.vpfadd(<2 x float> %a) +; CHECK: faddp s0, v0.2s + ret <1 x float> %val +} + +declare <1 x double> @llvm.aarch64.neon.vpfaddq(<2 x double>) + +define <1 x double> @test_faddp_v1f64(<2 x double> %a) { +; CHECK: test_faddp_v1f64: + %val = call <1 x double> @llvm.aarch64.neon.vpfaddq(<2 x double> %a) +; CHECK: faddp d0, v0.2d + ret <1 x double> %val +} + + +declare <1 x float> @llvm.aarch64.neon.vpmax(<2 x float>) + +define <1 x float> @test_fmaxp_v1f32(<2 x float> %a) { +; CHECK: test_fmaxp_v1f32: + %val = call <1 x float> @llvm.aarch64.neon.vpmax(<2 x float> %a) +; CHECK: fmaxp s0, v0.2s + ret <1 x float> %val +} + +declare <1 x double> @llvm.aarch64.neon.vpmaxq(<2 x double>) + +define <1 x double> @test_fmaxp_v1f64(<2 x double> %a) { +; CHECK: test_fmaxp_v1f64: + %val = call <1 x double> @llvm.aarch64.neon.vpmaxq(<2 x double> %a) +; CHECK: fmaxp d0, v0.2d + ret <1 x double> %val +} + + +declare <1 x float> @llvm.aarch64.neon.vpmin(<2 x float>) + +define <1 x float> @test_fminp_v1f32(<2 x float> %a) { +; CHECK: test_fminp_v1f32: + %val = call <1 x float> @llvm.aarch64.neon.vpmin(<2 x float> %a) +; CHECK: fminp s0, v0.2s + ret <1 x float> %val +} + +declare <1 x double> @llvm.aarch64.neon.vpminq(<2 x double>) + +define <1 x double> @test_fminp_v1f64(<2 x double> %a) { +; CHECK: test_fminp_v1f64: + %val = call <1 x double> @llvm.aarch64.neon.vpminq(<2 x double> %a) +; CHECK: fminp d0, v0.2d + ret <1 x double> %val +} + +declare <1 x float> @llvm.aarch64.neon.vpfmaxnm(<2 x float>) + +define <1 x float> @test_fmaxnmp_v1f32(<2 x float> %a) { +; CHECK: test_fmaxnmp_v1f32: + %val = call <1 x float> @llvm.aarch64.neon.vpfmaxnm(<2 x float> %a) +; CHECK: fmaxnmp s0, v0.2s + ret <1 x float> %val +} + +declare <1 x double> @llvm.aarch64.neon.vpfmaxnmq(<2 x double>) + +define <1 x double> @test_fmaxnmp_v1f64(<2 x double> %a) { +; CHECK: test_fmaxnmp_v1f64: + %val = call <1 x double> @llvm.aarch64.neon.vpfmaxnmq(<2 x double> %a) +; CHECK: fmaxnmp d0, v0.2d + ret <1 x double> %val +} + +declare <1 x float> @llvm.aarch64.neon.vpfminnm(<2 x float>) + +define <1 x float> @test_fminnmp_v1f32(<2 x float> %a) { +; CHECK: test_fminnmp_v1f32: + %val = call <1 x float> @llvm.aarch64.neon.vpfminnm(<2 x float> %a) +; CHECK: fminnmp s0, v0.2s + ret <1 x float> %val +} + +declare <1 x double> @llvm.aarch64.neon.vpfminnmq(<2 x double>) + +define <1 x double> @test_fminnmp_v1f64(<2 x double> %a) { +; CHECK: test_fminnmp_v1f64: + %val = call <1 x double> @llvm.aarch64.neon.vpfminnmq(<2 x double> %a) +; CHECK: fminnmp d0, v0.2d + ret <1 x double> %val +} + diff --git a/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll b/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll new file mode 100644 index 0000000..83ceb4e --- /dev/null +++ b/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll @@ -0,0 +1,39 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s + + +declare <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_urshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_urshl_v1i64: + %tmp1 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: urshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_srshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_srshl_v1i64: + %tmp1 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: srshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +declare <1 x i64> @llvm.aarch64.neon.vrshldu(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.aarch64.neon.vrshlds(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_urshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_urshl_v1i64_aarch64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vrshldu(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: urshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_srshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_srshl_v1i64_aarch64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vrshlds(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: srshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + + + diff --git a/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll b/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll new file mode 100644 index 0000000..9e12978 --- /dev/null +++ b/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll @@ -0,0 +1,171 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s + +declare <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_uqadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_uqadd_v1i64: + %tmp1 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +; CHECK: uqadd d0, d0, d1 + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_sqadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_sqadd_v1i64: + %tmp1 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +; CHECK: sqadd d0, d0, d1 + ret <1 x i64> %tmp1 +} + +declare <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_uqsub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_uqsub_v1i64: + %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +; CHECK: uqsub d0, d0, d1 + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_sqsub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_sqsub_v1i64: + %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +; CHECK: sqsub d0, d0, d1 + ret <1 x i64> %tmp1 +} + +declare <1 x i8> @llvm.aarch64.neon.vqaddu.v1i8(<1 x i8>, <1 x i8>) +declare <1 x i8> @llvm.aarch64.neon.vqadds.v1i8(<1 x i8>, <1 x i8>) + +define <1 x i8> @test_uqadd_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { +; CHECK: test_uqadd_v1i8_aarch64: + %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqaddu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) +;CHECK: uqadd {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} + ret <1 x i8> %tmp1 +} + +define <1 x i8> @test_sqadd_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { +; CHECK: test_sqadd_v1i8_aarch64: + %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqadds.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) +;CHECK: sqadd {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} + ret <1 x i8> %tmp1 +} + +declare <1 x i8> @llvm.aarch64.neon.vqsubu.v1i8(<1 x i8>, <1 x i8>) +declare <1 x i8> @llvm.aarch64.neon.vqsubs.v1i8(<1 x i8>, <1 x i8>) + +define <1 x i8> @test_uqsub_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { +; CHECK: test_uqsub_v1i8_aarch64: + %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqsubu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) +;CHECK: uqsub {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} + ret <1 x i8> %tmp1 +} + +define <1 x i8> @test_sqsub_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { +; CHECK: test_sqsub_v1i8_aarch64: + %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqsubs.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) +;CHECK: sqsub {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} + ret <1 x i8> %tmp1 +} + +declare <1 x i16> @llvm.aarch64.neon.vqaddu.v1i16(<1 x i16>, <1 x i16>) +declare <1 x i16> @llvm.aarch64.neon.vqadds.v1i16(<1 x i16>, <1 x i16>) + +define <1 x i16> @test_uqadd_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { +; CHECK: test_uqadd_v1i16_aarch64: + %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqaddu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) +;CHECK: uqadd {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}} + ret <1 x i16> %tmp1 +} + +define <1 x i16> @test_sqadd_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { +; CHECK: test_sqadd_v1i16_aarch64: + %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqadds.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) +;CHECK: sqadd {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}} + ret <1 x i16> %tmp1 +} + +declare <1 x i16> @llvm.aarch64.neon.vqsubu.v1i16(<1 x i16>, <1 x i16>) +declare <1 x i16> @llvm.aarch64.neon.vqsubs.v1i16(<1 x i16>, <1 x i16>) + +define <1 x i16> @test_uqsub_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { +; CHECK: test_uqsub_v1i16_aarch64: + %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqsubu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) +;CHECK: uqsub {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}} + ret <1 x i16> %tmp1 +} + +define <1 x i16> @test_sqsub_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { +; CHECK: test_sqsub_v1i16_aarch64: + %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqsubs.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) +;CHECK: sqsub {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}} + ret <1 x i16> %tmp1 +} + +declare <1 x i32> @llvm.aarch64.neon.vqaddu.v1i32(<1 x i32>, <1 x i32>) +declare <1 x i32> @llvm.aarch64.neon.vqadds.v1i32(<1 x i32>, <1 x i32>) + +define <1 x i32> @test_uqadd_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { +; CHECK: test_uqadd_v1i32_aarch64: + %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqaddu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) +;CHECK: uqadd {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}} + ret <1 x i32> %tmp1 +} + +define <1 x i32> @test_sqadd_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { +; CHECK: test_sqadd_v1i32_aarch64: + %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqadds.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) +;CHECK: sqadd {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}} + ret <1 x i32> %tmp1 +} + +declare <1 x i32> @llvm.aarch64.neon.vqsubu.v1i32(<1 x i32>, <1 x i32>) +declare <1 x i32> @llvm.aarch64.neon.vqsubs.v1i32(<1 x i32>, <1 x i32>) + +define <1 x i32> @test_uqsub_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { +; CHECK: test_uqsub_v1i32_aarch64: + %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqsubu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) +;CHECK: uqsub {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}} + ret <1 x i32> %tmp1 +} + +define <1 x i32> @test_sqsub_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { +; CHECK: test_sqsub_v1i32_aarch64: + %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqsubs.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) +;CHECK: sqsub {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}} + ret <1 x i32> %tmp1 +} + +declare <1 x i64> @llvm.aarch64.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.aarch64.neon.vqadds.v1i64(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_uqadd_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_uqadd_v1i64_aarch64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqaddu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: uqadd {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_sqadd_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_sqadd_v1i64_aarch64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqadds.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: sqadd {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +declare <1 x i64> @llvm.aarch64.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.aarch64.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_uqsub_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_uqsub_v1i64_aarch64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqsubu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: uqsub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_sqsub_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_sqsub_v1i64_aarch64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqsubs.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: sqsub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} diff --git a/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll b/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll new file mode 100644 index 0000000..0fd67df --- /dev/null +++ b/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll @@ -0,0 +1,94 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s + +declare <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_uqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_uqrshl_v1i64: + %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: uqrshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_sqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_sqrshl_v1i64: + %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: sqrshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +declare <1 x i8> @llvm.aarch64.neon.vqrshlu.v1i8(<1 x i8>, <1 x i8>) +declare <1 x i8> @llvm.aarch64.neon.vqrshls.v1i8(<1 x i8>, <1 x i8>) + +define <1 x i8> @test_uqrshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { +; CHECK: test_uqrshl_v1i8_aarch64: + %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqrshlu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) +;CHECK: uqrshl {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} + + ret <1 x i8> %tmp1 +} + +define <1 x i8> @test_sqrshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { +; CHECK: test_sqrshl_v1i8_aarch64: + %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqrshls.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) +;CHECK: sqrshl {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} + ret <1 x i8> %tmp1 +} + +declare <1 x i16> @llvm.aarch64.neon.vqrshlu.v1i16(<1 x i16>, <1 x i16>) +declare <1 x i16> @llvm.aarch64.neon.vqrshls.v1i16(<1 x i16>, <1 x i16>) + +define <1 x i16> @test_uqrshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { +; CHECK: test_uqrshl_v1i16_aarch64: + %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqrshlu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) +;CHECK: uqrshl {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}} + + ret <1 x i16> %tmp1 +} + +define <1 x i16> @test_sqrshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { +; CHECK: test_sqrshl_v1i16_aarch64: + %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqrshls.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) +;CHECK: sqrshl {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}} + ret <1 x i16> %tmp1 +} + +declare <1 x i32> @llvm.aarch64.neon.vqrshlu.v1i32(<1 x i32>, <1 x i32>) +declare <1 x i32> @llvm.aarch64.neon.vqrshls.v1i32(<1 x i32>, <1 x i32>) + +define <1 x i32> @test_uqrshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { +; CHECK: test_uqrshl_v1i32_aarch64: + %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqrshlu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) +;CHECK: uqrshl {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}} + + ret <1 x i32> %tmp1 +} + +define <1 x i32> @test_sqrshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { +; CHECK: test_sqrshl_v1i32_aarch64: + %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqrshls.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) +;CHECK: sqrshl {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}} + ret <1 x i32> %tmp1 +} + +declare <1 x i64> @llvm.aarch64.neon.vqrshlu.v1i64(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.aarch64.neon.vqrshls.v1i64(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_uqrshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_uqrshl_v1i64_aarch64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqrshlu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: uqrshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_sqrshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_sqrshl_v1i64_aarch64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqrshls.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: sqrshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + + + diff --git a/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll b/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll new file mode 100644 index 0000000..8fdea24 --- /dev/null +++ b/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll @@ -0,0 +1,88 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s + +declare <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_uqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_uqshl_v1i64: + %tmp1 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: uqshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_sqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_sqshl_v1i64: + %tmp1 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: sqshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +declare <1 x i8> @llvm.aarch64.neon.vqshlu.v1i8(<1 x i8>, <1 x i8>) +declare <1 x i8> @llvm.aarch64.neon.vqshls.v1i8(<1 x i8>, <1 x i8>) + +define <1 x i8> @test_uqshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { +; CHECK: test_uqshl_v1i8_aarch64: + %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqshlu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) +;CHECK: uqshl {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} + ret <1 x i8> %tmp1 +} + +define <1 x i8> @test_sqshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { +; CHECK: test_sqshl_v1i8_aarch64: + %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqshls.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) +;CHECK: sqshl {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} + ret <1 x i8> %tmp1 +} + +declare <1 x i16> @llvm.aarch64.neon.vqshlu.v1i16(<1 x i16>, <1 x i16>) +declare <1 x i16> @llvm.aarch64.neon.vqshls.v1i16(<1 x i16>, <1 x i16>) + +define <1 x i16> @test_uqshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { +; CHECK: test_uqshl_v1i16_aarch64: + %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqshlu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) +;CHECK: uqshl {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}} + ret <1 x i16> %tmp1 +} + +define <1 x i16> @test_sqshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { +; CHECK: test_sqshl_v1i16_aarch64: + %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqshls.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) +;CHECK: sqshl {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}} + ret <1 x i16> %tmp1 +} + +declare <1 x i32> @llvm.aarch64.neon.vqshlu.v1i32(<1 x i32>, <1 x i32>) +declare <1 x i32> @llvm.aarch64.neon.vqshls.v1i32(<1 x i32>, <1 x i32>) + +define <1 x i32> @test_uqshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { +; CHECK: test_uqshl_v1i32_aarch64: + %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqshlu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) +;CHECK: uqshl {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}} + ret <1 x i32> %tmp1 +} + +define <1 x i32> @test_sqshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { +; CHECK: test_sqshl_v1i32_aarch64: + %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqshls.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) +;CHECK: sqshl {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}} + ret <1 x i32> %tmp1 +} + +declare <1 x i64> @llvm.aarch64.neon.vqshlu.v1i64(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.aarch64.neon.vqshls.v1i64(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_uqshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_uqshl_v1i64_aarch64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqshlu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: uqshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_sqshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_sqshl_v1i64_aarch64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqshls.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: sqshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + + diff --git a/test/CodeGen/AArch64/neon-scalar-shift.ll b/test/CodeGen/AArch64/neon-scalar-shift.ll new file mode 100644 index 0000000..1222be5 --- /dev/null +++ b/test/CodeGen/AArch64/neon-scalar-shift.ll @@ -0,0 +1,38 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +declare <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_ushl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_ushl_v1i64: + %tmp1 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +; CHECK: ushl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_sshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_sshl_v1i64: + %tmp1 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +; CHECK: sshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +declare <1 x i64> @llvm.aarch64.neon.vshldu(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.aarch64.neon.vshlds(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_ushl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_ushl_v1i64_aarch64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vshldu(<1 x i64> %lhs, <1 x i64> %rhs) +; CHECK: ushl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_sshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_sshl_v1i64_aarch64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vshlds(<1 x i64> %lhs, <1 x i64> %rhs) +; CHECK: sshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + + diff --git a/test/CodeGen/AArch64/neon-shift.ll b/test/CodeGen/AArch64/neon-shift.ll index 1b8b941..33b04ce 100644 --- a/test/CodeGen/AArch64/neon-shift.ll +++ b/test/CodeGen/AArch64/neon-shift.ll @@ -102,23 +102,6 @@ define <4 x i32> @test_sshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ret <4 x i32> %tmp1 } -declare <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_ushl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_ushl_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: ushl d0, d0, d1 - ret <1 x i64> %tmp1 -} - -define <1 x i64> @test_sshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_sshl_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: sshl d0, d0, d1 - ret <1 x i64> %tmp1 -} - declare <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64>, <2 x i64>) declare <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64>, <2 x i64>) -- cgit v1.1