diff options
author | Bob Wilson <bob.wilson@apple.com> | 2010-11-27 06:35:16 +0000 |
---|---|---|
committer | Bob Wilson <bob.wilson@apple.com> | 2010-11-27 06:35:16 +0000 |
commit | 2a0e97431ecef2aa6a24a16ced207d5b53fcfc2d (patch) | |
tree | 9c4d91c61cf20ac1f03f1ce1f854be7b9518d5fa /test | |
parent | 8d412946643f048daa9d76b4f021a172341ea045 (diff) | |
download | external_llvm-2a0e97431ecef2aa6a24a16ced207d5b53fcfc2d.zip external_llvm-2a0e97431ecef2aa6a24a16ced207d5b53fcfc2d.tar.gz external_llvm-2a0e97431ecef2aa6a24a16ced207d5b53fcfc2d.tar.bz2 |
Add NEON VLD1-dup instructions (load 1 element to all lanes).
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@120194 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r-- | test/CodeGen/ARM/2009-11-02-NegativeLane.ll | 3 | ||||
-rw-r--r-- | test/CodeGen/ARM/vlddup.ll | 41 |
2 files changed, 43 insertions, 1 deletions
diff --git a/test/CodeGen/ARM/2009-11-02-NegativeLane.ll b/test/CodeGen/ARM/2009-11-02-NegativeLane.ll index 89c9037..ca5ae8b 100644 --- a/test/CodeGen/ARM/2009-11-02-NegativeLane.ll +++ b/test/CodeGen/ARM/2009-11-02-NegativeLane.ll @@ -1,4 +1,4 @@ -; RUN: llc -mcpu=cortex-a8 < %s | grep vdup.16 +; RUN: llc -mcpu=cortex-a8 < %s | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" target triple = "armv7-eabi" @@ -7,6 +7,7 @@ entry: br i1 undef, label %return, label %bb bb: ; preds = %bb, %entry +; CHECK: vld1.16 {d16[], d17[]} %0 = load i16* undef, align 2 %1 = insertelement <8 x i16> undef, i16 %0, i32 2 %2 = insertelement <8 x i16> %1, i16 undef, i32 3 diff --git a/test/CodeGen/ARM/vlddup.ll b/test/CodeGen/ARM/vlddup.ll new file mode 100644 index 0000000..f172ed5 --- /dev/null +++ b/test/CodeGen/ARM/vlddup.ll @@ -0,0 +1,41 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @vld1dupi8(i8* %A) nounwind { +;CHECK: vld1dupi8: +;Check the (default) alignment value. +;CHECK: vld1.8 {d16[]}, [r0] + %tmp1 = load i8* %A, align 8 + %tmp2 = insertelement <8 x i8> undef, i8 %tmp1, i32 0 + %tmp3 = shufflevector <8 x i8> %tmp2, <8 x i8> undef, <8 x i32> zeroinitializer + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vld1dupi16(i16* %A) nounwind { +;CHECK: vld1dupi16: +;Check the alignment value. Max for this instruction is 16 bits: +;CHECK: vld1.16 {d16[]}, [r0, :16] + %tmp1 = load i16* %A, align 8 + %tmp2 = insertelement <4 x i16> undef, i16 %tmp1, i32 0 + %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> undef, <4 x i32> zeroinitializer + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vld1dupi32(i32* %A) nounwind { +;CHECK: vld1dupi32: +;Check the alignment value. Max for this instruction is 32 bits: +;CHECK: vld1.32 {d16[]}, [r0, :32] + %tmp1 = load i32* %A, align 8 + %tmp2 = insertelement <2 x i32> undef, i32 %tmp1, i32 0 + %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> undef, <2 x i32> zeroinitializer + ret <2 x i32> %tmp3 +} + +define <16 x i8> @vld1dupQi8(i8* %A) nounwind { +;CHECK: vld1dupQi8: +;Check the (default) alignment value. +;CHECK: vld1.8 {d16[], d17[]}, [r0] + %tmp1 = load i8* %A, align 8 + %tmp2 = insertelement <16 x i8> undef, i8 %tmp1, i32 0 + %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <16 x i32> zeroinitializer + ret <16 x i8> %tmp3 +} |