diff options
author | Evan Cheng <evan.cheng@apple.com> | 2012-09-18 01:42:45 +0000 |
---|---|---|
committer | Evan Cheng <evan.cheng@apple.com> | 2012-09-18 01:42:45 +0000 |
commit | d10eab0a95dcfff6390cc73b50ca07fd8b98b0bc (patch) | |
tree | 43188b1a134f52ef023e3686f292662f656fa0b7 /test | |
parent | b198f5c8979d46d75a08c1710a160f8e102b9ba8 (diff) | |
download | external_llvm-d10eab0a95dcfff6390cc73b50ca07fd8b98b0bc.zip external_llvm-d10eab0a95dcfff6390cc73b50ca07fd8b98b0bc.tar.gz external_llvm-d10eab0a95dcfff6390cc73b50ca07fd8b98b0bc.tar.bz2 |
Use vld1 / vst1 for unaligned v2f64 load / store. e.g. Use vld1.16 for 2-byte
aligned address. Based on patch by David Peixotto.
Also use vld1.64 / vst1.64 with 128-bit alignment to take advantage of alignment
hints. rdar://12090772, rdar://12238782
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@164089 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r-- | test/CodeGen/ARM/2011-10-26-memset-with-neon.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll | 44 | ||||
-rw-r--r-- | test/CodeGen/ARM/neon_ld2.ll | 10 | ||||
-rw-r--r-- | test/CodeGen/ARM/reg_sequence.ll | 6 | ||||
-rw-r--r-- | test/CodeGen/ARM/twoaddrinstr.ll | 8 | ||||
-rw-r--r-- | test/CodeGen/ARM/unaligned_load_store.ll | 18 | ||||
-rw-r--r-- | test/CodeGen/ARM/unaligned_load_store_vector.ll | 487 | ||||
-rw-r--r-- | test/CodeGen/ARM/vbsl-constant.ll | 18 | ||||
-rw-r--r-- | test/CodeGen/Thumb2/buildvector-crash.ll | 4 |
9 files changed, 551 insertions, 48 deletions
diff --git a/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll b/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll index 42b1491..6e0ef96 100644 --- a/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll +++ b/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll @@ -9,8 +9,8 @@ entry: } ; Trigger multiple NEON stores. -; CHECK: vstmia -; CHECK-NEXT: vstmia +; CHECK: vst1.64 +; CHECK-NEXT: vst1.64 define void @f_0_40(i8* nocapture %c) nounwind optsize { entry: call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 40, i32 16, i1 false) diff --git a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll index 1769ee5..f9ede74 100644 --- a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll +++ b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll @@ -8,12 +8,12 @@ define void @test_sqrt(<4 x float>* %X) nounwind { ; CHECK: movw r1, :lower16:{{.*}} ; CHECK: movt r1, :upper16:{{.*}} -; CHECK: vldmia r1 +; CHECK: vld1.64 {{.*}}, [r1, :128] ; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}} ; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}} ; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}} ; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}} -; CHECK: vstmia {{.*}} +; CHECK: vst1.64 {{.*}} L.entry: %0 = load <4 x float>* @A, align 16 @@ -31,7 +31,7 @@ define void @test_cos(<4 x float>* %X) nounwind { ; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}} ; CHECK: movt [[reg0]], :upper16:{{.*}} -; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}} +; CHECK: vld1.64 ; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}cosf @@ -45,7 +45,7 @@ define void @test_cos(<4 x float>* %X) nounwind { ; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}cosf -; CHECK: vstmia {{.*}} +; CHECK: vst1.64 L.entry: %0 = load <4 x float>* @A, align 16 @@ -62,7 +62,7 @@ define void @test_exp(<4 x float>* %X) nounwind { ; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}} ; CHECK: movt [[reg0]], :upper16:{{.*}} -; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}} +; CHECK: vld1.64 ; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}expf @@ -76,7 +76,7 @@ 
define void @test_exp(<4 x float>* %X) nounwind { ; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}expf -; CHECK: vstmia {{.*}} +; CHECK: vst1.64 L.entry: %0 = load <4 x float>* @A, align 16 @@ -93,7 +93,7 @@ define void @test_exp2(<4 x float>* %X) nounwind { ; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}} ; CHECK: movt [[reg0]], :upper16:{{.*}} -; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}} +; CHECK: vld1.64 ; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}exp2f @@ -107,7 +107,7 @@ define void @test_exp2(<4 x float>* %X) nounwind { ; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}exp2f -; CHECK: vstmia {{.*}} +; CHECK: vst1.64 L.entry: %0 = load <4 x float>* @A, align 16 @@ -124,7 +124,7 @@ define void @test_log10(<4 x float>* %X) nounwind { ; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}} ; CHECK: movt [[reg0]], :upper16:{{.*}} -; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}} +; CHECK: vld1.64 ; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}log10f @@ -138,7 +138,7 @@ define void @test_log10(<4 x float>* %X) nounwind { ; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}log10f -; CHECK: vstmia {{.*}} +; CHECK: vst1.64 L.entry: %0 = load <4 x float>* @A, align 16 @@ -155,7 +155,7 @@ define void @test_log(<4 x float>* %X) nounwind { ; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}} ; CHECK: movt [[reg0]], :upper16:{{.*}} -; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}} +; CHECK: vld1.64 ; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}logf @@ -169,7 +169,7 @@ define void @test_log(<4 x float>* %X) nounwind { ; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}logf -; CHECK: vstmia {{.*}} +; CHECK: vst1.64 L.entry: %0 = load <4 x float>* @A, align 16 @@ -186,7 +186,7 @@ define void @test_log2(<4 x float>* %X) nounwind { ; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}} ; CHECK: movt [[reg0]], :upper16:{{.*}} -; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}} +; CHECK: vld1.64 ; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}log2f @@ -200,7 +200,7 @@ define void @test_log2(<4 x float>* %X) nounwind { ; CHECK: 
{{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}log2f -; CHECK: vstmia {{.*}} +; CHECK: vst1.64 L.entry: %0 = load <4 x float>* @A, align 16 @@ -218,7 +218,7 @@ define void @test_pow(<4 x float>* %X) nounwind { ; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}} ; CHECK: movt [[reg0]], :upper16:{{.*}} -; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}} +; CHECK: vld1.64 ; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}powf @@ -232,7 +232,7 @@ define void @test_pow(<4 x float>* %X) nounwind { ; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}powf -; CHECK: vstmia {{.*}} +; CHECK: vst1.64 L.entry: @@ -252,10 +252,10 @@ define void @test_powi(<4 x float>* %X) nounwind { ; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}} ; CHECK: movt [[reg0]], :upper16:{{.*}} -; CHECK: vldmia [[reg0]], {{.*}} +; CHECK: vld1.64 {{.*}}, :128 ; CHECK: vmul.f32 {{.*}} -; CHECK: vstmia {{.*}} +; CHECK: vst1.64 L.entry: @@ -275,7 +275,7 @@ define void @test_sin(<4 x float>* %X) nounwind { ; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}} ; CHECK: movt [[reg0]], :upper16:{{.*}} -; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}} +; CHECK: vld1.64 ; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}sinf @@ -289,7 +289,7 @@ define void @test_sin(<4 x float>* %X) nounwind { ; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}sinf -; CHECK: vstmia {{.*}} +; CHECK: vst1.64 L.entry: %0 = load <4 x float>* @A, align 16 @@ -306,7 +306,7 @@ define void @test_floor(<4 x float>* %X) nounwind { ; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}} ; CHECK: movt [[reg0]], :upper16:{{.*}} -; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}} +; CHECK: vld1.64 ; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}floorf @@ -320,7 +320,7 @@ define void @test_floor(<4 x float>* %X) nounwind { ; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}floorf -; CHECK: vstmia {{.*}} +; CHECK: vst1.64 L.entry: %0 = load <4 x float>* @A, align 16 diff --git a/test/CodeGen/ARM/neon_ld2.ll b/test/CodeGen/ARM/neon_ld2.ll index 944bfe0..630db93 100644 --- a/test/CodeGen/ARM/neon_ld2.ll +++ 
b/test/CodeGen/ARM/neon_ld2.ll @@ -1,10 +1,10 @@ ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s ; CHECK: t1 -; CHECK: vldmia -; CHECK: vldmia +; CHECK: vld1.64 +; CHECK: vld1.64 ; CHECK: vadd.i64 q -; CHECK: vstmia +; CHECK: vst1.64 define void @t1(<4 x i32>* %r, <2 x i64>* %a, <2 x i64>* %b) nounwind { entry: %0 = load <2 x i64>* %a, align 16 ; <<2 x i64>> [#uses=1] @@ -16,8 +16,8 @@ entry: } ; CHECK: t2 -; CHECK: vldmia -; CHECK: vldmia +; CHECK: vld1.64 +; CHECK: vld1.64 ; CHECK: vsub.i64 q ; CHECK: vmov r0, r1, d ; CHECK: vmov r2, r3, d diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll index 05794e4..92c0f0a 100644 --- a/test/CodeGen/ARM/reg_sequence.ll +++ b/test/CodeGen/ARM/reg_sequence.ll @@ -137,7 +137,7 @@ return2: define <8 x i16> @t5(i16* %A, <8 x i16>* %B) nounwind { ; CHECK: t5: -; CHECK: vldmia +; CHECK: vld1.32 ; How can FileCheck match Q and D registers? We need a lisp interpreter. ; CHECK: vorr {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}} ; CHECK-NOT: vmov @@ -243,8 +243,8 @@ define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind { ; CHECK: vldr ; CHECK-NOT: vmov d{{.*}}, d16 ; CHECK: vmov.i32 d17 -; CHECK-NEXT: vstmia r0, {d16, d17} -; CHECK-NEXT: vstmia r0, {d16, d17} +; CHECK-NEXT: vst1.64 {d16, d17}, [r0, :128] +; CHECK-NEXT: vst1.64 {d16, d17}, [r0, :128] %3 = bitcast double 0.000000e+00 to <2 x float> ; <<2 x float>> [#uses=2] %4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1] store <4 x float> %4, <4 x float>* undef, align 16 diff --git a/test/CodeGen/ARM/twoaddrinstr.ll b/test/CodeGen/ARM/twoaddrinstr.ll index 4e227dd..78202bf 100644 --- a/test/CodeGen/ARM/twoaddrinstr.ll +++ b/test/CodeGen/ARM/twoaddrinstr.ll @@ -4,12 +4,12 @@ define void @PR13378() nounwind { ; This was orriginally a crasher trying to schedule the instructions. 
; CHECK: PR13378: -; CHECK: vldmia +; CHECK: vld1.32 +; CHECK-NEXT: vst1.32 +; CHECK-NEXT: vst1.32 ; CHECK-NEXT: vmov.f32 -; CHECK-NEXT: vstmia -; CHECK-NEXT: vstmia ; CHECK-NEXT: vmov.f32 -; CHECK-NEXT: vstmia +; CHECK-NEXT: vst1.32 entry: %0 = load <4 x float>* undef diff --git a/test/CodeGen/ARM/unaligned_load_store.ll b/test/CodeGen/ARM/unaligned_load_store.ll index 869b926..3064202 100644 --- a/test/CodeGen/ARM/unaligned_load_store.ll +++ b/test/CodeGen/ARM/unaligned_load_store.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=arm -pre-RA-sched=source | FileCheck %s -check-prefix=EXPANDED -; RUN: llc < %s -mtriple=armv6-apple-darwin -mcpu=cortex-a8 -arm-strict-align -pre-RA-sched=source | FileCheck %s -check-prefix=EXPANDED +; RUN: llc < %s -mtriple=armv6-apple-darwin -mcpu=cortex-a8 -mattr=-neon -arm-strict-align -pre-RA-sched=source | FileCheck %s -check-prefix=EXPANDED ; RUN: llc < %s -mtriple=armv6-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=UNALIGNED ; rdar://7113725 @@ -59,3 +59,19 @@ entry: store double %tmp, double* %b, align 1 ret void } + +define void @byte_word_ops(i32* %a, i32* %b) nounwind { +entry: +; EXPANDED: byte_word_ops: +; EXPANDED: ldrb +; EXPANDED: strb + +; UNALIGNED: byte_word_ops: +; UNALIGNED-NOT: ldrb +; UNALIGNED: ldr +; UNALIGNED-NOT: strb +; UNALIGNED: str + %tmp = load i32* %a, align 1 + store i32 %tmp, i32* %b, align 1 + ret void +} diff --git a/test/CodeGen/ARM/unaligned_load_store_vector.ll b/test/CodeGen/ARM/unaligned_load_store_vector.ll new file mode 100644 index 0000000..25ae651 --- /dev/null +++ b/test/CodeGen/ARM/unaligned_load_store_vector.ll @@ -0,0 +1,487 @@ +;RUN: llc < %s -march=arm -mattr=+v7 -mattr=+neon | FileCheck %s + +;ALIGN = 1 +;SIZE = 64 +;TYPE = <8 x i8> +define void @v64_v8i8_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind { +;CHECK: v64_v8i8_1: +entry: + %po = getelementptr i8* %out, i32 0 + %pi = getelementptr i8* %in, i32 0 + %vi = bitcast i8* %pi to <8 x i8>* + %vo = bitcast 
i8* %po to <8 x i8>* +;CHECK: vld1.8 + %v1 = load <8 x i8>* %vi, align 1 +;CHECK: vst1.8 + store <8 x i8> %v1, <8 x i8>* %vo, align 1 + ret void +} + + +;ALIGN = 1 +;SIZE = 64 +;TYPE = <4 x i16> +define void @v64_v4i16_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind { +;CHECK: v64_v4i16_1: +entry: + %po = getelementptr i8* %out, i32 0 + %pi = getelementptr i8* %in, i32 0 + %vi = bitcast i8* %pi to <4 x i16>* + %vo = bitcast i8* %po to <4 x i16>* +;CHECK: vld1.8 + %v1 = load <4 x i16>* %vi, align 1 +;CHECK: vst1.8 + store <4 x i16> %v1, <4 x i16>* %vo, align 1 + ret void +} + + +;ALIGN = 1 +;SIZE = 64 +;TYPE = <2 x i32> +define void @v64_v2i32_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind { +;CHECK: v64_v2i32_1: +entry: + %po = getelementptr i8* %out, i32 0 + %pi = getelementptr i8* %in, i32 0 + %vi = bitcast i8* %pi to <2 x i32>* + %vo = bitcast i8* %po to <2 x i32>* +;CHECK: vld1.8 + %v1 = load <2 x i32>* %vi, align 1 +;CHECK: vst1.8 + store <2 x i32> %v1, <2 x i32>* %vo, align 1 + ret void +} + + +;ALIGN = 1 +;SIZE = 64 +;TYPE = <2 x float> +define void @v64_v2f32_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind { +;CHECK: v64_v2f32_1: +entry: + %po = getelementptr i8* %out, i32 0 + %pi = getelementptr i8* %in, i32 0 + %vi = bitcast i8* %pi to <2 x float>* + %vo = bitcast i8* %po to <2 x float>* +;CHECK: vld1.8 + %v1 = load <2 x float>* %vi, align 1 +;CHECK: vst1.8 + store <2 x float> %v1, <2 x float>* %vo, align 1 + ret void +} + + +;ALIGN = 1 +;SIZE = 128 +;TYPE = <16 x i8> +define void @v128_v16i8_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind { +;CHECK: v128_v16i8_1: +entry: + %po = getelementptr i8* %out, i32 0 + %pi = getelementptr i8* %in, i32 0 + %vi = bitcast i8* %pi to <16 x i8>* + %vo = bitcast i8* %po to <16 x i8>* +;CHECK: vld1.8 + %v1 = load <16 x i8>* %vi, align 1 +;CHECK: vst1.8 + store <16 x i8> %v1, <16 x i8>* %vo, align 1 + ret void +} + + +;ALIGN = 1 +;SIZE = 128 +;TYPE 
= <8 x i16> +define void @v128_v8i16_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind { +;CHECK: v128_v8i16_1: +entry: + %po = getelementptr i8* %out, i32 0 + %pi = getelementptr i8* %in, i32 0 + %vi = bitcast i8* %pi to <8 x i16>* + %vo = bitcast i8* %po to <8 x i16>* +;CHECK: vld1.8 + %v1 = load <8 x i16>* %vi, align 1 +;CHECK: vst1.8 + store <8 x i16> %v1, <8 x i16>* %vo, align 1 + ret void +} + + +;ALIGN = 1 +;SIZE = 128 +;TYPE = <4 x i32> +define void @v128_v4i32_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind { +;CHECK: v128_v4i32_1: +entry: + %po = getelementptr i8* %out, i32 0 + %pi = getelementptr i8* %in, i32 0 + %vi = bitcast i8* %pi to <4 x i32>* + %vo = bitcast i8* %po to <4 x i32>* +;CHECK: vld1.8 + %v1 = load <4 x i32>* %vi, align 1 +;CHECK: vst1.8 + store <4 x i32> %v1, <4 x i32>* %vo, align 1 + ret void +} + + +;ALIGN = 1 +;SIZE = 128 +;TYPE = <2 x i64> +define void @v128_v2i64_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind { +;CHECK: v128_v2i64_1: +entry: + %po = getelementptr i8* %out, i32 0 + %pi = getelementptr i8* %in, i32 0 + %vi = bitcast i8* %pi to <2 x i64>* + %vo = bitcast i8* %po to <2 x i64>* +;CHECK: vld1.8 + %v1 = load <2 x i64>* %vi, align 1 +;CHECK: vst1.8 + store <2 x i64> %v1, <2 x i64>* %vo, align 1 + ret void +} + + +;ALIGN = 1 +;SIZE = 128 +;TYPE = <4 x float> +define void @v128_v4f32_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind { +;CHECK: v128_v4f32_1: +entry: + %po = getelementptr i8* %out, i32 0 + %pi = getelementptr i8* %in, i32 0 + %vi = bitcast i8* %pi to <4 x float>* + %vo = bitcast i8* %po to <4 x float>* +;CHECK: vld1.8 + %v1 = load <4 x float>* %vi, align 1 +;CHECK: vst1.8 + store <4 x float> %v1, <4 x float>* %vo, align 1 + ret void +} + + +;ALIGN = 2 +;SIZE = 64 +;TYPE = <8 x i8> +define void @v64_v8i8_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind { +;CHECK: v64_v8i8_2: +entry: + %po = getelementptr i8* %out, i32 0 + %pi = 
getelementptr i8* %in, i32 0 + %vi = bitcast i8* %pi to <8 x i8>* + %vo = bitcast i8* %po to <8 x i8>* +;CHECK: vld1.16 + %v1 = load <8 x i8>* %vi, align 2 +;CHECK: vst1.16 + store <8 x i8> %v1, <8 x i8>* %vo, align 2 + ret void +} + + +;ALIGN = 2 +;SIZE = 64 +;TYPE = <4 x i16> +define void @v64_v4i16_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind { +;CHECK: v64_v4i16_2: +entry: + %po = getelementptr i8* %out, i32 0 + %pi = getelementptr i8* %in, i32 0 + %vi = bitcast i8* %pi to <4 x i16>* + %vo = bitcast i8* %po to <4 x i16>* +;CHECK: vld1.16 + %v1 = load <4 x i16>* %vi, align 2 +;CHECK: vst1.16 + store <4 x i16> %v1, <4 x i16>* %vo, align 2 + ret void +} + + +;ALIGN = 2 +;SIZE = 64 +;TYPE = <2 x i32> +define void @v64_v2i32_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind { +;CHECK: v64_v2i32_2: +entry: + %po = getelementptr i8* %out, i32 0 + %pi = getelementptr i8* %in, i32 0 + %vi = bitcast i8* %pi to <2 x i32>* + %vo = bitcast i8* %po to <2 x i32>* +;CHECK: vld1.16 + %v1 = load <2 x i32>* %vi, align 2 +;CHECK: vst1.16 + store <2 x i32> %v1, <2 x i32>* %vo, align 2 + ret void +} + + +;ALIGN = 2 +;SIZE = 64 +;TYPE = <2 x float> +define void @v64_v2f32_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind { +;CHECK: v64_v2f32_2: +entry: + %po = getelementptr i8* %out, i32 0 + %pi = getelementptr i8* %in, i32 0 + %vi = bitcast i8* %pi to <2 x float>* + %vo = bitcast i8* %po to <2 x float>* +;CHECK: vld1.16 + %v1 = load <2 x float>* %vi, align 2 +;CHECK: vst1.16 + store <2 x float> %v1, <2 x float>* %vo, align 2 + ret void +} + + +;ALIGN = 2 +;SIZE = 128 +;TYPE = <16 x i8> +define void @v128_v16i8_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind { +;CHECK: v128_v16i8_2: +entry: + %po = getelementptr i8* %out, i32 0 + %pi = getelementptr i8* %in, i32 0 + %vi = bitcast i8* %pi to <16 x i8>* + %vo = bitcast i8* %po to <16 x i8>* +;CHECK: vld1.16 + %v1 = load <16 x i8>* %vi, align 2 +;CHECK: vst1.16 + 
store <16 x i8> %v1, <16 x i8>* %vo, align 2 + ret void +} + + +;ALIGN = 2 +;SIZE = 128 +;TYPE = <8 x i16> +define void @v128_v8i16_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind { +;CHECK: v128_v8i16_2: +entry: + %po = getelementptr i8* %out, i32 0 + %pi = getelementptr i8* %in, i32 0 + %vi = bitcast i8* %pi to <8 x i16>* + %vo = bitcast i8* %po to <8 x i16>* +;CHECK: vld1.16 + %v1 = load <8 x i16>* %vi, align 2 +;CHECK: vst1.16 + store <8 x i16> %v1, <8 x i16>* %vo, align 2 + ret void +} + + +;ALIGN = 2 +;SIZE = 128 +;TYPE = <4 x i32> +define void @v128_v4i32_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind { +;CHECK: v128_v4i32_2: +entry: + %po = getelementptr i8* %out, i32 0 + %pi = getelementptr i8* %in, i32 0 + %vi = bitcast i8* %pi to <4 x i32>* + %vo = bitcast i8* %po to <4 x i32>* +;CHECK: vld1.16 + %v1 = load <4 x i32>* %vi, align 2 +;CHECK: vst1.16 + store <4 x i32> %v1, <4 x i32>* %vo, align 2 + ret void +} + + +;ALIGN = 2 +;SIZE = 128 +;TYPE = <2 x i64> +define void @v128_v2i64_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind { +;CHECK: v128_v2i64_2: +entry: + %po = getelementptr i8* %out, i32 0 + %pi = getelementptr i8* %in, i32 0 + %vi = bitcast i8* %pi to <2 x i64>* + %vo = bitcast i8* %po to <2 x i64>* +;CHECK: vld1.16 + %v1 = load <2 x i64>* %vi, align 2 +;CHECK: vst1.16 + store <2 x i64> %v1, <2 x i64>* %vo, align 2 + ret void +} + + +;ALIGN = 2 +;SIZE = 128 +;TYPE = <4 x float> +define void @v128_v4f32_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind { +;CHECK: v128_v4f32_2: +entry: + %po = getelementptr i8* %out, i32 0 + %pi = getelementptr i8* %in, i32 0 + %vi = bitcast i8* %pi to <4 x float>* + %vo = bitcast i8* %po to <4 x float>* +;CHECK: vld1.16 + %v1 = load <4 x float>* %vi, align 2 +;CHECK: vst1.16 + store <4 x float> %v1, <4 x float>* %vo, align 2 + ret void +} + + +;ALIGN = 4 +;SIZE = 64 +;TYPE = <8 x i8> +define void @v64_v8i8_4(i8* noalias nocapture %out, i8* 
noalias nocapture %in) nounwind { +;CHECK: v64_v8i8_4: +entry: + %po = getelementptr i8* %out, i32 0 + %pi = getelementptr i8* %in, i32 0 + %vi = bitcast i8* %pi to <8 x i8>* + %vo = bitcast i8* %po to <8 x i8>* +;CHECK: vldr + %v1 = load <8 x i8>* %vi, align 4 +;CHECK: vstr + store <8 x i8> %v1, <8 x i8>* %vo, align 4 + ret void +} + + +;ALIGN = 4 +;SIZE = 64 +;TYPE = <4 x i16> +define void @v64_v4i16_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind { +;CHECK: v64_v4i16_4: +entry: + %po = getelementptr i8* %out, i32 0 + %pi = getelementptr i8* %in, i32 0 + %vi = bitcast i8* %pi to <4 x i16>* + %vo = bitcast i8* %po to <4 x i16>* +;CHECK: vldr + %v1 = load <4 x i16>* %vi, align 4 +;CHECK: vstr + store <4 x i16> %v1, <4 x i16>* %vo, align 4 + ret void +} + + +;ALIGN = 4 +;SIZE = 64 +;TYPE = <2 x i32> +define void @v64_v2i32_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind { +;CHECK: v64_v2i32_4: +entry: + %po = getelementptr i8* %out, i32 0 + %pi = getelementptr i8* %in, i32 0 + %vi = bitcast i8* %pi to <2 x i32>* + %vo = bitcast i8* %po to <2 x i32>* +;CHECK: vldr + %v1 = load <2 x i32>* %vi, align 4 +;CHECK: vstr + store <2 x i32> %v1, <2 x i32>* %vo, align 4 + ret void +} + + +;ALIGN = 4 +;SIZE = 64 +;TYPE = <2 x float> +define void @v64_v2f32_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind { +;CHECK: v64_v2f32_4: +entry: + %po = getelementptr i8* %out, i32 0 + %pi = getelementptr i8* %in, i32 0 + %vi = bitcast i8* %pi to <2 x float>* + %vo = bitcast i8* %po to <2 x float>* +;CHECK: vldr + %v1 = load <2 x float>* %vi, align 4 +;CHECK: vstr + store <2 x float> %v1, <2 x float>* %vo, align 4 + ret void +} + + +;ALIGN = 4 +;SIZE = 128 +;TYPE = <16 x i8> +define void @v128_v16i8_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind { +;CHECK: v128_v16i8_4: +entry: + %po = getelementptr i8* %out, i32 0 + %pi = getelementptr i8* %in, i32 0 + %vi = bitcast i8* %pi to <16 x i8>* + %vo = bitcast i8* %po to 
<16 x i8>* +;CHECK: vld1.32 + %v1 = load <16 x i8>* %vi, align 4 +;CHECK: vst1.32 + store <16 x i8> %v1, <16 x i8>* %vo, align 4 + ret void +} + + +;ALIGN = 4 +;SIZE = 128 +;TYPE = <8 x i16> +define void @v128_v8i16_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind { +;CHECK: v128_v8i16_4: +entry: + %po = getelementptr i8* %out, i32 0 + %pi = getelementptr i8* %in, i32 0 + %vi = bitcast i8* %pi to <8 x i16>* + %vo = bitcast i8* %po to <8 x i16>* +;CHECK: vld1.32 + %v1 = load <8 x i16>* %vi, align 4 +;CHECK: vst1.32 + store <8 x i16> %v1, <8 x i16>* %vo, align 4 + ret void +} + + +;ALIGN = 4 +;SIZE = 128 +;TYPE = <4 x i32> +define void @v128_v4i32_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind { +;CHECK: v128_v4i32_4: +entry: + %po = getelementptr i8* %out, i32 0 + %pi = getelementptr i8* %in, i32 0 + %vi = bitcast i8* %pi to <4 x i32>* + %vo = bitcast i8* %po to <4 x i32>* +;CHECK: vld1.32 + %v1 = load <4 x i32>* %vi, align 4 +;CHECK: vst1.32 + store <4 x i32> %v1, <4 x i32>* %vo, align 4 + ret void +} + + +;ALIGN = 4 +;SIZE = 128 +;TYPE = <2 x i64> +define void @v128_v2i64_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind { +;CHECK: v128_v2i64_4: +entry: + %po = getelementptr i8* %out, i32 0 + %pi = getelementptr i8* %in, i32 0 + %vi = bitcast i8* %pi to <2 x i64>* + %vo = bitcast i8* %po to <2 x i64>* +;CHECK: vld1.32 + %v1 = load <2 x i64>* %vi, align 4 +;CHECK: vst1.32 + store <2 x i64> %v1, <2 x i64>* %vo, align 4 + ret void +} + + +;ALIGN = 4 +;SIZE = 128 +;TYPE = <4 x float> +define void @v128_v4f32_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind { +;CHECK: v128_v4f32_4: +entry: + %po = getelementptr i8* %out, i32 0 + %pi = getelementptr i8* %in, i32 0 + %vi = bitcast i8* %pi to <4 x float>* + %vo = bitcast i8* %po to <4 x float>* +;CHECK: vld1.32 + %v1 = load <4 x float>* %vi, align 4 +;CHECK: vst1.32 + store <4 x float> %v1, <4 x float>* %vo, align 4 + ret void +} + diff --git 
a/test/CodeGen/ARM/vbsl-constant.ll b/test/CodeGen/ARM/vbsl-constant.ll index f157dbd..7542037 100644 --- a/test/CodeGen/ARM/vbsl-constant.ll +++ b/test/CodeGen/ARM/vbsl-constant.ll @@ -59,8 +59,8 @@ define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind define <16 x i8> @v_bslQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind { ;CHECK: v_bslQi8: -;CHECK: vldmia -;CHECK: vldmia +;CHECK: vld1.32 +;CHECK: vld1.32 ;CHECK: vbsl %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B @@ -73,8 +73,8 @@ define <16 x i8> @v_bslQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind define <8 x i16> @v_bslQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind { ;CHECK: v_bslQi16: -;CHECK: vldmia -;CHECK: vldmia +;CHECK: vld1.32 +;CHECK: vld1.32 ;CHECK: vbsl %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B @@ -87,8 +87,8 @@ define <8 x i16> @v_bslQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwin define <4 x i32> @v_bslQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind { ;CHECK: v_bslQi32: -;CHECK: vldmia -;CHECK: vldmia +;CHECK: vld1.32 +;CHECK: vld1.32 ;CHECK: vbsl %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B @@ -101,9 +101,9 @@ define <4 x i32> @v_bslQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwin define <2 x i64> @v_bslQi64(<2 x i64>* %A, <2 x i64>* %B, <2 x i64>* %C) nounwind { ;CHECK: v_bslQi64: -;CHECK: vldmia -;CHECK: vldmia -;CHECK: vldmia +;CHECK: vld1.32 +;CHECK: vld1.32 +;CHECK: vld1.64 ;CHECK: vbsl %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B diff --git a/test/CodeGen/Thumb2/buildvector-crash.ll b/test/CodeGen/Thumb2/buildvector-crash.ll index 01ef472..ce42f4b 100644 --- a/test/CodeGen/Thumb2/buildvector-crash.ll +++ b/test/CodeGen/Thumb2/buildvector-crash.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O3 -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8 | FileCheck %s +; RUN: llc < %s -O3 -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 | FileCheck %s ; Formerly crashed, 3573915. 
define void @RotateStarsFP_Vec() nounwind { @@ -13,5 +13,5 @@ bb8: ; preds = %bb8, %bb.nph372 store <4 x float> %3, <4 x float>* undef, align 4 br label %bb8 ; CHECK: RotateStarsFP_Vec: -; CHECK: vldmia +; CHECK: vld1.64 } |