diff options
| | | |
|---|---|---|
| author | Bob Wilson <bob.wilson@apple.com> | 2010-05-22 00:23:12 +0000 |
| committer | Bob Wilson <bob.wilson@apple.com> | 2010-05-22 00:23:12 +0000 |
| commit | be751cfe9cbcc760e24599a59e5b9699d4d4f9e0 (patch) | |
| tree | 14c300ae47f4ee239f4174dd488de5e8e33ba24d /test/CodeGen | |
| parent | 8116ca5134b355b897450f9a537c9c77e1f08723 (diff) | |
| download | external_llvm-be751cfe9cbcc760e24599a59e5b9699d4d4f9e0.zip external_llvm-be751cfe9cbcc760e24599a59e5b9699d4d4f9e0.tar.gz external_llvm-be751cfe9cbcc760e24599a59e5b9699d4d4f9e0.tar.bz2 | |
Recognize more BUILD_VECTORs and VECTOR_SHUFFLEs that can be implemented by
copying VFP subregs. This exposed a bunch of dead code in the *spill-q.ll
tests, so I tweaked those tests to keep that code from being optimized away.
Radar 7872877.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@104415 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen')
-rw-r--r-- | test/CodeGen/ARM/2010-05-21-BuildVector.ll | 43 | ||||
-rw-r--r-- | test/CodeGen/ARM/spill-q.ll | 3 | ||||
-rw-r--r-- | test/CodeGen/Thumb2/thumb2-spill-q.ll | 3 |
3 files changed, 47 insertions, 2 deletions
diff --git a/test/CodeGen/ARM/2010-05-21-BuildVector.ll b/test/CodeGen/ARM/2010-05-21-BuildVector.ll new file mode 100644 index 0000000..6b19490 --- /dev/null +++ b/test/CodeGen/ARM/2010-05-21-BuildVector.ll @@ -0,0 +1,43 @@ +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s +; Radar 7872877 + +define arm_apcscc void @test(float* %fltp, i32 %packedValue, float* %table) nounwind { +entry: + %0 = load float* %fltp + %1 = insertelement <4 x float> undef, float %0, i32 0 + %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer + %3 = shl i32 %packedValue, 16 + %4 = ashr i32 %3, 30 + %.sum = add i32 %4, 4 + %5 = getelementptr inbounds float* %table, i32 %.sum +;CHECK: vldr.32 s + %6 = load float* %5, align 4 + %tmp11 = insertelement <4 x float> undef, float %6, i32 0 + %7 = shl i32 %packedValue, 18 + %8 = ashr i32 %7, 30 + %.sum12 = add i32 %8, 4 + %9 = getelementptr inbounds float* %table, i32 %.sum12 +;CHECK: vldr.32 s + %10 = load float* %9, align 4 + %tmp9 = insertelement <4 x float> %tmp11, float %10, i32 1 + %11 = shl i32 %packedValue, 20 + %12 = ashr i32 %11, 30 + %.sum13 = add i32 %12, 4 + %13 = getelementptr inbounds float* %table, i32 %.sum13 +;CHECK: vldr.32 s + %14 = load float* %13, align 4 + %tmp7 = insertelement <4 x float> %tmp9, float %14, i32 2 + %15 = shl i32 %packedValue, 22 + %16 = ashr i32 %15, 30 + %.sum14 = add i32 %16, 4 + %17 = getelementptr inbounds float* %table, i32 %.sum14 +;CHECK: vldr.32 s + %18 = load float* %17, align 4 + %tmp5 = insertelement <4 x float> %tmp7, float %18, i32 3 + %19 = fmul <4 x float> %tmp5, %2 + %20 = bitcast float* %fltp to i8* + tail call void @llvm.arm.neon.vst1.v4f32(i8* %20, <4 x float> %19) + ret void +} + +declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>) nounwind diff --git a/test/CodeGen/ARM/spill-q.ll b/test/CodeGen/ARM/spill-q.ll index 5ad7ecc..03de0c8 100644 --- a/test/CodeGen/ARM/spill-q.ll +++ b/test/CodeGen/ARM/spill-q.ll @@ -46,7 +46,8 @@ bb4: ; preds = %bb193, 
%entry %20 = shufflevector <2 x float> %19, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1] %21 = fadd <4 x float> zeroinitializer, %20 ; <<4 x float>> [#uses=2] %22 = fcmp ogt <4 x float> %besterror.0.2264, %21 ; <<4 x i1>> [#uses=0] - br i1 undef, label %bb193, label %bb186 + %tmp = extractelement <4 x i1> %22, i32 0 + br i1 %tmp, label %bb193, label %bb186 bb186: ; preds = %bb4 br label %bb193 diff --git a/test/CodeGen/Thumb2/thumb2-spill-q.ll b/test/CodeGen/Thumb2/thumb2-spill-q.ll index ff178b4..bf9c052 100644 --- a/test/CodeGen/Thumb2/thumb2-spill-q.ll +++ b/test/CodeGen/Thumb2/thumb2-spill-q.ll @@ -46,7 +46,8 @@ bb4: ; preds = %bb193, %entry %20 = shufflevector <2 x float> %19, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1] %21 = fadd <4 x float> zeroinitializer, %20 ; <<4 x float>> [#uses=2] %22 = fcmp ogt <4 x float> %besterror.0.2264, %21 ; <<4 x i1>> [#uses=0] - br i1 undef, label %bb193, label %bb186 + %tmp = extractelement <4 x i1> %22, i32 0 + br i1 %tmp, label %bb193, label %bb186 bb186: ; preds = %bb4 br label %bb193 |