diff options
author | Jim Grosbach <grosbach@apple.com> | 2013-09-03 20:08:17 +0000 |
---|---|---|
committer | Jim Grosbach <grosbach@apple.com> | 2013-09-03 20:08:17 +0000 |
commit | b81d7a89129ff1621fa609144b400df3500542d6 (patch) | |
tree | 3b3fe8da2dc87227452cb6babf29feb8c0d577a0 /test | |
parent | bd18c8d0903b695bd503a45cf11901d48eea61bd (diff) | |
download | external_llvm-b81d7a89129ff1621fa609144b400df3500542d6.zip external_llvm-b81d7a89129ff1621fa609144b400df3500542d6.tar.gz external_llvm-b81d7a89129ff1621fa609144b400df3500542d6.tar.bz2 |
Revert "Revert "ARM: Improve pattern for isel mul of vector by scalar.""
This reverts commit r189648.
Fixes for the previously failing clang-side arm_neon_intrinsics test
cases will be checked in separately.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189841 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r-- | test/CodeGen/ARM/vmul.ll | 18 |
1 files changed, 18 insertions, 0 deletions
diff --git a/test/CodeGen/ARM/vmul.ll b/test/CodeGen/ARM/vmul.ll index 6210ad3..5e5e99b 100644 --- a/test/CodeGen/ARM/vmul.ll +++ b/test/CodeGen/ARM/vmul.ll @@ -623,3 +623,21 @@ entry: store <4 x i32> %predphi290.v.i, <4 x i32>* undef, align 4 ret void } + +define void @foo(<4 x float> * %a, <4 x float>* nocapture %dst, float* nocapture readonly %src) nounwind { +; Look for doing a normal scalar FP load rather than a to-all-lanes load. +; e.g., "ldr s0, [r2]" rather than "vld1.32 {d18[], d19[]}, [r2:32]" +; Then check that the vector multiply has folded the splat to all lanes +; and used a vector * scalar instruction. +; CHECK: vldr {{s[0-9]+}}, [r2] +; CHECK: vmul.f32 q8, q8, d0[0] + %tmp = load float* %src, align 4 + %tmp5 = load <4 x float>* %a, align 4 + %tmp6 = insertelement <4 x float> undef, float %tmp, i32 0 + %tmp7 = insertelement <4 x float> %tmp6, float %tmp, i32 1 + %tmp8 = insertelement <4 x float> %tmp7, float %tmp, i32 2 + %tmp9 = insertelement <4 x float> %tmp8, float %tmp, i32 3 + %tmp10 = fmul <4 x float> %tmp9, %tmp5 + store <4 x float> %tmp10, <4 x float>* %dst, align 4 + ret void +} |