From 746fa17d59000be7f642a0b6c5223f29c5e10f00 Mon Sep 17 00:00:00 2001 From: Bob Wilson Date: Fri, 10 Dec 2010 22:13:32 +0000 Subject: Add float patterns for Neon vld1-lane/dup and vst1-lane operations. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@121583 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrNEON.td | 17 +++++++++++++++++ test/CodeGen/ARM/vdup.ll | 18 ------------------ test/CodeGen/ARM/vlddup.ll | 18 ++++++++++++++++++ test/CodeGen/ARM/vldlane.ll | 18 ++++++++++++++++++ test/CodeGen/ARM/vstlane.ll | 18 ++++++++++++++++++ 5 files changed, 71 insertions(+), 18 deletions(-) diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 2286a12..2cdc1d9 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -546,6 +546,13 @@ def VLD1LNq8Pseudo : VLD1QLNPseudo; def VLD1LNq16Pseudo : VLD1QLNPseudo; def VLD1LNq32Pseudo : VLD1QLNPseudo; +def : Pat<(vector_insert (v2f32 DPR:$src), + (f32 (load addrmode6:$addr)), imm:$lane), + (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>; +def : Pat<(vector_insert (v4f32 QPR:$src), + (f32 (load addrmode6:$addr)), imm:$lane), + (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; + let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // ...with address register writeback: @@ -813,6 +820,11 @@ def VLD1DUPq8Pseudo : VLD1QDUPPseudo; def VLD1DUPq16Pseudo : VLD1QDUPPseudo; def VLD1DUPq32Pseudo : VLD1QDUPPseudo; +def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), + (VLD1DUPd32 addrmode6:$addr)>; +def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), + (VLD1DUPq32Pseudo addrmode6:$addr)>; + let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { class VLD1QDUP op7_4, string Dt> @@ -1365,6 +1377,11 @@ def VST1LNq8Pseudo : VST1QLNPseudo; def VST1LNq16Pseudo : VST1QLNPseudo; def VST1LNq32Pseudo : VST1QLNPseudo; +def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr), + (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>; +def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr), + (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; + let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { // ...with address register writeback: diff --git a/test/CodeGen/ARM/vdup.ll b/test/CodeGen/ARM/vdup.ll index a545f6c..e99fac1 100644 --- a/test/CodeGen/ARM/vdup.ll +++ b/test/CodeGen/ARM/vdup.ll @@ -162,24 +162,6 @@ define <4 x float> @v_shuffledupQfloat(float %A) nounwind { ret <4 x float> %tmp2 } -define <2 x float> @v_shuffledupfloat2(float* %A) nounwind { -;CHECK: v_shuffledupfloat2: -;CHECK: vdup.32 - %tmp0 = load float* %A - %tmp1 = insertelement <2 x float> undef, float %tmp0, i32 0 - %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer - ret <2 x float> %tmp2 -} - -define <4 x float> @v_shuffledupQfloat2(float* %A) nounwind { -;CHECK: v_shuffledupQfloat2: -;CHECK: vdup.32 - %tmp0 = load float* %A - %tmp1 = insertelement <4 x float> undef, float %tmp0, i32 0 - %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer - ret <4 x float> %tmp2 -} - define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind { ;CHECK: vduplane8: ;CHECK: vdup.8 diff --git a/test/CodeGen/ARM/vlddup.ll b/test/CodeGen/ARM/vlddup.ll index bb07ce2..033febb 100644 --- a/test/CodeGen/ARM/vlddup.ll +++ b/test/CodeGen/ARM/vlddup.ll @@ -30,6 +30,15 @@ define <2 x i32> @vld1dupi32(i32* %A) nounwind { ret <2 x i32> %tmp3 } +define <2 x float> @vld1dupf(float* %A) nounwind { +;CHECK: vld1dupf: +;CHECK: vld1.32 {d16[]}, [r0] + %tmp0 = load float* %A + %tmp1 = insertelement <2 x float> undef, float %tmp0, i32 0 + %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer + ret <2 x float> %tmp2 +} + define <16 x i8> @vld1dupQi8(i8* %A) nounwind { ;CHECK: vld1dupQi8: ;Check the (default) alignment value. @@ -40,6 +49,15 @@ define <16 x i8> @vld1dupQi8(i8* %A) nounwind { ret <16 x i8> %tmp3 } +define <4 x float> @vld1dupQf(float* %A) nounwind { +;CHECK: vld1dupQf: +;CHECK: vld1.32 {d16[], d17[]}, [r0] + %tmp0 = load float* %A + %tmp1 = insertelement <4 x float> undef, float %tmp0, i32 0 + %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer + ret <4 x float> %tmp2 +} + %struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> } %struct.__neon_int4x16x2_t = type { <4 x i16>, <4 x i16> } %struct.__neon_int2x32x2_t = type { <2 x i32>, <2 x i32> } diff --git a/test/CodeGen/ARM/vldlane.ll b/test/CodeGen/ARM/vldlane.ll index 09b68a6..fe2a0dc 100644 --- a/test/CodeGen/ARM/vldlane.ll +++ b/test/CodeGen/ARM/vldlane.ll @@ -30,6 +30,15 @@ define <2 x i32> @vld1lanei32(i32* %A, <2 x i32>* %B) nounwind { ret <2 x i32> %tmp3 } +define <2 x float> @vld1lanef(float* %A, <2 x float>* %B) nounwind { +;CHECK: vld1lanef: +;CHECK: vld1.32 {d16[1]}, [r0] + %tmp1 = load <2 x float>* %B + %tmp2 = load float* %A, align 4 + %tmp3 = insertelement <2 x float> %tmp1, float %tmp2, i32 1 + ret <2 x float> %tmp3 +} + define <16 x i8> @vld1laneQi8(i8* %A, <16 x i8>* %B) nounwind { ;CHECK: vld1laneQi8: ;CHECK: vld1.8 {d17[1]}, [r0] @@ -57,6 +66,15 @@ define <4 x i32> @vld1laneQi32(i32* %A, <4 x i32>* %B) nounwind { ret <4 x i32> %tmp3 } +define <4 x float> @vld1laneQf(float* %A, <4 x float>* %B) nounwind { +;CHECK: vld1laneQf: +;CHECK: vld1.32 {d16[0]}, [r0] + %tmp1 = load <4 x float>* %B + %tmp2 = load float* %A + %tmp3 = insertelement <4 x float> %tmp1, float %tmp2, i32 0 + ret <4 x float> %tmp3 +} + %struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> } %struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> } %struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> } diff --git a/test/CodeGen/ARM/vstlane.ll b/test/CodeGen/ARM/vstlane.ll index c5387e0..9aa8d59 100644 --- a/test/CodeGen/ARM/vstlane.ll +++ b/test/CodeGen/ARM/vstlane.ll @@ -30,6 +30,15 @@ define void @vst1lanei32(i32* %A, <2 x i32>* %B) nounwind { ret void } +define void @vst1lanef(float* %A, <2 x float>* %B) nounwind { +;CHECK: vst1lanef: +;CHECK: vst1.32 {d16[1]}, [r0] + %tmp1 = load <2 x float>* %B + %tmp2 = extractelement <2 x float> %tmp1, i32 1 + store float %tmp2, float* %A + ret void +} + define void @vst1laneQi8(i8* %A, <16 x i8>* %B) nounwind { ;CHECK: vst1laneQi8: ;CHECK: vst1.8 {d17[1]}, [r0] @@ -57,6 +66,15 @@ define void @vst1laneQi32(i32* %A, <4 x i32>* %B) nounwind { ret void } +define void @vst1laneQf(float* %A, <4 x float>* %B) nounwind { +;CHECK: vst1laneQf: +;CHECK: vst1.32 {d17[1]}, [r0] + %tmp1 = load <4 x float>* %B + %tmp2 = extractelement <4 x float> %tmp1, i32 3 + store float %tmp2, float* %A + ret void +} + define void @vst2lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vst2lanei8: ;Check the alignment value. Max for this instruction is 16 bits: -- cgit v1.1