about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bill Wendling <isanbard@gmail.com> 2013-11-26 13:35:26 +0000
committer: Bill Wendling <isanbard@gmail.com> 2013-11-26 13:35:26 +0000
commit: f38d6740c6532c452983731521b8323d75fc4745 (patch)
tree: 319d81f1083a6d236b797c90fee63b7788de9501
parent: c23b3b05499f4518c64a953eea0a2496739e6d24 (diff)
download: external_llvm-f38d6740c6532c452983731521b8323d75fc4745.zip
          external_llvm-f38d6740c6532c452983731521b8323d75fc4745.tar.gz
          external_llvm-f38d6740c6532c452983731521b8323d75fc4745.tar.bz2
Merging r195424:
------------------------------------------------------------------------
r195424 | haoliu | 2013-11-22 00:47:22 -0800 (Fri, 22 Nov 2013) | 4 lines

Fix the bugs about AArch64 Load/Store vector types and bitcast between i64 and vector types.
e.g. "%tmp = load <2 x i64>* %ptr" can't be selected.
     "%tmp = bitcast i64 %in to <2 x i32>" can't be selected.

------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@195764 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/Target/AArch64/AArch64InstrNEON.td | 60
-rw-r--r-- test/CodeGen/AArch64/neon-copy.ll | 83
-rw-r--r-- test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll | 72
3 files changed, 215 insertions, 0 deletions
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td
index 98512ec..206ccd6 100644
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -3187,6 +3187,54 @@ def ST1x3_1D : NeonI_STVList<0, 0b0110, 0b11, VTriple1D_operand, "st1">;
defm ST1x4 : STVList_BHSD<0b0010, "VQuad", "st1">;
def ST1x4_1D : NeonI_STVList<0, 0b0010, 0b11, VQuad1D_operand, "st1">;
+def : Pat<(v2f64 (load GPR64xsp:$addr)), (LD1_2D GPR64xsp:$addr)>;
+def : Pat<(v2i64 (load GPR64xsp:$addr)), (LD1_2D GPR64xsp:$addr)>;
+
+def : Pat<(v4f32 (load GPR64xsp:$addr)), (LD1_4S GPR64xsp:$addr)>;
+def : Pat<(v4i32 (load GPR64xsp:$addr)), (LD1_4S GPR64xsp:$addr)>;
+
+def : Pat<(v8i16 (load GPR64xsp:$addr)), (LD1_8H GPR64xsp:$addr)>;
+def : Pat<(v16i8 (load GPR64xsp:$addr)), (LD1_16B GPR64xsp:$addr)>;
+
+def : Pat<(v1f64 (load GPR64xsp:$addr)), (LD1_1D GPR64xsp:$addr)>;
+def : Pat<(v1i64 (load GPR64xsp:$addr)), (LD1_1D GPR64xsp:$addr)>;
+
+def : Pat<(v2f32 (load GPR64xsp:$addr)), (LD1_2S GPR64xsp:$addr)>;
+def : Pat<(v2i32 (load GPR64xsp:$addr)), (LD1_2S GPR64xsp:$addr)>;
+
+def : Pat<(v4i16 (load GPR64xsp:$addr)), (LD1_4H GPR64xsp:$addr)>;
+def : Pat<(v8i8 (load GPR64xsp:$addr)), (LD1_8B GPR64xsp:$addr)>;
+
+def : Pat<(store (v2i64 VPR128:$value), GPR64xsp:$addr),
+ (ST1_2D GPR64xsp:$addr, VPR128:$value)>;
+def : Pat<(store (v2f64 VPR128:$value), GPR64xsp:$addr),
+ (ST1_2D GPR64xsp:$addr, VPR128:$value)>;
+
+def : Pat<(store (v4i32 VPR128:$value), GPR64xsp:$addr),
+ (ST1_4S GPR64xsp:$addr, VPR128:$value)>;
+def : Pat<(store (v4f32 VPR128:$value), GPR64xsp:$addr),
+ (ST1_4S GPR64xsp:$addr, VPR128:$value)>;
+
+def : Pat<(store (v8i16 VPR128:$value), GPR64xsp:$addr),
+ (ST1_8H GPR64xsp:$addr, VPR128:$value)>;
+def : Pat<(store (v16i8 VPR128:$value), GPR64xsp:$addr),
+ (ST1_16B GPR64xsp:$addr, VPR128:$value)>;
+
+def : Pat<(store (v1i64 VPR64:$value), GPR64xsp:$addr),
+ (ST1_1D GPR64xsp:$addr, VPR64:$value)>;
+def : Pat<(store (v1f64 VPR64:$value), GPR64xsp:$addr),
+ (ST1_1D GPR64xsp:$addr, VPR64:$value)>;
+
+def : Pat<(store (v2i32 VPR64:$value), GPR64xsp:$addr),
+ (ST1_2S GPR64xsp:$addr, VPR64:$value)>;
+def : Pat<(store (v2f32 VPR64:$value), GPR64xsp:$addr),
+ (ST1_2S GPR64xsp:$addr, VPR64:$value)>;
+
+def : Pat<(store (v4i16 VPR64:$value), GPR64xsp:$addr),
+ (ST1_4H GPR64xsp:$addr, VPR64:$value)>;
+def : Pat<(store (v8i8 VPR64:$value), GPR64xsp:$addr),
+ (ST1_8B GPR64xsp:$addr, VPR64:$value)>;
+
// End of vector load/store multiple N-element structure(class SIMD lselem)
// The followings are post-index vector load/store multiple N-element
@@ -5739,6 +5787,12 @@ def : Pat<(f32 (bitconvert (v1f32 FPR32:$src))), (f32 FPR32:$src)>;
def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(i64 (bitconvert (v1i64 FPR64:$src))), (FMOVxd $src)>;
+def : Pat<(i64 (bitconvert (v1f64 FPR64:$src))), (FMOVxd $src)>;
+def : Pat<(i64 (bitconvert (v2i32 FPR64:$src))), (FMOVxd $src)>;
+def : Pat<(i64 (bitconvert (v2f32 FPR64:$src))), (FMOVxd $src)>;
+def : Pat<(i64 (bitconvert (v4i16 FPR64:$src))), (FMOVxd $src)>;
+def : Pat<(i64 (bitconvert (v8i8 FPR64:$src))), (FMOVxd $src)>;
+
def : Pat<(i32 (bitconvert (v1i32 FPR32:$src))), (FMOVws $src)>;
def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;
@@ -5765,6 +5819,12 @@ def : Pat<(v1f32 (bitconvert (f32 FPR32:$src))), (v1f32 FPR32:$src)>;
def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
+def : Pat<(v1f64 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
+def : Pat<(v2i32 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
+def : Pat<(v2f32 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
+def : Pat<(v4i16 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
+def : Pat<(v8i8 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
+
def : Pat<(v1i32 (bitconvert (i32 GPR32:$src))), (FMOVsw $src)>;
def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
diff --git a/test/CodeGen/AArch64/neon-copy.ll b/test/CodeGen/AArch64/neon-copy.ll
index e1afc24..e18530e 100644
--- a/test/CodeGen/AArch64/neon-copy.ll
+++ b/test/CodeGen/AArch64/neon-copy.ll
@@ -530,3 +530,86 @@ define <2 x i64> @test_vdupq_laneq_s64(<2 x i64> %v1) #0 {
ret <2 x i64> %shuffle
}
+define i64 @test_bitcastv8i8toi64(<8 x i8> %in) {
+; CHECK-LABEL: test_bitcastv8i8toi64:
+ %res = bitcast <8 x i8> %in to i64
+; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
+ ret i64 %res
+}
+
+define i64 @test_bitcastv4i16toi64(<4 x i16> %in) {
+; CHECK-LABEL: test_bitcastv4i16toi64:
+ %res = bitcast <4 x i16> %in to i64
+; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
+ ret i64 %res
+}
+
+define i64 @test_bitcastv2i32toi64(<2 x i32> %in) {
+; CHECK-LABEL: test_bitcastv2i32toi64:
+ %res = bitcast <2 x i32> %in to i64
+; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
+ ret i64 %res
+}
+
+define i64 @test_bitcastv2f32toi64(<2 x float> %in) {
+; CHECK-LABEL: test_bitcastv2f32toi64:
+ %res = bitcast <2 x float> %in to i64
+; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
+ ret i64 %res
+}
+
+define i64 @test_bitcastv1i64toi64(<1 x i64> %in) {
+; CHECK-LABEL: test_bitcastv1i64toi64:
+ %res = bitcast <1 x i64> %in to i64
+; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
+ ret i64 %res
+}
+
+define i64 @test_bitcastv1f64toi64(<1 x double> %in) {
+; CHECK-LABEL: test_bitcastv1f64toi64:
+ %res = bitcast <1 x double> %in to i64
+; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
+ ret i64 %res
+}
+
+define <8 x i8> @test_bitcasti64tov8i8(i64 %in) {
+; CHECK-LABEL: test_bitcasti64tov8i8:
+ %res = bitcast i64 %in to <8 x i8>
+; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+ ret <8 x i8> %res
+}
+
+define <4 x i16> @test_bitcasti64tov4i16(i64 %in) {
+; CHECK-LABEL: test_bitcasti64tov4i16:
+ %res = bitcast i64 %in to <4 x i16>
+; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+ ret <4 x i16> %res
+}
+
+define <2 x i32> @test_bitcasti64tov2i32(i64 %in) {
+; CHECK-LABEL: test_bitcasti64tov2i32:
+ %res = bitcast i64 %in to <2 x i32>
+; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+ ret <2 x i32> %res
+}
+
+define <2 x float> @test_bitcasti64tov2f32(i64 %in) {
+; CHECK-LABEL: test_bitcasti64tov2f32:
+ %res = bitcast i64 %in to <2 x float>
+; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+ ret <2 x float> %res
+}
+
+define <1 x i64> @test_bitcasti64tov1i64(i64 %in) {
+; CHECK-LABEL: test_bitcasti64tov1i64:
+ %res = bitcast i64 %in to <1 x i64>
+; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+ ret <1 x i64> %res
+}
+
+define <1 x double> @test_bitcasti64tov1f64(i64 %in) {
+; CHECK-LABEL: test_bitcasti64tov1f64:
+ %res = bitcast i64 %in to <1 x double>
+; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+ ret <1 x double> %res
+}
\ No newline at end of file
diff --git a/test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll b/test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll
index d0e7fc1..d5557c0 100644
--- a/test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll
+++ b/test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll
@@ -1,5 +1,77 @@
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+define void @test_ldst1_v16i8(<16 x i8>* %ptr, <16 x i8>* %ptr2) {
+; CHECK-LABEL: test_ldst1_v16i8:
+; CHECK: ld1 {v{{[0-9]+}}.16b}, [x{{[0-9]+|sp}}]
+; CHECK: st1 {v{{[0-9]+}}.16b}, [x{{[0-9]+|sp}}]
+ %tmp = load <16 x i8>* %ptr
+ store <16 x i8> %tmp, <16 x i8>* %ptr2
+ ret void
+}
+
+define void @test_ldst1_v8i16(<8 x i16>* %ptr, <8 x i16>* %ptr2) {
+; CHECK-LABEL: test_ldst1_v8i16:
+; CHECK: ld1 {v{{[0-9]+}}.8h}, [x{{[0-9]+|sp}}]
+; CHECK: st1 {v{{[0-9]+}}.8h}, [x{{[0-9]+|sp}}]
+ %tmp = load <8 x i16>* %ptr
+ store <8 x i16> %tmp, <8 x i16>* %ptr2
+ ret void
+}
+
+define void @test_ldst1_v4i32(<4 x i32>* %ptr, <4 x i32>* %ptr2) {
+; CHECK-LABEL: test_ldst1_v4i32:
+; CHECK: ld1 {v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}]
+; CHECK: st1 {v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}]
+ %tmp = load <4 x i32>* %ptr
+ store <4 x i32> %tmp, <4 x i32>* %ptr2
+ ret void
+}
+
+define void @test_ldst1_v2i64(<2 x i64>* %ptr, <2 x i64>* %ptr2) {
+; CHECK-LABEL: test_ldst1_v2i64:
+; CHECK: ld1 {v{{[0-9]+}}.2d}, [x{{[0-9]+|sp}}]
+; CHECK: st1 {v{{[0-9]+}}.2d}, [x{{[0-9]+|sp}}]
+ %tmp = load <2 x i64>* %ptr
+ store <2 x i64> %tmp, <2 x i64>* %ptr2
+ ret void
+}
+
+define void @test_ldst1_v8i8(<8 x i8>* %ptr, <8 x i8>* %ptr2) {
+; CHECK-LABEL: test_ldst1_v8i8:
+; CHECK: ld1 {v{{[0-9]+}}.8b}, [x{{[0-9]+|sp}}]
+; CHECK: st1 {v{{[0-9]+}}.8b}, [x{{[0-9]+|sp}}]
+ %tmp = load <8 x i8>* %ptr
+ store <8 x i8> %tmp, <8 x i8>* %ptr2
+ ret void
+}
+
+define void @test_ldst1_v4i16(<4 x i16>* %ptr, <4 x i16>* %ptr2) {
+; CHECK-LABEL: test_ldst1_v4i16:
+; CHECK: ld1 {v{{[0-9]+}}.4h}, [x{{[0-9]+|sp}}]
+; CHECK: st1 {v{{[0-9]+}}.4h}, [x{{[0-9]+|sp}}]
+ %tmp = load <4 x i16>* %ptr
+ store <4 x i16> %tmp, <4 x i16>* %ptr2
+ ret void
+}
+
+define void @test_ldst1_v2i32(<2 x i32>* %ptr, <2 x i32>* %ptr2) {
+; CHECK-LABEL: test_ldst1_v2i32:
+; CHECK: ld1 {v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}]
+; CHECK: st1 {v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}]
+ %tmp = load <2 x i32>* %ptr
+ store <2 x i32> %tmp, <2 x i32>* %ptr2
+ ret void
+}
+
+define void @test_ldst1_v1i64(<1 x i64>* %ptr, <1 x i64>* %ptr2) {
+; CHECK-LABEL: test_ldst1_v1i64:
+; CHECK: ld1 {v{{[0-9]+}}.1d}, [x{{[0-9]+|sp}}]
+; CHECK: st1 {v{{[0-9]+}}.1d}, [x{{[0-9]+|sp}}]
+ %tmp = load <1 x i64>* %ptr
+ store <1 x i64> %tmp, <1 x i64>* %ptr2
+ ret void
+}
+
%struct.int8x16x2_t = type { [2 x <16 x i8>] }
%struct.int16x8x2_t = type { [2 x <8 x i16>] }
%struct.int32x4x2_t = type { [2 x <4 x i32>] }