diff options
Diffstat (limited to 'test/CodeGen/AArch64')
-rw-r--r-- | test/CodeGen/AArch64/arm64-convert-v4f64.ll | 33 | ||||
-rw-r--r-- | test/CodeGen/AArch64/fp16-v16-instructions.ll | 105 | ||||
-rw-r--r-- | test/CodeGen/AArch64/fp16-v4-instructions.ll | 92 | ||||
-rw-r--r-- | test/CodeGen/AArch64/fp16-v8-instructions.ll | 108 | ||||
-rw-r--r-- | test/CodeGen/AArch64/fp16-vector-nvcast.ll | 89 |
5 files changed, 425 insertions, 2 deletions
diff --git a/test/CodeGen/AArch64/arm64-convert-v4f64.ll b/test/CodeGen/AArch64/arm64-convert-v4f64.ll
index c4e3e4e..b8da399 100644
--- a/test/CodeGen/AArch64/arm64-convert-v4f64.ll
+++ b/test/CodeGen/AArch64/arm64-convert-v4f64.ll
@@ -31,3 +31,36 @@ define <8 x i8> @fptosi_v4f64_to_v4i8(<8 x double>* %ptr) {
   ret <8 x i8> %tmp2
 }
 
+define <4 x half> @uitofp_v4i64_to_v4f16(<4 x i64>* %ptr) {
+; CHECK: uitofp_v4i64_to_v4f16
+; CHECK-DAG: ucvtf v[[LHS:[0-9]+]].2d, v0.2d
+; CHECK-DAG: ucvtf v[[RHS:[0-9]+]].2d, v1.2d
+; CHECK-DAG: fcvtn v[[MID:[0-9]+]].2s, v[[LHS]].2d
+; CHECK-DAG: fcvtn2 v[[MID]].4s, v[[RHS]].2d
+; CHECK: fcvtn v0.4h, v[[MID]].4s
+  %tmp1 = load <4 x i64>, <4 x i64>* %ptr
+  %tmp2 = uitofp <4 x i64> %tmp1 to <4 x half>
+  ret <4 x half> %tmp2
+}
+
+define <4 x i16> @trunc_v4i64_to_v4i16(<4 x i64>* %ptr) {
+; CHECK: trunc_v4i64_to_v4i16
+; CHECK: xtn
+; CHECK: xtn2
+; CHECK: xtn
+  %tmp1 = load <4 x i64>, <4 x i64>* %ptr
+  %tmp2 = trunc <4 x i64> %tmp1 to <4 x i16>
+  ret <4 x i16> %tmp2
+}
+
+define <4 x i16> @fptoui_v4f64_to_v4i16(<4 x double>* %ptr) {
+; CHECK: fptoui_v4f64_to_v4i16
+; CHECK-DAG: fcvtzu v[[LHS:[0-9]+]].2d, v0.2d
+; CHECK-DAG: fcvtzu v[[RHS:[0-9]+]].2d, v1.2d
+; CHECK-DAG: xtn v[[MID:[0-9]+]].2s, v[[LHS]].2d
+; CHECK-DAG: xtn2 v[[MID]].4s, v[[RHS]].2d
+; CHECK: xtn v0.4h, v[[MID]].4s
+  %tmp1 = load <4 x double>, <4 x double>* %ptr
+  %tmp2 = fptoui <4 x double> %tmp1 to <4 x i16>
+  ret <4 x i16> %tmp2
+}
diff --git a/test/CodeGen/AArch64/fp16-v16-instructions.ll b/test/CodeGen/AArch64/fp16-v16-instructions.ll
new file mode 100644
index 0000000..1af2bd1
--- /dev/null
+++ b/test/CodeGen/AArch64/fp16-v16-instructions.ll
@@ -0,0 +1,105 @@
+; RUN: llc < %s -mtriple=aarch64-none-eabi | FileCheck %s
+
+
+define <16 x half> @sitofp_i32(<16 x i32> %a) #0 {
+; CHECK-LABEL: sitofp_i32:
+; CHECK-DAG: scvtf [[S0:v[0-9]+\.4s]], v0.4s
+; CHECK-DAG: scvtf [[S1:v[0-9]+\.4s]], v1.4s
+; CHECK-DAG: scvtf [[S2:v[0-9]+\.4s]], v2.4s
+; CHECK-DAG: scvtf [[S3:v[0-9]+\.4s]], v3.4s
+; CHECK-DAG: fcvtn v0.4h, [[S0]]
+; CHECK-DAG: fcvtn v1.4h, [[S2]]
+; CHECK-DAG: fcvtn v[[R1:[0-9]+]].4h, [[S1]]
+; CHECK-DAG: fcvtn v[[R3:[0-9]+]].4h, [[S3]]
+; CHECK-DAG: ins v0.d[1], v[[R1]].d[0]
+; CHECK-DAG: ins v1.d[1], v[[R3]].d[0]
+
+  %1 = sitofp <16 x i32> %a to <16 x half> ; expected: scvtf per 4s quarter, fcvtn to 4h, ins merges the halves
+  ret <16 x half> %1
+}
+
+
+define <16 x half> @sitofp_i64(<16 x i64> %a) #0 {
+; CHECK-LABEL: sitofp_i64:
+; CHECK-DAG: scvtf [[D0:v[0-9]+\.2d]], v0.2d
+; CHECK-DAG: scvtf [[D1:v[0-9]+\.2d]], v1.2d
+; CHECK-DAG: scvtf [[D2:v[0-9]+\.2d]], v2.2d
+; CHECK-DAG: scvtf [[D3:v[0-9]+\.2d]], v3.2d
+; CHECK-DAG: scvtf [[D4:v[0-9]+\.2d]], v4.2d
+; CHECK-DAG: scvtf [[D5:v[0-9]+\.2d]], v5.2d
+; CHECK-DAG: scvtf [[D6:v[0-9]+\.2d]], v6.2d
+; CHECK-DAG: scvtf [[D7:v[0-9]+\.2d]], v7.2d
+
+; CHECK-DAG: fcvtn [[S0:v[0-9]+]].2s, [[D0]]
+; CHECK-DAG: fcvtn [[S1:v[0-9]+]].2s, [[D2]]
+; CHECK-DAG: fcvtn [[S2:v[0-9]+]].2s, [[D4]]
+; CHECK-DAG: fcvtn [[S3:v[0-9]+]].2s, [[D6]]
+
+; CHECK-DAG: fcvtn2 [[S0]].4s, [[D1]]
+; CHECK-DAG: fcvtn2 [[S1]].4s, [[D3]]
+; CHECK-DAG: fcvtn2 [[S2]].4s, [[D5]]
+; CHECK-DAG: fcvtn2 [[S3]].4s, [[D7]]
+
+; CHECK-DAG: fcvtn v0.4h, [[S0]].4s
+; CHECK-DAG: fcvtn v1.4h, [[S2]].4s
+; CHECK-DAG: fcvtn v[[R1:[0-9]+]].4h, [[S1]].4s
+; CHECK-DAG: fcvtn v[[R3:[0-9]+]].4h, [[S3]].4s
+; CHECK-DAG: ins v0.d[1], v[[R1]].d[0]
+; CHECK-DAG: ins v1.d[1], v[[R3]].d[0]
+
+  %1 = sitofp <16 x i64> %a to <16 x half>
+  ret <16 x half> %1
+}
+
+
+define <16 x half> @uitofp_i32(<16 x i32> %a) #0 {
+; CHECK-LABEL: uitofp_i32:
+; CHECK-DAG: ucvtf [[S0:v[0-9]+\.4s]], v0.4s
+; CHECK-DAG: ucvtf [[S1:v[0-9]+\.4s]], v1.4s
+; CHECK-DAG: ucvtf [[S2:v[0-9]+\.4s]], v2.4s
+; CHECK-DAG: ucvtf [[S3:v[0-9]+\.4s]], v3.4s
+; CHECK-DAG: fcvtn v0.4h, [[S0]]
+; CHECK-DAG: fcvtn v1.4h, [[S2]]
+; CHECK-DAG: fcvtn v[[R1:[0-9]+]].4h, [[S1]]
+; CHECK-DAG: fcvtn v[[R3:[0-9]+]].4h, [[S3]]
+; CHECK-DAG: ins v0.d[1], v[[R1]].d[0]
+; CHECK-DAG: ins v1.d[1], v[[R3]].d[0]
+
+  %1 = uitofp <16 x i32> %a to <16 x half> ; expected: ucvtf per 4s quarter, fcvtn to 4h, ins merges the halves
+  ret <16 x half> %1
+}
+
+
+define <16 x half> @uitofp_i64(<16 x i64> %a) #0 {
+; CHECK-LABEL: uitofp_i64:
+; CHECK-DAG: ucvtf [[D0:v[0-9]+\.2d]], v0.2d
+; CHECK-DAG: ucvtf [[D1:v[0-9]+\.2d]], v1.2d
+; CHECK-DAG: ucvtf [[D2:v[0-9]+\.2d]], v2.2d
+; CHECK-DAG: ucvtf [[D3:v[0-9]+\.2d]], v3.2d
+; CHECK-DAG: ucvtf [[D4:v[0-9]+\.2d]], v4.2d
+; CHECK-DAG: ucvtf [[D5:v[0-9]+\.2d]], v5.2d
+; CHECK-DAG: ucvtf [[D6:v[0-9]+\.2d]], v6.2d
+; CHECK-DAG: ucvtf [[D7:v[0-9]+\.2d]], v7.2d
+
+; CHECK-DAG: fcvtn [[S0:v[0-9]+]].2s, [[D0]]
+; CHECK-DAG: fcvtn [[S1:v[0-9]+]].2s, [[D2]]
+; CHECK-DAG: fcvtn [[S2:v[0-9]+]].2s, [[D4]]
+; CHECK-DAG: fcvtn [[S3:v[0-9]+]].2s, [[D6]]
+
+; CHECK-DAG: fcvtn2 [[S0]].4s, [[D1]]
+; CHECK-DAG: fcvtn2 [[S1]].4s, [[D3]]
+; CHECK-DAG: fcvtn2 [[S2]].4s, [[D5]]
+; CHECK-DAG: fcvtn2 [[S3]].4s, [[D7]]
+
+; CHECK-DAG: fcvtn v0.4h, [[S0]].4s
+; CHECK-DAG: fcvtn v1.4h, [[S2]].4s
+; CHECK-DAG: fcvtn v[[R1:[0-9]+]].4h, [[S1]].4s
+; CHECK-DAG: fcvtn v[[R3:[0-9]+]].4h, [[S3]].4s
+; CHECK-DAG: ins v0.d[1], v[[R1]].d[0]
+; CHECK-DAG: ins v1.d[1], v[[R3]].d[0]
+
+  %1 = uitofp <16 x i64> %a to <16 x half>
+  ret <16 x half> %1
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/fp16-v4-instructions.ll b/test/CodeGen/AArch64/fp16-v4-instructions.ll
index 6db4e97..0dbda15 100644
--- a/test/CodeGen/AArch64/fp16-v4-instructions.ll
+++ b/test/CodeGen/AArch64/fp16-v4-instructions.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=aarch64-none-eabi | FileCheck %s
+; RUN: llc < %s -asm-verbose=false -mtriple=aarch64-none-eabi | FileCheck %s
 
 define <4 x half> @add_h(<4 x half> %a, <4 x half> %b) {
 entry:
@@ -129,3 +129,93 @@ define <4 x i16> @bitcast_h_to_i(float, <4 x half> %a) {
   %2 = bitcast <4 x half> %a to <4 x i16>
   ret <4 x i16> %2
 }
+
+
+define <4 x half> @sitofp_i8(<4 x i8> %a) #0 {
+; CHECK-LABEL: sitofp_i8:
+; CHECK-NEXT: shl [[OP1:v[0-9]+\.4h]], v0.4h, #8
+; CHECK-NEXT: sshr [[OP2:v[0-9]+\.4h]], [[OP1]], #8
+; CHECK-NEXT: sshll [[OP3:v[0-9]+\.4s]], [[OP2]], #0
+; CHECK-NEXT: scvtf [[OP4:v[0-9]+\.4s]], [[OP3]]
+; CHECK-NEXT: fcvtn v0.4h, [[OP4]]
+; CHECK-NEXT: ret
+  %1 = sitofp <4 x i8> %a to <4 x half>
+  ret <4 x half> %1
+}
+
+
+define <4 x half> @sitofp_i16(<4 x i16> %a) #0 {
+; CHECK-LABEL: sitofp_i16:
+; CHECK-NEXT: sshll [[OP1:v[0-9]+\.4s]], v0.4h, #0
+; CHECK-NEXT: scvtf [[OP2:v[0-9]+\.4s]], [[OP1]]
+; CHECK-NEXT: fcvtn v0.4h, [[OP2]]
+; CHECK-NEXT: ret
+  %1 = sitofp <4 x i16> %a to <4 x half>
+  ret <4 x half> %1
+}
+
+
+define <4 x half> @sitofp_i32(<4 x i32> %a) #0 {
+; CHECK-LABEL: sitofp_i32:
+; CHECK-NEXT: scvtf [[OP1:v[0-9]+\.4s]], v0.4s
+; CHECK-NEXT: fcvtn v0.4h, [[OP1]]
+  %1 = sitofp <4 x i32> %a to <4 x half>
+  ret <4 x half> %1
+}
+
+
+define <4 x half> @sitofp_i64(<4 x i64> %a) #0 {
+; CHECK-LABEL: sitofp_i64:
+; CHECK-DAG: scvtf [[OP1:v[0-9]+\.2d]], v0.2d
+; CHECK-DAG: scvtf [[OP2:v[0-9]+\.2d]], v1.2d
+; CHECK-DAG: fcvtn [[OP3:v[0-9]+]].2s, [[OP1]]
+; CHECK-NEXT: fcvtn2 [[OP3]].4s, [[OP2]]
+; CHECK-NEXT: fcvtn v0.4h, [[OP3]].4s
+  %1 = sitofp <4 x i64> %a to <4 x half>
+  ret <4 x half> %1
+}
+
+define <4 x half> @uitofp_i8(<4 x i8> %a) #0 {
+; CHECK-LABEL: uitofp_i8:
+; CHECK-NEXT: bic v0.4h, #0xff, lsl #8
+; CHECK-NEXT: ushll [[OP1:v[0-9]+\.4s]], v0.4h, #0
+; CHECK-NEXT: ucvtf [[OP2:v[0-9]+\.4s]], [[OP1]]
+; CHECK-NEXT: fcvtn v0.4h, [[OP2]]
+; CHECK-NEXT: ret
+  %1 = uitofp <4 x i8> %a to <4 x half>
+  ret <4 x half> %1
+}
+
+
+define <4 x half> @uitofp_i16(<4 x i16> %a) #0 {
+; CHECK-LABEL: uitofp_i16:
+; CHECK-NEXT: ushll [[OP1:v[0-9]+\.4s]], v0.4h, #0
+; CHECK-NEXT: ucvtf [[OP2:v[0-9]+\.4s]], [[OP1]]
+; CHECK-NEXT: fcvtn v0.4h, [[OP2]]
+; CHECK-NEXT: ret
+  %1 = uitofp <4 x i16> %a to <4 x half>
+  ret <4 x half> %1
+}
+
+
+define <4 x half> @uitofp_i32(<4 x i32> %a) #0 {
+; CHECK-LABEL: uitofp_i32:
+; CHECK-NEXT: ucvtf [[OP1:v[0-9]+\.4s]], v0.4s
+; CHECK-NEXT: fcvtn v0.4h, [[OP1]]
+  %1 = uitofp <4 x i32> %a to <4 x half>
+  ret <4 x half> %1
+}
+
+
+define <4 x half> @uitofp_i64(<4 x i64> %a) #0 {
+; CHECK-LABEL: uitofp_i64:
+; CHECK-DAG: ucvtf [[OP1:v[0-9]+\.2d]], v0.2d
+; CHECK-DAG: ucvtf [[OP2:v[0-9]+\.2d]], v1.2d
+; CHECK-DAG: fcvtn [[OP3:v[0-9]+]].2s, [[OP1]]
+; CHECK-NEXT: fcvtn2 [[OP3]].4s, [[OP2]]
+; CHECK-NEXT: fcvtn v0.4h, [[OP3]].4s
+  %1 = uitofp <4 x i64> %a to <4 x half>
+  ret <4 x half> %1
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/fp16-v8-instructions.ll b/test/CodeGen/AArch64/fp16-v8-instructions.ll
index e51c0c5..10a8c22 100644
--- a/test/CodeGen/AArch64/fp16-v8-instructions.ll
+++ b/test/CodeGen/AArch64/fp16-v8-instructions.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=aarch64-none-eabi | FileCheck %s
+; RUN: llc < %s -asm-verbose=false -mtriple=aarch64-none-eabi | FileCheck %s
 
 define <8 x half> @add_h(<8 x half> %a, <8 x half> %b) {
 entry:
@@ -253,3 +253,109 @@ define <8 x i16> @bitcast_h_to_i(float, <8 x half> %a) {
   ret <8 x i16> %2
 }
 
+
+define <8 x half> @sitofp_i8(<8 x i8> %a) #0 {
+; CHECK-LABEL: sitofp_i8:
+; CHECK-NEXT: sshll v[[REG1:[0-9]+]].8h, v0.8b, #0
+; CHECK-NEXT: sshll2 [[LO:v[0-9]+\.4s]], v[[REG1]].8h, #0
+; CHECK-NEXT: sshll [[HI:v[0-9]+\.4s]], v[[REG1]].4h, #0
+; CHECK-DAG: scvtf [[HIF:v[0-9]+\.4s]], [[HI]]
+; CHECK-DAG: scvtf [[LOF:v[0-9]+\.4s]], [[LO]]
+; CHECK-DAG: fcvtn v[[LOREG:[0-9]+]].4h, [[LOF]]
+; CHECK-DAG: fcvtn v0.4h, [[HIF]]
+; CHECK: ins v0.d[1], v[[LOREG]].d[0]
+  %1 = sitofp <8 x i8> %a to <8 x half>
+  ret <8 x half> %1
+}
+
+
+define <8 x half> @sitofp_i16(<8 x i16> %a) #0 {
+; CHECK-LABEL: sitofp_i16:
+; CHECK-NEXT: sshll2 [[LO:v[0-9]+\.4s]], v0.8h, #0
+; CHECK-NEXT: sshll [[HI:v[0-9]+\.4s]], v0.4h, #0
+; CHECK-DAG: scvtf [[HIF:v[0-9]+\.4s]], [[HI]]
+; CHECK-DAG: scvtf [[LOF:v[0-9]+\.4s]], [[LO]]
+; CHECK-DAG: fcvtn v[[LOREG:[0-9]+]].4h, [[LOF]]
+; CHECK-DAG: fcvtn v0.4h, [[HIF]]
+; CHECK: ins v0.d[1], v[[LOREG]].d[0]
+  %1 = sitofp <8 x i16> %a to <8 x half>
+  ret <8 x half> %1
+}
+
+
+define <8 x half> @sitofp_i32(<8 x i32> %a) #0 {
+; CHECK-LABEL: sitofp_i32:
+; CHECK-DAG: scvtf [[OP1:v[0-9]+\.4s]], v0.4s
+; CHECK-DAG: scvtf [[OP2:v[0-9]+\.4s]], v1.4s
+; CHECK-DAG: fcvtn v[[REG:[0-9]+]].4h, [[OP2]]
+; CHECK-DAG: fcvtn v0.4h, [[OP1]]
+; CHECK: ins v0.d[1], v[[REG]].d[0]
+  %1 = sitofp <8 x i32> %a to <8 x half>
+  ret <8 x half> %1
+}
+
+
+define <8 x half> @sitofp_i64(<8 x i64> %a) #0 {
+; CHECK-LABEL: sitofp_i64:
+; CHECK-DAG: scvtf [[OP1:v[0-9]+\.2d]], v0.2d
+; CHECK-DAG: scvtf [[OP2:v[0-9]+\.2d]], v1.2d
+; CHECK-DAG: fcvtn [[OP3:v[0-9]+]].2s, [[OP1]]
+; CHECK-DAG: fcvtn2 [[OP3]].4s, [[OP2]]
+; CHECK: fcvtn v0.4h, [[OP3]].4s
+  %1 = sitofp <8 x i64> %a to <8 x half>
+  ret <8 x half> %1
+}
+
+define <8 x half> @uitofp_i8(<8 x i8> %a) #0 {
+; CHECK-LABEL: uitofp_i8:
+; CHECK-NEXT: ushll v[[REG1:[0-9]+]].8h, v0.8b, #0
+; CHECK-NEXT: ushll2 [[LO:v[0-9]+\.4s]], v[[REG1]].8h, #0
+; CHECK-NEXT: ushll [[HI:v[0-9]+\.4s]], v[[REG1]].4h, #0
+; CHECK-DAG: ucvtf [[HIF:v[0-9]+\.4s]], [[HI]]
+; CHECK-DAG: ucvtf [[LOF:v[0-9]+\.4s]], [[LO]]
+; CHECK-DAG: fcvtn v[[LOREG:[0-9]+]].4h, [[LOF]]
+; CHECK-DAG: fcvtn v0.4h, [[HIF]]
+; CHECK: ins v0.d[1], v[[LOREG]].d[0]
+  %1 = uitofp <8 x i8> %a to <8 x half>
+  ret <8 x half> %1
+}
+
+
+define <8 x half> @uitofp_i16(<8 x i16> %a) #0 {
+; CHECK-LABEL: uitofp_i16:
+; CHECK-NEXT: ushll2 [[LO:v[0-9]+\.4s]], v0.8h, #0
+; CHECK-NEXT: ushll [[HI:v[0-9]+\.4s]], v0.4h, #0
+; CHECK-DAG: ucvtf [[HIF:v[0-9]+\.4s]], [[HI]]
+; CHECK-DAG: ucvtf [[LOF:v[0-9]+\.4s]], [[LO]]
+; CHECK-DAG: fcvtn v[[LOREG:[0-9]+]].4h, [[LOF]]
+; CHECK-DAG: fcvtn v0.4h, [[HIF]]
+; CHECK: ins v0.d[1], v[[LOREG]].d[0]
+  %1 = uitofp <8 x i16> %a to <8 x half>
+  ret <8 x half> %1
+}
+
+
+define <8 x half> @uitofp_i32(<8 x i32> %a) #0 {
+; CHECK-LABEL: uitofp_i32:
+; CHECK-DAG: ucvtf [[OP1:v[0-9]+\.4s]], v0.4s
+; CHECK-DAG: ucvtf [[OP2:v[0-9]+\.4s]], v1.4s
+; CHECK-DAG: fcvtn v[[REG:[0-9]+]].4h, [[OP2]]
+; CHECK-DAG: fcvtn v0.4h, [[OP1]]
+; CHECK: ins v0.d[1], v[[REG]].d[0]
+  %1 = uitofp <8 x i32> %a to <8 x half>
+  ret <8 x half> %1
+}
+
+
+define <8 x half> @uitofp_i64(<8 x i64> %a) #0 {
+; CHECK-LABEL: uitofp_i64:
+; CHECK-DAG: ucvtf [[OP1:v[0-9]+\.2d]], v0.2d
+; CHECK-DAG: ucvtf [[OP2:v[0-9]+\.2d]], v1.2d
+; CHECK-DAG: fcvtn [[OP3:v[0-9]+]].2s, [[OP1]]
+; CHECK-DAG: fcvtn2 [[OP3]].4s, [[OP2]]
+; CHECK: fcvtn v0.4h, [[OP3]].4s
+  %1 = uitofp <8 x i64> %a to <8 x half>
+  ret <8 x half> %1
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/fp16-vector-nvcast.ll b/test/CodeGen/AArch64/fp16-vector-nvcast.ll
new file mode 100644
index 0000000..83e0df7
--- /dev/null
+++ b/test/CodeGen/AArch64/fp16-vector-nvcast.ll
@@ -0,0 +1,89 @@
+; RUN: llc < %s -asm-verbose=false -mtriple=aarch64-none-eabi | FileCheck %s
+
+; Test pattern (v4f16 (AArch64NvCast (v2i32 FPR64:$src)))
+define void @nvcast_v2i32(<4 x half>* %a) #0 {
+; CHECK-LABEL: nvcast_v2i32:
+; CHECK-NEXT: movi v[[REG:[0-9]+]].2s, #0xab, lsl #16
+; CHECK-NEXT: str d[[REG]], [x0]
+; CHECK-NEXT: ret
+  store volatile <4 x half> <half 0xH0000, half 0xH00AB, half 0xH0000, half 0xH00AB>, <4 x half>* %a
+  ret void
+}
+
+
+; Test pattern (v4f16 (AArch64NvCast (v4i16 FPR64:$src)))
+define void @nvcast_v4i16(<4 x half>* %a) #0 {
+; CHECK-LABEL: nvcast_v4i16:
+; CHECK-NEXT: movi v[[REG:[0-9]+]].4h, #0xab
+; CHECK-NEXT: str d[[REG]], [x0]
+; CHECK-NEXT: ret
+  store volatile <4 x half> <half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB>, <4 x half>* %a
+  ret void
+}
+
+
+; Test pattern (v4f16 (AArch64NvCast (v8i8 FPR64:$src)))
+define void @nvcast_v8i8(<4 x half>* %a) #0 {
+; CHECK-LABEL: nvcast_v8i8:
+; CHECK-NEXT: movi v[[REG:[0-9]+]].8b, #0xab
+; CHECK-NEXT: str d[[REG]], [x0]
+; CHECK-NEXT: ret
+  store volatile <4 x half> <half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB>, <4 x half>* %a
+  ret void
+}
+
+
+; Test pattern (v4f16 (AArch64NvCast (f64 FPR64:$src)))
+define void @nvcast_f64(<4 x half>* %a) #0 {
+; CHECK-LABEL: nvcast_f64:
+; CHECK-NEXT: movi d[[REG:[0-9]+]], #0000000000000000
+; CHECK-NEXT: str d[[REG]], [x0]
+; CHECK-NEXT: ret
+  store volatile <4 x half> zeroinitializer, <4 x half>* %a
+  ret void
+}
+
+; Test pattern (v8f16 (AArch64NvCast (v4i32 FPR128:$src)))
+define void @nvcast_v4i32(<8 x half>* %a) #0 {
+; CHECK-LABEL: nvcast_v4i32:
+; CHECK-NEXT: movi v[[REG:[0-9]+]].4s, #0xab, lsl #16
+; CHECK-NEXT: str q[[REG]], [x0]
+; CHECK-NEXT: ret
+  store volatile <8 x half> <half 0xH0000, half 0xH00AB, half 0xH0000, half 0xH00AB, half 0xH0000, half 0xH00AB, half 0xH0000, half 0xH00AB>, <8 x half>* %a
+  ret void
+}
+
+
+; Test pattern (v8f16 (AArch64NvCast (v8i16 FPR128:$src)))
+define void @nvcast_v8i16(<8 x half>* %a) #0 {
+; CHECK-LABEL: nvcast_v8i16:
+; CHECK-NEXT: movi v[[REG:[0-9]+]].8h, #0xab
+; CHECK-NEXT: str q[[REG]], [x0]
+; CHECK-NEXT: ret
+  store volatile <8 x half> <half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB>, <8 x half>* %a
+  ret void
+}
+
+
+; Test pattern (v8f16 (AArch64NvCast (v16i8 FPR128:$src)))
+define void @nvcast_v16i8(<8 x half>* %a) #0 {
+; CHECK-LABEL: nvcast_v16i8:
+; CHECK-NEXT: movi v[[REG:[0-9]+]].16b, #0xab
+; CHECK-NEXT: str q[[REG]], [x0]
+; CHECK-NEXT: ret
+  store volatile <8 x half> <half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB>, <8 x half>* %a
+  ret void
+}
+
+
+; Test pattern (v8f16 (AArch64NvCast (v2i64 FPR128:$src)))
+define void @nvcast_v2i64(<8 x half>* %a) #0 {
+; CHECK-LABEL: nvcast_v2i64:
+; CHECK-NEXT: movi v[[REG:[0-9]+]].2d, #0000000000000000
+; CHECK-NEXT: str q[[REG]], [x0]
+; CHECK-NEXT: ret
+  store volatile <8 x half> zeroinitializer, <8 x half>* %a
+  ret void
+}
+
+attributes #0 = { nounwind }