From 37ed9c199ca639565f6ce88105f9e39e898d82d0 Mon Sep 17 00:00:00 2001 From: Stephen Hines Date: Mon, 1 Dec 2014 14:51:49 -0800 Subject: Update aosp/master LLVM for rebase to r222494. Change-Id: Ic787f5e0124df789bd26f3f24680f45e678eef2d --- test/CodeGen/AArch64/PBQP-chain.ll | 104 +++ test/CodeGen/AArch64/PBQP-coalesce-benefit.ll | 14 + test/CodeGen/AArch64/PBQP-csr.ll | 91 +++ test/CodeGen/AArch64/PBQP.ll | 14 + test/CodeGen/AArch64/Redundantstore.ll | 25 + .../aarch64-2014-08-11-MachineCombinerCrash.ll | 106 +++ .../AArch64/aarch64-a57-fp-load-balancing.ll | 14 +- test/CodeGen/AArch64/aarch64-be-bv.ll | 831 +++++++++++++++++++++ test/CodeGen/AArch64/aarch64-gep-opt.ll | 163 ++++ test/CodeGen/AArch64/aarch64-smull.ll | 332 ++++++++ test/CodeGen/AArch64/aarch64-wide-shuffle.ll | 22 + test/CodeGen/AArch64/aarch64_f16_be.ll | 67 ++ test/CodeGen/AArch64/aarch64_tree_tests.ll | 42 ++ test/CodeGen/AArch64/adc.ll | 2 +- test/CodeGen/AArch64/analyzecmp.ll | 32 + test/CodeGen/AArch64/and-mask-removal.ll | 269 +++++++ test/CodeGen/AArch64/andandshift.ll | 28 + .../AArch64/arm64-2011-03-17-AsmPrinterCrash.ll | 34 +- .../CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll | 2 +- test/CodeGen/AArch64/arm64-2012-06-06-FPToUI.ll | 4 +- test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll | 66 +- test/CodeGen/AArch64/arm64-EXT-undef-mask.ll | 2 +- test/CodeGen/AArch64/arm64-aapcs-be.ll | 24 + test/CodeGen/AArch64/arm64-aapcs.ll | 42 ++ test/CodeGen/AArch64/arm64-abi.ll | 38 +- test/CodeGen/AArch64/arm64-abi_align.ll | 60 +- test/CodeGen/AArch64/arm64-addr-mode-folding.ll | 2 +- test/CodeGen/AArch64/arm64-addrmode.ll | 121 ++- test/CodeGen/AArch64/arm64-bcc.ll | 60 ++ .../AArch64/arm64-big-endian-bitconverts.ll | 4 +- test/CodeGen/AArch64/arm64-big-endian-eh.ll | 2 +- test/CodeGen/AArch64/arm64-big-endian-varargs.ll | 2 +- .../AArch64/arm64-big-endian-vector-callee.ll | 4 +- .../AArch64/arm64-big-endian-vector-caller.ll | 4 +- test/CodeGen/AArch64/arm64-cse.ll | 2 +- 
.../AArch64/arm64-dagcombiner-dead-indexed-load.ll | 4 - .../AArch64/arm64-dagcombiner-indexed-load.ll | 46 -- test/CodeGen/AArch64/arm64-extern-weak.ll | 19 +- .../CodeGen/AArch64/arm64-fast-isel-addr-offset.ll | 2 +- test/CodeGen/AArch64/arm64-fast-isel-alloca.ll | 7 +- test/CodeGen/AArch64/arm64-fast-isel-br.ll | 17 +- test/CodeGen/AArch64/arm64-fast-isel-call.ll | 231 +++++- test/CodeGen/AArch64/arm64-fast-isel-conversion.ll | 126 ++-- test/CodeGen/AArch64/arm64-fast-isel-fcmp.ll | 286 +++---- test/CodeGen/AArch64/arm64-fast-isel-gv.ll | 29 +- test/CodeGen/AArch64/arm64-fast-isel-icmp.ll | 179 +++-- test/CodeGen/AArch64/arm64-fast-isel-indirectbr.ll | 8 +- test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll | 4 +- .../CodeGen/AArch64/arm64-fast-isel-materialize.ll | 44 +- test/CodeGen/AArch64/arm64-fast-isel-noconvert.ll | 2 +- test/CodeGen/AArch64/arm64-fast-isel-rem.ll | 3 +- test/CodeGen/AArch64/arm64-fast-isel-ret.ll | 2 +- test/CodeGen/AArch64/arm64-fast-isel-select.ll | 63 -- test/CodeGen/AArch64/arm64-fast-isel-store.ll | 30 + test/CodeGen/AArch64/arm64-fast-isel.ll | 8 +- test/CodeGen/AArch64/arm64-frameaddr.ll | 15 - test/CodeGen/AArch64/arm64-indexed-memory.ll | 12 + test/CodeGen/AArch64/arm64-inline-asm.ll | 10 +- .../AArch64/arm64-patchpoint-scratch-regs.ll | 18 + .../AArch64/arm64-patchpoint-webkit_jscc.ll | 118 +++ test/CodeGen/AArch64/arm64-patchpoint.ll | 91 +-- test/CodeGen/AArch64/arm64-popcnt.ll | 14 + test/CodeGen/AArch64/arm64-prefetch.ll | 55 +- test/CodeGen/AArch64/arm64-scvt.ll | 27 +- .../AArch64/arm64-setcc-int-to-fp-combine.ll | 47 ++ test/CodeGen/AArch64/arm64-shifted-sext.ll | 4 +- test/CodeGen/AArch64/arm64-stackmap.ll | 3 +- .../AArch64/arm64-triv-disjoint-mem-access.ll | 31 + test/CodeGen/AArch64/arm64-vabs.ll | 70 ++ test/CodeGen/AArch64/arm64-vcvt_f.ll | 12 +- test/CodeGen/AArch64/arm64-vector-ext.ll | 11 + test/CodeGen/AArch64/arm64-xaluo.ll | 305 ++++++-- test/CodeGen/AArch64/atomic-ops.ll | 8 +- 
test/CodeGen/AArch64/br-undef-cond.ll | 26 + test/CodeGen/AArch64/cmp-const-max.ll | 36 + test/CodeGen/AArch64/cmpwithshort.ll | 46 ++ test/CodeGen/AArch64/combine-comparisons-by-cse.ll | 413 ++++++++++ test/CodeGen/AArch64/cond-sel.ll | 17 + test/CodeGen/AArch64/dag-combine-invaraints.ll | 36 + test/CodeGen/AArch64/dont-take-over-the-world.ll | 7 + test/CodeGen/AArch64/extern-weak.ll | 19 +- test/CodeGen/AArch64/f16-convert.ll | 51 +- test/CodeGen/AArch64/fast-isel-addressing-modes.ll | 627 ++++++++++++++++ test/CodeGen/AArch64/fast-isel-branch_weights.ll | 19 + test/CodeGen/AArch64/fast-isel-call-return.ll | 12 + test/CodeGen/AArch64/fast-isel-cbz.ll | 70 ++ test/CodeGen/AArch64/fast-isel-cmp-branch.ll | 293 ++++++++ test/CodeGen/AArch64/fast-isel-folding.ll | 54 ++ test/CodeGen/AArch64/fast-isel-gep.ll | 49 ++ test/CodeGen/AArch64/fast-isel-int-ext.ll | 491 ++++++++++++ test/CodeGen/AArch64/fast-isel-int-ext2.ll | 439 +++++++++++ test/CodeGen/AArch64/fast-isel-int-ext3.ll | 117 +++ test/CodeGen/AArch64/fast-isel-int-ext4.ll | 20 + test/CodeGen/AArch64/fast-isel-intrinsic.ll | 19 + test/CodeGen/AArch64/fast-isel-logic-op.ll | 362 +++++++++ test/CodeGen/AArch64/fast-isel-mul.ll | 60 +- test/CodeGen/AArch64/fast-isel-runtime-libcall.ll | 96 +++ test/CodeGen/AArch64/fast-isel-sdiv.ll | 56 ++ test/CodeGen/AArch64/fast-isel-select.ll | 316 ++++++++ test/CodeGen/AArch64/fast-isel-shift.ll | 545 ++++++++++++++ test/CodeGen/AArch64/fast-isel-sqrt.ll | 20 + test/CodeGen/AArch64/fast-isel-switch-phi.ll | 25 + test/CodeGen/AArch64/fast-isel-tbz.ll | 141 ++++ test/CodeGen/AArch64/fast-isel-trunc.ll | 12 + .../CodeGen/AArch64/fast-isel-vector-arithmetic.ll | 74 ++ test/CodeGen/AArch64/fast-isel-vret.ll | 9 + test/CodeGen/AArch64/fp16-instructions.ll | 109 +++ test/CodeGen/AArch64/fp16-v4-instructions.ll | 122 +++ test/CodeGen/AArch64/fp16-v8-instructions.ll | 255 +++++++ test/CodeGen/AArch64/fp16-vector-bitcast.ll | 203 +++++ test/CodeGen/AArch64/fp16-vector-load-store.ll | 
528 +++++++++++++ test/CodeGen/AArch64/fp16-vector-shuffle.ll | 301 ++++++++ test/CodeGen/AArch64/fpconv-vector-op-scalarize.ll | 44 ++ test/CodeGen/AArch64/frameaddr.ll | 29 +- test/CodeGen/AArch64/func-calls.ll | 2 +- test/CodeGen/AArch64/half.ll | 83 ++ test/CodeGen/AArch64/hints.ll | 67 ++ test/CodeGen/AArch64/init-array.ll | 4 +- test/CodeGen/AArch64/intrinsics-memory-barrier.ll | 57 ++ test/CodeGen/AArch64/jump-table.ll | 11 +- test/CodeGen/AArch64/legalize-bug-bogus-cpu.ll | 8 + test/CodeGen/AArch64/machine_cse.ll | 45 ++ test/CodeGen/AArch64/madd-combiner.ll | 37 + test/CodeGen/AArch64/madd-lohi.ll | 19 + test/CodeGen/AArch64/mul-lohi.ll | 13 +- test/CodeGen/AArch64/neon-perm.ll | 7 + test/CodeGen/AArch64/neon-scalar-copy.ll | 17 + test/CodeGen/AArch64/paired-load.ll | 16 + test/CodeGen/AArch64/pic-eh-stubs.ll | 2 +- test/CodeGen/AArch64/postra-mi-sched.ll | 31 + test/CodeGen/AArch64/rbit.ll | 20 + test/CodeGen/AArch64/rm_redundant_cmp.ll | 254 +++++++ test/CodeGen/AArch64/sdivpow2.ll | 74 ++ test/CodeGen/AArch64/stack-guard-remat-bitcast.ll | 26 + test/CodeGen/AArch64/stack_guard_remat.ll | 48 ++ test/CodeGen/AArch64/tail-call.ll | 11 + test/CodeGen/AArch64/tailcall-fastisel.ll | 11 + test/CodeGen/AArch64/tbz-tbnz.ll | 258 +++++++ test/CodeGen/AArch64/trunc-v1i64.ll | 21 +- 139 files changed, 11039 insertions(+), 861 deletions(-) create mode 100644 test/CodeGen/AArch64/PBQP-chain.ll create mode 100644 test/CodeGen/AArch64/PBQP-coalesce-benefit.ll create mode 100644 test/CodeGen/AArch64/PBQP-csr.ll create mode 100644 test/CodeGen/AArch64/PBQP.ll create mode 100644 test/CodeGen/AArch64/Redundantstore.ll create mode 100644 test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll create mode 100644 test/CodeGen/AArch64/aarch64-be-bv.ll create mode 100644 test/CodeGen/AArch64/aarch64-gep-opt.ll create mode 100644 test/CodeGen/AArch64/aarch64-smull.ll create mode 100644 test/CodeGen/AArch64/aarch64-wide-shuffle.ll create mode 100644 
test/CodeGen/AArch64/aarch64_f16_be.ll create mode 100644 test/CodeGen/AArch64/aarch64_tree_tests.ll create mode 100644 test/CodeGen/AArch64/analyzecmp.ll create mode 100644 test/CodeGen/AArch64/and-mask-removal.ll create mode 100644 test/CodeGen/AArch64/andandshift.ll create mode 100644 test/CodeGen/AArch64/arm64-aapcs-be.ll create mode 100644 test/CodeGen/AArch64/arm64-bcc.ll delete mode 100644 test/CodeGen/AArch64/arm64-dagcombiner-indexed-load.ll delete mode 100644 test/CodeGen/AArch64/arm64-fast-isel-select.ll create mode 100644 test/CodeGen/AArch64/arm64-fast-isel-store.ll delete mode 100644 test/CodeGen/AArch64/arm64-frameaddr.ll create mode 100644 test/CodeGen/AArch64/arm64-patchpoint-scratch-regs.ll create mode 100644 test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll create mode 100644 test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll create mode 100644 test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll create mode 100644 test/CodeGen/AArch64/br-undef-cond.ll create mode 100644 test/CodeGen/AArch64/cmp-const-max.ll create mode 100644 test/CodeGen/AArch64/cmpwithshort.ll create mode 100644 test/CodeGen/AArch64/combine-comparisons-by-cse.ll create mode 100644 test/CodeGen/AArch64/dag-combine-invaraints.ll create mode 100644 test/CodeGen/AArch64/dont-take-over-the-world.ll create mode 100644 test/CodeGen/AArch64/fast-isel-addressing-modes.ll create mode 100644 test/CodeGen/AArch64/fast-isel-branch_weights.ll create mode 100644 test/CodeGen/AArch64/fast-isel-call-return.ll create mode 100644 test/CodeGen/AArch64/fast-isel-cbz.ll create mode 100644 test/CodeGen/AArch64/fast-isel-cmp-branch.ll create mode 100644 test/CodeGen/AArch64/fast-isel-folding.ll create mode 100644 test/CodeGen/AArch64/fast-isel-gep.ll create mode 100644 test/CodeGen/AArch64/fast-isel-int-ext.ll create mode 100644 test/CodeGen/AArch64/fast-isel-int-ext2.ll create mode 100644 test/CodeGen/AArch64/fast-isel-int-ext3.ll create mode 100644 test/CodeGen/AArch64/fast-isel-int-ext4.ll 
create mode 100644 test/CodeGen/AArch64/fast-isel-intrinsic.ll create mode 100644 test/CodeGen/AArch64/fast-isel-logic-op.ll create mode 100644 test/CodeGen/AArch64/fast-isel-runtime-libcall.ll create mode 100644 test/CodeGen/AArch64/fast-isel-sdiv.ll create mode 100644 test/CodeGen/AArch64/fast-isel-select.ll create mode 100644 test/CodeGen/AArch64/fast-isel-shift.ll create mode 100644 test/CodeGen/AArch64/fast-isel-sqrt.ll create mode 100644 test/CodeGen/AArch64/fast-isel-switch-phi.ll create mode 100644 test/CodeGen/AArch64/fast-isel-tbz.ll create mode 100644 test/CodeGen/AArch64/fast-isel-trunc.ll create mode 100644 test/CodeGen/AArch64/fast-isel-vector-arithmetic.ll create mode 100644 test/CodeGen/AArch64/fast-isel-vret.ll create mode 100644 test/CodeGen/AArch64/fp16-instructions.ll create mode 100644 test/CodeGen/AArch64/fp16-v4-instructions.ll create mode 100644 test/CodeGen/AArch64/fp16-v8-instructions.ll create mode 100644 test/CodeGen/AArch64/fp16-vector-bitcast.ll create mode 100644 test/CodeGen/AArch64/fp16-vector-load-store.ll create mode 100644 test/CodeGen/AArch64/fp16-vector-shuffle.ll create mode 100644 test/CodeGen/AArch64/fpconv-vector-op-scalarize.ll create mode 100644 test/CodeGen/AArch64/half.ll create mode 100644 test/CodeGen/AArch64/hints.ll create mode 100644 test/CodeGen/AArch64/intrinsics-memory-barrier.ll create mode 100644 test/CodeGen/AArch64/legalize-bug-bogus-cpu.ll create mode 100644 test/CodeGen/AArch64/machine_cse.ll create mode 100644 test/CodeGen/AArch64/madd-combiner.ll create mode 100644 test/CodeGen/AArch64/madd-lohi.ll create mode 100644 test/CodeGen/AArch64/paired-load.ll create mode 100644 test/CodeGen/AArch64/postra-mi-sched.ll create mode 100644 test/CodeGen/AArch64/rbit.ll create mode 100644 test/CodeGen/AArch64/rm_redundant_cmp.ll create mode 100644 test/CodeGen/AArch64/sdivpow2.ll create mode 100644 test/CodeGen/AArch64/stack-guard-remat-bitcast.ll create mode 100644 test/CodeGen/AArch64/stack_guard_remat.ll create 
mode 100644 test/CodeGen/AArch64/tailcall-fastisel.ll create mode 100644 test/CodeGen/AArch64/tbz-tbnz.ll (limited to 'test/CodeGen/AArch64') diff --git a/test/CodeGen/AArch64/PBQP-chain.ll b/test/CodeGen/AArch64/PBQP-chain.ll new file mode 100644 index 0000000..c4ba026 --- /dev/null +++ b/test/CodeGen/AArch64/PBQP-chain.ll @@ -0,0 +1,104 @@ +; RUN: llc < %s -mcpu=cortex-a57 -mattr=+neon -fp-contract=fast -regalloc=pbqp -pbqp-coalescing | FileCheck %s --check-prefix CHECK --check-prefix CHECK-EVEN +; RUN: llc < %s -mcpu=cortex-a57 -mattr=+neon -fp-contract=fast -regalloc=pbqp -pbqp-coalescing | FileCheck %s --check-prefix CHECK --check-prefix CHECK-ODD +; +; Test PBQP is able to fulfill the accumulator chaining constraint. +target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" +target triple = "aarch64" + +; CHECK-LABEL: fir +; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}} +; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}} +; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}} +; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}} +; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}} +; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}} +; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}} +; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}} +; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}} +; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}} +; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}} +; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}} +; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}} +; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, 
{{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}} +define void @fir(double* nocapture %rx, double* nocapture %ry, double* nocapture %c, double* nocapture %x, double* nocapture %y) { +entry: + %0 = load double* %c, align 8 + %1 = load double* %x, align 8 + %mul = fmul fast double %1, %0 + %2 = load double* %y, align 8 + %mul7 = fmul fast double %2, %0 + %arrayidx.1 = getelementptr inbounds double* %c, i64 1 + %3 = load double* %arrayidx.1, align 8 + %arrayidx2.1 = getelementptr inbounds double* %x, i64 1 + %4 = load double* %arrayidx2.1, align 8 + %mul.1 = fmul fast double %4, %3 + %add.1 = fadd fast double %mul.1, %mul + %arrayidx6.1 = getelementptr inbounds double* %y, i64 1 + %5 = load double* %arrayidx6.1, align 8 + %mul7.1 = fmul fast double %5, %3 + %add8.1 = fadd fast double %mul7.1, %mul7 + %arrayidx.2 = getelementptr inbounds double* %c, i64 2 + %6 = load double* %arrayidx.2, align 8 + %arrayidx2.2 = getelementptr inbounds double* %x, i64 2 + %7 = load double* %arrayidx2.2, align 8 + %mul.2 = fmul fast double %7, %6 + %add.2 = fadd fast double %mul.2, %add.1 + %arrayidx6.2 = getelementptr inbounds double* %y, i64 2 + %8 = load double* %arrayidx6.2, align 8 + %mul7.2 = fmul fast double %8, %6 + %add8.2 = fadd fast double %mul7.2, %add8.1 + %arrayidx.3 = getelementptr inbounds double* %c, i64 3 + %9 = load double* %arrayidx.3, align 8 + %arrayidx2.3 = getelementptr inbounds double* %x, i64 3 + %10 = load double* %arrayidx2.3, align 8 + %mul.3 = fmul fast double %10, %9 + %add.3 = fadd fast double %mul.3, %add.2 + %arrayidx6.3 = getelementptr inbounds double* %y, i64 3 + %11 = load double* %arrayidx6.3, align 8 + %mul7.3 = fmul fast double %11, %9 + %add8.3 = fadd fast double %mul7.3, %add8.2 + %arrayidx.4 = getelementptr inbounds double* %c, i64 4 + %12 = load double* %arrayidx.4, align 8 + %arrayidx2.4 = getelementptr inbounds double* %x, i64 4 + %13 = load double* %arrayidx2.4, align 8 + %mul.4 = fmul fast double %13, %12 + %add.4 = fadd fast double %mul.4, 
%add.3 + %arrayidx6.4 = getelementptr inbounds double* %y, i64 4 + %14 = load double* %arrayidx6.4, align 8 + %mul7.4 = fmul fast double %14, %12 + %add8.4 = fadd fast double %mul7.4, %add8.3 + %arrayidx.5 = getelementptr inbounds double* %c, i64 5 + %15 = load double* %arrayidx.5, align 8 + %arrayidx2.5 = getelementptr inbounds double* %x, i64 5 + %16 = load double* %arrayidx2.5, align 8 + %mul.5 = fmul fast double %16, %15 + %add.5 = fadd fast double %mul.5, %add.4 + %arrayidx6.5 = getelementptr inbounds double* %y, i64 5 + %17 = load double* %arrayidx6.5, align 8 + %mul7.5 = fmul fast double %17, %15 + %add8.5 = fadd fast double %mul7.5, %add8.4 + %arrayidx.6 = getelementptr inbounds double* %c, i64 6 + %18 = load double* %arrayidx.6, align 8 + %arrayidx2.6 = getelementptr inbounds double* %x, i64 6 + %19 = load double* %arrayidx2.6, align 8 + %mul.6 = fmul fast double %19, %18 + %add.6 = fadd fast double %mul.6, %add.5 + %arrayidx6.6 = getelementptr inbounds double* %y, i64 6 + %20 = load double* %arrayidx6.6, align 8 + %mul7.6 = fmul fast double %20, %18 + %add8.6 = fadd fast double %mul7.6, %add8.5 + %arrayidx.7 = getelementptr inbounds double* %c, i64 7 + %21 = load double* %arrayidx.7, align 8 + %arrayidx2.7 = getelementptr inbounds double* %x, i64 7 + %22 = load double* %arrayidx2.7, align 8 + %mul.7 = fmul fast double %22, %21 + %add.7 = fadd fast double %mul.7, %add.6 + %arrayidx6.7 = getelementptr inbounds double* %y, i64 7 + %23 = load double* %arrayidx6.7, align 8 + %mul7.7 = fmul fast double %23, %21 + %add8.7 = fadd fast double %mul7.7, %add8.6 + store double %add.7, double* %rx, align 8 + store double %add8.7, double* %ry, align 8 + ret void +} + diff --git a/test/CodeGen/AArch64/PBQP-coalesce-benefit.ll b/test/CodeGen/AArch64/PBQP-coalesce-benefit.ll new file mode 100644 index 0000000..45ac5e6 --- /dev/null +++ b/test/CodeGen/AArch64/PBQP-coalesce-benefit.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu 
-mcpu=cortex-a57 -mattr=+neon -fp-contract=fast -regalloc=pbqp -pbqp-coalescing | FileCheck %s + +; CHECK-LABEL: test: +define i32 @test(i32 %acc, i32* nocapture readonly %c) { +entry: + %0 = load i32* %c, align 4 +; CHECK-NOT: mov w{{[0-9]*}}, w0 + %add = add nsw i32 %0, %acc + %arrayidx1 = getelementptr inbounds i32* %c, i64 1 + %1 = load i32* %arrayidx1, align 4 + %add2 = add nsw i32 %add, %1 + ret i32 %add2 +} + diff --git a/test/CodeGen/AArch64/PBQP-csr.ll b/test/CodeGen/AArch64/PBQP-csr.ll new file mode 100644 index 0000000..64335ae --- /dev/null +++ b/test/CodeGen/AArch64/PBQP-csr.ll @@ -0,0 +1,91 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a57 -mattr=+neon -fp-contract=fast -regalloc=pbqp -pbqp-coalescing | FileCheck %s + +%pl = type { i32, i32, i32, i32, %p*, %l*, double* } +%p = type { i32, %ca*, [27 x %ca*], %v*, %v*, %v*, i32 } +%ca = type { %v, float, i32 } +%v = type { double, double, double } +%l = type opaque +%rs = type { i32, i32, i32, i32, %v*, %v*, [21 x double], %v, %v, %v, double, double, double } + +;CHECK-LABEL: test_csr +define void @test_csr(%pl* nocapture readnone %this, %rs* nocapture %r) align 2 { +;CHECK-NOT: stp {{d[0-9]+}}, {{d[0-9]+}} +entry: + %x.i = getelementptr inbounds %rs* %r, i64 0, i32 7, i32 0 + %y.i = getelementptr inbounds %rs* %r, i64 0, i32 7, i32 1 + %z.i = getelementptr inbounds %rs* %r, i64 0, i32 7, i32 2 + %x.i61 = getelementptr inbounds %rs* %r, i64 0, i32 8, i32 0 + %y.i62 = getelementptr inbounds %rs* %r, i64 0, i32 8, i32 1 + %z.i63 = getelementptr inbounds %rs* %r, i64 0, i32 8, i32 2 + %x.i58 = getelementptr inbounds %rs* %r, i64 0, i32 9, i32 0 + %y.i59 = getelementptr inbounds %rs* %r, i64 0, i32 9, i32 1 + %z.i60 = getelementptr inbounds %rs* %r, i64 0, i32 9, i32 2 + %na = getelementptr inbounds %rs* %r, i64 0, i32 0 + %0 = bitcast double* %x.i to i8* + call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 72, i32 8, i1 false) + %1 = load i32* %na, align 4 + %cmp70 = 
icmp sgt i32 %1, 0 + br i1 %cmp70, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + %fn = getelementptr inbounds %rs* %r, i64 0, i32 4 + %2 = load %v** %fn, align 8 + %fs = getelementptr inbounds %rs* %r, i64 0, i32 5 + %3 = load %v** %fs, align 8 + %4 = sext i32 %1 to i64 + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %5 = phi double [ 0.000000e+00, %for.body.lr.ph ], [ %add6.i, %for.body ] + %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] + %6 = phi <2 x double> [ zeroinitializer, %for.body.lr.ph ], [ %17, %for.body ] + %7 = phi <2 x double> [ zeroinitializer, %for.body.lr.ph ], [ %22, %for.body ] + %8 = phi <2 x double> [ zeroinitializer, %for.body.lr.ph ], [ %26, %for.body ] + %9 = phi <2 x double> [ zeroinitializer, %for.body.lr.ph ], [ %28, %for.body ] + %x.i54 = getelementptr inbounds %v* %2, i64 %indvars.iv, i32 0 + %x1.i = getelementptr inbounds %v* %3, i64 %indvars.iv, i32 0 + %y.i56 = getelementptr inbounds %v* %2, i64 %indvars.iv, i32 1 + %10 = bitcast double* %x.i54 to <2 x double>* + %11 = load <2 x double>* %10, align 8 + %y2.i = getelementptr inbounds %v* %3, i64 %indvars.iv, i32 1 + %12 = bitcast double* %x1.i to <2 x double>* + %13 = load <2 x double>* %12, align 8 + %14 = fadd fast <2 x double> %13, %11 + %z.i57 = getelementptr inbounds %v* %2, i64 %indvars.iv, i32 2 + %15 = load double* %z.i57, align 8 + %z4.i = getelementptr inbounds %v* %3, i64 %indvars.iv, i32 2 + %16 = load double* %z4.i, align 8 + %add5.i = fadd fast double %16, %15 + %17 = fadd fast <2 x double> %6, %11 + %18 = bitcast double* %x.i to <2 x double>* + store <2 x double> %17, <2 x double>* %18, align 8 + %19 = load double* %x1.i, align 8 + %20 = insertelement <2 x double> undef, double %15, i32 0 + %21 = insertelement <2 x double> %20, double %19, i32 1 + %22 = fadd fast <2 x double> %7, %21 + %23 = bitcast double* %z.i to <2 x double>* + store <2 x double> %22, <2 x double>* %23, align 8 + 
%24 = bitcast double* %y2.i to <2 x double>* + %25 = load <2 x double>* %24, align 8 + %26 = fadd fast <2 x double> %8, %25 + %27 = bitcast double* %y.i62 to <2 x double>* + store <2 x double> %26, <2 x double>* %27, align 8 + %28 = fadd fast <2 x double> %14, %9 + %29 = bitcast double* %x.i58 to <2 x double>* + store <2 x double> %28, <2 x double>* %29, align 8 + %add6.i = fadd fast double %add5.i, %5 + store double %add6.i, double* %z.i60, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %cmp = icmp slt i64 %indvars.iv.next, %4 + br i1 %cmp, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +} + +; Function Attrs: nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) + diff --git a/test/CodeGen/AArch64/PBQP.ll b/test/CodeGen/AArch64/PBQP.ll new file mode 100644 index 0000000..675a2ca --- /dev/null +++ b/test/CodeGen/AArch64/PBQP.ll @@ -0,0 +1,14 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=cortex-a57 -regalloc=pbqp -pbqp-coalescing -o - %s | FileCheck %s + +define i32 @foo(i32 %a) { +; CHECK-LABEL: foo: +; CHECK: bl bar +; CHECK: bl baz + %call = call i32 @bar(i32 %a) + %call1 = call i32 @baz(i32 %call) + ret i32 %call1 +} + +declare i32 @bar(i32) +declare i32 @baz(i32) + diff --git a/test/CodeGen/AArch64/Redundantstore.ll b/test/CodeGen/AArch64/Redundantstore.ll new file mode 100644 index 0000000..72f7f46 --- /dev/null +++ b/test/CodeGen/AArch64/Redundantstore.ll @@ -0,0 +1,25 @@ +; RUN: llc -O3 -march=aarch64 < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +@end_of_array = common global i8* null, align 8 + +; CHECK-LABEL: @test +; CHECK: stur +; CHECK-NOT: stur +define i8* @test(i32 %size) { +entry: + %0 = load i8** @end_of_array, align 8 + %conv = sext i32 %size to i64 + %and = and i64 %conv, -8 + %conv2 = trunc i64 %and to i32 + %add.ptr.sum = add nsw i64 %and, -4 + %add.ptr3 
= getelementptr inbounds i8* %0, i64 %add.ptr.sum + %size4 = bitcast i8* %add.ptr3 to i32* + store i32 %conv2, i32* %size4, align 4 + %add.ptr.sum9 = add nsw i64 %and, -4 + %add.ptr5 = getelementptr inbounds i8* %0, i64 %add.ptr.sum9 + %size6 = bitcast i8* %add.ptr5 to i32* + store i32 %conv2, i32* %size6, align 4 + ret i8* %0 +} + diff --git a/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll b/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll new file mode 100644 index 0000000..4da33a0 --- /dev/null +++ b/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll @@ -0,0 +1,106 @@ +; RUN: llc < %s -O2 -mtriple=aarch64-none-linux-gnu + +; Bug 20598 + + +define void @test() #0 { +entry: + br label %for.body, !dbg !39 + +for.body: ; preds = %for.body, %entry + %arrayidx5 = getelementptr inbounds i32* null, i64 1, !dbg !43 + %0 = load i32* null, align 4, !dbg !45, !tbaa !46 + %s1 = sub nsw i32 0, %0, !dbg !50 + %n1 = sext i32 %s1 to i64, !dbg !50 + %arrayidx21 = getelementptr inbounds i32* null, i64 3, !dbg !51 + %add53 = add nsw i64 %n1, 0, !dbg !52 + %add55 = add nsw i64 %n1, 0, !dbg !53 + %mul63 = mul nsw i64 %add53, -20995, !dbg !54 + tail call void @llvm.dbg.value(metadata !{i64 %mul63}, i64 0, metadata !30, metadata !{metadata !"0x102"}), !dbg !55 + %mul65 = mul nsw i64 %add55, -3196, !dbg !56 + %add67 = add nsw i64 0, %mul65, !dbg !57 + %add80 = add i64 0, 1024, !dbg !58 + %add81 = add i64 %add80, %mul63, !dbg !58 + %add82 = add i64 %add81, 0, !dbg !58 + %shr83351 = lshr i64 %add82, 11, !dbg !58 + %conv84 = trunc i64 %shr83351 to i32, !dbg !58 + store i32 %conv84, i32* %arrayidx21, align 4, !dbg !58, !tbaa !46 + %add86 = add i64 0, 1024, !dbg !59 + %add87 = add i64 %add86, 0, !dbg !59 + %add88 = add i64 %add87, %add67, !dbg !59 + %shr89352 = lshr i64 %add88, 11, !dbg !59 + %n2 = trunc i64 %shr89352 to i32, !dbg !59 + store i32 %n2, i32* %arrayidx5, align 4, !dbg !59, !tbaa !46 + br label %for.body, !dbg !39 +} + +; Function 
Attrs: nounwind readnone +declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1 + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!36, !37} +!llvm.ident = !{!38} + +!0 = metadata !{metadata !"0x11\0012\00clang version 3.6.0 \001\00\000\00\001", metadata !1, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2} ; [ DW_TAG_compile_unit ] [] [] [] +!1 = metadata !{metadata !"test.c", metadata !""} +!2 = metadata !{} +!3 = metadata !{metadata !4} +!4 = metadata !{metadata !"0x2e\00\00\00\00140\000\001\000\006\00256\001\00141", metadata !1, metadata !5, metadata !6, null, void ()* @test, null, null, metadata !12} ; [ DW_TAG_subprogram ] [] [] [def] [scope 141] [] +!5 = metadata !{metadata !"0x29", metadata !1} ; [ DW_TAG_file_type ] [] [] +!6 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !7, null, null, null} ; [ DW_TAG_subroutine_type ] [] [] [from ] +!7 = metadata !{null, metadata !8} +!8 = metadata !{metadata !"0xf\00\000\0064\0064\000\000", null, null, metadata !9} ; [ DW_TAG_pointer_type ] [] [] [] +!9 = metadata !{metadata !"0x16\00\0030\000\000\000\000", metadata !10, null, metadata !11} ; [ DW_TAG_typedef ] [] [] [] [from int] +!10 = metadata !{metadata !"", metadata !""} +!11 = metadata !{metadata !"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [] [int] [] +!12 = metadata !{metadata !13, metadata !14, metadata !18, metadata !19, metadata !20, metadata !21, metadata !22, metadata !23, metadata !24, metadata !25, metadata !26, metadata !27, metadata !28, metadata !29, metadata !30, metadata !31, metadata !32, metadata !33, metadata !34, metadata !35} +!13 = metadata !{metadata 
!"0x101\00\0016777356\000", metadata !4, metadata !5, metadata !8} ; [ DW_TAG_arg_variable ] [] [data] [] +!14 = metadata !{metadata !"0x100\00\00142\000", metadata !4, metadata !5, metadata !15} ; [ DW_TAG_auto_variable ] [] [] [] +!15 = metadata !{metadata !"0x16\00\00183\000\000\000\000", metadata !16, null, metadata !17} ; [ DW_TAG_typedef ] [] [INT32] [] [from long int] +!16 = metadata !{metadata !"", metadata !""} +!17 = metadata !{metadata !"0x24\00\000\0064\0064\000\000\005", null, null} ; [ DW_TAG_base_type ] [] [long int] [] +!18 = metadata !{metadata !"0x100\00\00142\000", metadata !4, metadata !5, metadata !15} ; [ DW_TAG_auto_variable ] [] [] [] +!19 = metadata !{metadata !"0x100\00\00142\000", metadata !4, metadata !5, metadata !15} ; [ DW_TAG_auto_variable ] [] [] [] +!20 = metadata !{metadata !"0x100\00\00142\000", metadata !4, metadata !5, metadata !15} ; [ DW_TAG_auto_variable ] [] [] [] +!21 = metadata !{metadata !"0x100\00\00142\000", metadata !4, metadata !5, metadata !15} ; [ DW_TAG_auto_variable ] [] [] [] +!22 = metadata !{metadata !"0x100\00\00142\000", metadata !4, metadata !5, metadata !15} ; [ DW_TAG_auto_variable ] [] [] [] +!23 = metadata !{metadata !"0x100\00\00142\000", metadata !4, metadata !5, metadata !15} ; [ DW_TAG_auto_variable ] [] [] [] +!24 = metadata !{metadata !"0x100\00\00142\000", metadata !4, metadata !5, metadata !15} ; [ DW_TAG_auto_variable ] [ ] [] [] +!25 = metadata !{metadata !"0x100\00\00143\000", metadata !4, metadata !5, metadata !15} ; [ DW_TAG_auto_variable ] [ ] [] [] +!26 = metadata !{metadata !"0x100\00\00143\000", metadata !4, metadata !5, metadata !15} ; [ DW_TAG_auto_variable ] [ ] [] [] +!27 = metadata !{metadata !"0x100\00\00143\000", metadata !4, metadata !5, metadata !15} ; [ DW_TAG_auto_variable ] [ ] [] [] +!28 = metadata !{metadata !"0x100\00\00143\000", metadata !4, metadata !5, metadata !15} ; [ DW_TAG_auto_variable ] [ ] [] [] +!29 = metadata !{metadata !"0x100\00\00144\000", metadata !4, 
metadata !5, metadata !15} ; [ DW_TAG_auto_variable ] [ ] [] [] +!30 = metadata !{metadata !"0x100\00\00144\000", metadata !4, metadata !5, metadata !15} ; [ DW_TAG_auto_variable ] [ ] [] [] +!31 = metadata !{metadata !"0x100\00\00144\000", metadata !4, metadata !5, metadata !15} ; [ DW_TAG_auto_variable ] [ ] [] [] +!32 = metadata !{metadata !"0x100\00\00144\000", metadata !4, metadata !5, metadata !15} ; [ DW_TAG_auto_variable ] [ ] [] [] +!33 = metadata !{metadata !"0x100\00\00144\000", metadata !4, metadata !5, metadata !15} ; [ DW_TAG_auto_variable ] [ ] [] [] +!34 = metadata !{metadata !"0x100\00\00145\000", metadata !4, metadata !5, metadata !8} ; [ DW_TAG_auto_variable ] [ ] [] [] +!35 = metadata !{metadata !"0x100\00\00146\000", metadata !4, metadata !5, metadata !11} ; [ DW_TAG_auto_variable ] [ ] [] [] +!36 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!37 = metadata !{i32 2, metadata !"Debug Info Version", i32 2} +!38 = metadata !{metadata !"clang version 3.6.0 "} +!39 = metadata !{i32 154, i32 8, metadata !40, null} +!40 = metadata !{metadata !"0xb\00154\008\002", metadata !1, metadata !41} ; [ DW_TAG_lexical_block ] [ ] [] +!41 = metadata !{metadata !"0xb\00154\008\001", metadata !1, metadata !42} ; [ DW_TAG_lexical_block ] [ ] [] +!42 = metadata !{metadata !"0xb\00154\003\000", metadata !1, metadata !4} ; [ DW_TAG_lexical_block ] [ ] [] +!43 = metadata !{i32 157, i32 5, metadata !44, null} +!44 = metadata !{metadata !"0xb\00154\0042\000", metadata !1, metadata !42} ; [ DW_TAG_lexical_block ] [ ] [] +!45 = metadata !{i32 159, i32 5, metadata !44, null} +!46 = metadata !{metadata !47, metadata !47, i64 0} +!47 = metadata !{metadata !"int", metadata !48, i64 0} +!48 = metadata !{metadata !"omnipotent char", metadata !49, i64 0} +!49 = metadata !{metadata !"Simple C/C++ TBAA"} +!50 = metadata !{i32 160, i32 5, metadata !44, null} +!51 = metadata !{i32 161, i32 5, metadata !44, null} +!52 = metadata !{i32 188, i32 5, metadata !44, null} +!53 = 
metadata !{i32 190, i32 5, metadata !44, null} +!54 = metadata !{i32 198, i32 5, metadata !44, null} +!55 = metadata !{i32 144, i32 13, metadata !4, null} +!56 = metadata !{i32 200, i32 5, metadata !44, null} +!57 = metadata !{i32 203, i32 5, metadata !44, null} +!58 = metadata !{i32 207, i32 5, metadata !44, null} +!59 = metadata !{i32 208, i32 5, metadata !44, null} diff --git a/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll b/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll index fb229fc..7108bc0 100644 --- a/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll +++ b/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll @@ -1,5 +1,7 @@ -; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all | FileCheck %s --check-prefix CHECK --check-prefix CHECK-EVEN -; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all | FileCheck %s --check-prefix CHECK --check-prefix CHECK-ODD +; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A57 --check-prefix CHECK-EVEN +; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A57 --check-prefix CHECK-ODD +; RUN: llc < %s -mcpu=cortex-a53 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A53 --check-prefix CHECK-EVEN +; RUN: llc < %s -mcpu=cortex-a53 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A53 --check-prefix CHECK-ODD ; Test the AArch64A57FPLoadBalancing pass. 
This pass relies heavily on register allocation, so ; our test strategy is to: @@ -73,7 +75,9 @@ entry: ; CHECK: fmsub [[x]] ; CHECK: fmadd [[y]] ; CHECK: fmadd [[x]] -; CHECK: stp [[x]], [[y]] +; CHECK-A57: stp [[x]], [[y]] +; CHECK-A53-DAG: str [[x]] +; CHECK-A53-DAG: str [[y]] define void @f2(double* nocapture readonly %p, double* nocapture %q) #0 { entry: @@ -166,7 +170,9 @@ declare void @g(...) #1 ; CHECK: fmsub [[x]] ; CHECK: fmadd [[y]] ; CHECK: fmadd [[x]] -; CHECK: stp [[x]], [[y]] +; CHECK-A57: stp [[x]], [[y]] +; CHECK-A53-DAG: str [[x]] +; CHECK-A53-DAG: str [[y]] define void @f4(float* nocapture readonly %p, float* nocapture %q) #0 { entry: diff --git a/test/CodeGen/AArch64/aarch64-be-bv.ll b/test/CodeGen/AArch64/aarch64-be-bv.ll new file mode 100644 index 0000000..01642a4 --- /dev/null +++ b/test/CodeGen/AArch64/aarch64-be-bv.ll @@ -0,0 +1,831 @@ +; RUN: llc -mtriple=aarch64_be--linux-gnu < %s | FileCheck %s + +@vec_v8i16 = global <8 x i16> + +; CHECK-LABEL: movi_modimm_t1: +define i16 @movi_modimm_t1() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #0x1 + ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = add <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: movi_modimm_t2: +define i16 @movi_modimm_t2() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #0x1, lsl #8 + ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = add <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: movi_modimm_t3: +define i16 @movi_modimm_t3() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #0x1, lsl #16 + ; CHECK-NEXT: add 
v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = add <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: movi_modimm_t4: +define i16 @movi_modimm_t4() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #0x1, lsl #24 + ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = add <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: movi_modimm_t5: +define i16 @movi_modimm_t5() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: movi v[[REG2:[0-9]+]].8h, #0x1 + ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = add <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: movi_modimm_t6: +define i16 @movi_modimm_t6() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: movi v[[REG2:[0-9]+]].8h, #0x1, lsl #8 + ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = add <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: movi_modimm_t7: +define i16 @movi_modimm_t7() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #0x1, msl #8 + ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = add <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: movi_modimm_t8: +define i16 @movi_modimm_t8() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, 
[x{{[0-9]+}}] + ; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #0x1, msl #16 + ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = add <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: movi_modimm_t9: +define i16 @movi_modimm_t9() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: movi v[[REG2:[0-9]+]].16b, #0x1 + ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = add <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: movi_modimm_t10: +define i16 @movi_modimm_t10() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: movi v[[REG2:[0-9]+]].2d, #0x00ffff0000ffff + ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = add <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: fmov_modimm_t11: +define i16 @fmov_modimm_t11() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: fmov v[[REG2:[0-9]+]].4s, #3.00000000 + ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = add <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: fmov_modimm_t12: +define i16 @fmov_modimm_t12() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: fmov v[[REG2:[0-9]+]].2d, #0.17968750 + ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = add <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: 
mvni_modimm_t1: +define i16 @mvni_modimm_t1() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #0x1 + ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = add <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: mvni_modimm_t2: +define i16 @mvni_modimm_t2() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #0x1, lsl #8 + ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = add <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: mvni_modimm_t3: +define i16 @mvni_modimm_t3() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #0x1, lsl #16 + ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = add <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: mvni_modimm_t4: +define i16 @mvni_modimm_t4() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #0x1, lsl #24 + ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = add <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: mvni_modimm_t5: +define i16 @mvni_modimm_t5() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].8h, #0x1 + ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = add <8 x i16> %in, + %el = 
extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: mvni_modimm_t6: +define i16 @mvni_modimm_t6() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].8h, #0x1, lsl #8 + ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = add <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: mvni_modimm_t7: +define i16 @mvni_modimm_t7() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #0x1, msl #8 + ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = add <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: mvni_modimm_t8: +define i16 @mvni_modimm_t8() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #0x1, msl #16 + ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = add <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: bic_modimm_t1: +define i16 @bic_modimm_t1() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: bic v[[REG2:[0-9]+]].4s, #0x1 + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = and <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: bic_modimm_t2: +define i16 @bic_modimm_t2() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: bic v[[REG2:[0-9]+]].4s, #0x1, lsl #8 + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = and <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el 
+} + +; CHECK-LABEL: bic_modimm_t3: +define i16 @bic_modimm_t3() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: bic v[[REG2:[0-9]+]].4s, #0x1, lsl #16 + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = and <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: bic_modimm_t4: +define i16 @bic_modimm_t4() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: bic v[[REG2:[0-9]+]].4s, #0x1, lsl #24 + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = and <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: bic_modimm_t5: +define i16 @bic_modimm_t5() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: bic v[[REG2:[0-9]+]].8h, #0x1 + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = and <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: bic_modimm_t6: +define i16 @bic_modimm_t6() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: bic v[[REG2:[0-9]+]].8h, #0x1, lsl #8 + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = and <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: orr_modimm_t1: +define i16 @orr_modimm_t1() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: orr v[[REG2:[0-9]+]].4s, #0x1 + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = or <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: orr_modimm_t2: +define i16 @orr_modimm_t2() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: orr v[[REG2:[0-9]+]].4s, #0x1, lsl #8 + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 
x i16>* @vec_v8i16 + %rv = or <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: orr_modimm_t3: +define i16 @orr_modimm_t3() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: orr v[[REG2:[0-9]+]].4s, #0x1, lsl #16 + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = or <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: orr_modimm_t4: +define i16 @orr_modimm_t4() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: orr v[[REG2:[0-9]+]].4s, #0x1, lsl #24 + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = or <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: orr_modimm_t5: +define i16 @orr_modimm_t5() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: orr v[[REG2:[0-9]+]].8h, #0x1 + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = or <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: orr_modimm_t6: +define i16 @orr_modimm_t6() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: orr v[[REG2:[0-9]+]].8h, #0x1, lsl #8 + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = or <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +declare i8 @f_v8i8(<8 x i8> %arg) +declare i16 @f_v4i16(<4 x i16> %arg) +declare i32 @f_v2i32(<2 x i32> %arg) +declare i64 @f_v1i64(<1 x i64> %arg) +declare i8 @f_v16i8(<16 x i8> %arg) +declare i16 @f_v8i16(<8 x i16> %arg) +declare i32 @f_v4i32(<4 x i32> %arg) +declare i64 @f_v2i64(<2 x i64> %arg) + +; CHECK-LABEL: modimm_t1_call: +define void @modimm_t1_call() { + ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x8 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b + ; CHECK-NEXT: bl f_v8i8 + 
call i8 @f_v8i8(<8 x i8> ) + ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x7 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h + ; CHECK-NEXT: bl f_v4i16 + call i16 @f_v4i16(<4 x i16> ) + ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x6 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s + ; CHECK-NEXT: bl f_v2i32 + call i32 @f_v2i32(<2 x i32> ) + ; CHECK: movi v{{[0-9]+}}.2s, #0x5 + ; CHECK-NEXT: bl f_v1i64 + call i64 @f_v1i64(<1 x i64> ) + ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x5 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v16i8 + call i8 @f_v16i8(<16 x i8> ) + ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x4 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v8i16 + call i16 @f_v8i16(<8 x i16> ) + ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x3 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v4i32 + call i32 @f_v4i32(<4 x i32> ) + ; CHECK: movi v[[REG:[0-9]+]].4s, #0x2 + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v2i64 + call i64 @f_v2i64(<2 x i64> ) + + ret void +} + +; CHECK-LABEL: modimm_t2_call: +define void @modimm_t2_call() { + ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x8, lsl #8 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b + ; CHECK-NEXT: bl f_v8i8 + call i8 @f_v8i8(<8 x i8> ) + ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x7, lsl #8 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h + ; CHECK-NEXT: bl f_v4i16 + call i16 @f_v4i16(<4 x i16> ) + ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x6, lsl #8 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s + ; CHECK-NEXT: bl f_v2i32 + call i32 @f_v2i32(<2 x i32> ) + ; CHECK: movi v{{[0-9]+}}.2s, #0x5, lsl #8 + ; CHECK-NEXT: bl f_v1i64 + call i64 @f_v1i64(<1 x i64> ) + ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x5, lsl #8 + ; CHECK-NEXT: 
rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v16i8 + call i8 @f_v16i8(<16 x i8> ) + ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x4, lsl #8 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v8i16 + call i16 @f_v8i16(<8 x i16> ) + ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x3, lsl #8 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v4i32 + call i32 @f_v4i32(<4 x i32> ) + ; CHECK: movi v[[REG:[0-9]+]].4s, #0x2, lsl #8 + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v2i64 + call i64 @f_v2i64(<2 x i64> ) + + ret void +} + +; CHECK-LABEL: modimm_t3_call: +define void @modimm_t3_call() { + ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x8, lsl #16 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b + ; CHECK-NEXT: bl f_v8i8 + call i8 @f_v8i8(<8 x i8> ) + ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x7, lsl #16 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h + ; CHECK-NEXT: bl f_v4i16 + call i16 @f_v4i16(<4 x i16> ) + ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x6, lsl #16 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s + ; CHECK-NEXT: bl f_v2i32 + call i32 @f_v2i32(<2 x i32> ) + ; CHECK: movi v{{[0-9]+}}.2s, #0x5, lsl #16 + ; CHECK-NEXT: bl f_v1i64 + call i64 @f_v1i64(<1 x i64> ) + ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x5, lsl #16 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v16i8 + call i8 @f_v16i8(<16 x i8> ) + ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x4, lsl #16 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v8i16 + call i16 @f_v8i16(<8 x i16> ) + ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x3, lsl #16 + 
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v4i32 + call i32 @f_v4i32(<4 x i32> ) + ; CHECK: movi v[[REG:[0-9]+]].4s, #0x2, lsl #16 + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v2i64 + call i64 @f_v2i64(<2 x i64> ) + + ret void +} + +; CHECK-LABEL: modimm_t4_call: +define void @modimm_t4_call() { + ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x8, lsl #24 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b + ; CHECK-NEXT: bl f_v8i8 + call i8 @f_v8i8(<8 x i8> ) + ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x7, lsl #24 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h + ; CHECK-NEXT: bl f_v4i16 + call i16 @f_v4i16(<4 x i16> ) + ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x6, lsl #24 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s + ; CHECK-NEXT: bl f_v2i32 + call i32 @f_v2i32(<2 x i32> ) + ; CHECK: movi v{{[0-9]+}}.2s, #0x5, lsl #24 + ; CHECK-NEXT: bl f_v1i64 + call i64 @f_v1i64(<1 x i64> ) + ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x5, lsl #24 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v16i8 + call i8 @f_v16i8(<16 x i8> ) + ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x4, lsl #24 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v8i16 + call i16 @f_v8i16(<8 x i16> ) + ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x3, lsl #24 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v4i32 + call i32 @f_v4i32(<4 x i32> ) + ; CHECK: movi v[[REG:[0-9]+]].4s, #0x2, lsl #24 + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v2i64 + call i64 @f_v2i64(<2 x i64> ) + + ret void +} + +; CHECK-LABEL: modimm_t5_call: +define void @modimm_t5_call() { + ; CHECK: 
movi v[[REG1:[0-9]+]].4h, #0x8 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b + ; CHECK-NEXT: bl f_v8i8 + call i8 @f_v8i8(<8 x i8> ) + ; CHECK: movi v[[REG1:[0-9]+]].4h, #0x7 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h + ; CHECK-NEXT: bl f_v4i16 + call i16 @f_v4i16(<4 x i16> ) + ; CHECK: movi v[[REG1:[0-9]+]].4h, #0x6 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s + ; CHECK-NEXT: bl f_v2i32 + call i32 @f_v2i32(<2 x i32> ) + ; CHECK: movi v{{[0-9]+}}.4h, #0x5 + ; CHECK-NEXT: bl f_v1i64 + call i64 @f_v1i64(<1 x i64> ) + ; CHECK: movi v[[REG1:[0-9]+]].8h, #0x5 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v16i8 + call i8 @f_v16i8(<16 x i8> ) + ; CHECK: movi v[[REG1:[0-9]+]].8h, #0x4 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v8i16 + call i16 @f_v8i16(<8 x i16> ) + ; CHECK: movi v[[REG1:[0-9]+]].8h, #0x3 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v4i32 + call i32 @f_v4i32(<4 x i32> ) + ; CHECK: movi v[[REG:[0-9]+]].8h, #0x2 + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v2i64 + call i64 @f_v2i64(<2 x i64> ) + + ret void +} + +; CHECK-LABEL: modimm_t6_call: +define void @modimm_t6_call() { + ; CHECK: movi v[[REG1:[0-9]+]].4h, #0x8, lsl #8 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b + ; CHECK-NEXT: bl f_v8i8 + call i8 @f_v8i8(<8 x i8> ) + ; CHECK: movi v[[REG1:[0-9]+]].4h, #0x7, lsl #8 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h + ; CHECK-NEXT: bl f_v4i16 + call i16 @f_v4i16(<4 x i16> ) + ; CHECK: movi v[[REG1:[0-9]+]].4h, #0x6, lsl #8 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s + ; CHECK-NEXT: bl f_v2i32 + call i32 @f_v2i32(<2 x i32> ) + ; CHECK: movi v{{[0-9]+}}.4h, #0x5, lsl #8 + ; 
CHECK-NEXT: bl f_v1i64 + call i64 @f_v1i64(<1 x i64> ) + ; CHECK: movi v[[REG1:[0-9]+]].8h, #0x5, lsl #8 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v16i8 + call i8 @f_v16i8(<16 x i8> ) + ; CHECK: movi v[[REG1:[0-9]+]].8h, #0x4, lsl #8 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v8i16 + call i16 @f_v8i16(<8 x i16> ) + ; CHECK: movi v[[REG1:[0-9]+]].8h, #0x3, lsl #8 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v4i32 + call i32 @f_v4i32(<4 x i32> ) + ; CHECK: movi v[[REG:[0-9]+]].8h, #0x2, lsl #8 + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v2i64 + call i64 @f_v2i64(<2 x i64> ) + + ret void +} + +; CHECK-LABEL: modimm_t7_call: +define void @modimm_t7_call() { + ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x8, msl #8 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b + ; CHECK-NEXT: bl f_v8i8 + call i8 @f_v8i8(<8 x i8> ) + ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x7, msl #8 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h + ; CHECK-NEXT: bl f_v4i16 + call i16 @f_v4i16(<4 x i16> ) + ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x6, msl #8 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s + ; CHECK-NEXT: bl f_v2i32 + call i32 @f_v2i32(<2 x i32> ) + ; CHECK: movi v{{[0-9]+}}.2s, #0x5, msl #8 + ; CHECK-NEXT: bl f_v1i64 + call i64 @f_v1i64(<1 x i64> ) + ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x5, msl #8 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v16i8 + call i8 @f_v16i8(<16 x i8> ) + ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x4, msl #8 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, 
#8 + ; CHECK-NEXT: bl f_v8i16 + call i16 @f_v8i16(<8 x i16> ) + ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x3, msl #8 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v4i32 + call i32 @f_v4i32(<4 x i32> ) + ; CHECK: movi v[[REG:[0-9]+]].4s, #0x2, msl #8 + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v2i64 + call i64 @f_v2i64(<2 x i64> ) + + ret void +} + +; CHECK-LABEL: modimm_t8_call: +define void @modimm_t8_call() { + ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x8, msl #16 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b + ; CHECK-NEXT: bl f_v8i8 + call i8 @f_v8i8(<8 x i8> ) + ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x7, msl #16 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h + ; CHECK-NEXT: bl f_v4i16 + call i16 @f_v4i16(<4 x i16> ) + ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x6, msl #16 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s + ; CHECK-NEXT: bl f_v2i32 + call i32 @f_v2i32(<2 x i32> ) + ; CHECK: movi v{{[0-9]+}}.2s, #0x5, msl #16 + ; CHECK-NEXT: bl f_v1i64 + call i64 @f_v1i64(<1 x i64> ) + ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x5, msl #16 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v16i8 + call i8 @f_v16i8(<16 x i8> ) + ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x4, msl #16 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v8i16 + call i16 @f_v8i16(<8 x i16> ) + ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x3, msl #16 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v4i32 + call i32 @f_v4i32(<4 x i32> ) + ; CHECK: movi v[[REG:[0-9]+]].4s, #0x2, msl #16 + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v2i64 + call i64 
@f_v2i64(<2 x i64> ) + + ret void +} + +; CHECK-LABEL: modimm_t9_call: +define void @modimm_t9_call() { + ; CHECK: movi v[[REG1:[0-9]+]].8b, #0x8 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b + ; CHECK-NEXT: bl f_v8i8 + call i8 @f_v8i8(<8 x i8> ) + ; CHECK: movi v[[REG1:[0-9]+]].8b, #0x7 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h + ; CHECK-NEXT: bl f_v4i16 + call i16 @f_v4i16(<4 x i16> ) + ; CHECK: movi v[[REG1:[0-9]+]].8b, #0x6 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s + ; CHECK-NEXT: bl f_v2i32 + call i32 @f_v2i32(<2 x i32> ) + ; CHECK: movi v[[REG1:[0-9]+]].16b, #0x5 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v16i8 + call i8 @f_v16i8(<16 x i8> ) + ; CHECK: movi v[[REG1:[0-9]+]].16b, #0x4 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v8i16 + call i16 @f_v8i16(<8 x i16> ) + ; CHECK: movi v[[REG1:[0-9]+]].16b, #0x3 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v4i32 + call i32 @f_v4i32(<4 x i32> ) + + ret void +} + +; CHECK-LABEL: modimm_t10_call: +define void @modimm_t10_call() { + ; CHECK: movi d[[REG1:[0-9]+]], #0x0000ff000000ff + ; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b + ; CHECK-NEXT: bl f_v8i8 + call i8 @f_v8i8(<8 x i8> ) + ; CHECK: movi d[[REG1:[0-9]+]], #0x00ffff0000ffff + ; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h + ; CHECK-NEXT: bl f_v4i16 + call i16 @f_v4i16(<4 x i16> ) + ; CHECK: movi d[[REG1:[0-9]+]], #0xffffffffffffffff + ; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s + ; CHECK-NEXT: bl f_v2i32 + call i32 @f_v2i32(<2 x i32> ) + ; CHECK: movi v[[REG1:[0-9]+]].2d, #0xffffff00ffffff + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; 
CHECK-NEXT: bl f_v16i8 + call i8 @f_v16i8(<16 x i8> ) + ; CHECK: movi v[[REG1:[0-9]+]].2d, #0xffffffffffff0000 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v8i16 + call i16 @f_v8i16(<8 x i16> ) + ; CHECK: movi v[[REG1:[0-9]+]].2d, #0xffffffff00000000 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v4i32 + call i32 @f_v4i32(<4 x i32> ) + + ret void +} + +; CHECK-LABEL: modimm_t11_call: +define void @modimm_t11_call() { + ; CHECK: fmov v[[REG1:[0-9]+]].2s, #4.00000000 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b + ; CHECK-NEXT: bl f_v8i8 + call i8 @f_v8i8(<8 x i8> ) + ; CHECK: fmov v[[REG1:[0-9]+]].2s, #3.75000000 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h + ; CHECK-NEXT: bl f_v4i16 + call i16 @f_v4i16(<4 x i16> ) + ; CHECK: fmov v[[REG1:[0-9]+]].2s, #3.50000000 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s + ; CHECK-NEXT: bl f_v2i32 + call i32 @f_v2i32(<2 x i32> ) + ; CHECK: fmov v{{[0-9]+}}.2s, #0.39062500 + ; CHECK-NEXT: bl f_v1i64 + call i64 @f_v1i64(<1 x i64> ) + ; CHECK: fmov v[[REG1:[0-9]+]].4s, #3.25000000 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v16i8 + call i8 @f_v16i8(<16 x i8> ) + ; CHECK: fmov v[[REG1:[0-9]+]].4s, #3.00000000 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v8i16 + call i16 @f_v8i16(<8 x i16> ) + ; CHECK: fmov v[[REG1:[0-9]+]].4s, #2.75000000 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v4i32 + call i32 @f_v4i32(<4 x i32> ) + ; CHECK: fmov v[[REG:[0-9]+]].4s, #2.5000000 + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, 
v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v2i64 + call i64 @f_v2i64(<2 x i64> ) + + ret void +} + +; CHECK-LABEL: modimm_t12_call: +define void @modimm_t12_call() { + ; CHECK: fmov v[[REG1:[0-9]+]].2d, #0.18750000 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v16i8 + call i8 @f_v16i8(<16 x i8> ) + ; CHECK: fmov v[[REG1:[0-9]+]].2d, #0.17968750 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v8i16 + call i16 @f_v8i16(<8 x i16> ) + ; CHECK: fmov v[[REG1:[0-9]+]].2d, #0.17187500 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v4i32 + call i32 @f_v4i32(<4 x i32> ) + + ret void +} diff --git a/test/CodeGen/AArch64/aarch64-gep-opt.ll b/test/CodeGen/AArch64/aarch64-gep-opt.ll new file mode 100644 index 0000000..811eed9 --- /dev/null +++ b/test/CodeGen/AArch64/aarch64-gep-opt.ll @@ -0,0 +1,163 @@ +; RUN: llc -O3 -verify-machineinstrs %s -o - | FileCheck %s +; RUN: llc -O3 -print-after=codegenprepare -mcpu=cyclone < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-NoAA <%t %s +; RUN: llc -O3 -print-after=codegenprepare -mcpu=cortex-a53 < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-UseAA <%t %s +target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-linux-gnueabi" + +; Following test cases test enabling SeparateConstOffsetFromGEP pass in AArch64 +; backend. If useAA() returns true, it will lower a GEP with multiple indices +; into GEPs with a single index, otherwise it will lower it into a +; "ptrtoint+arithmetics+inttoptr" form. + +%struct = type { i32, i32, i32, i32, [20 x i32] } + +; Check that when two complex GEPs are used in two basic blocks, LLVM can +; eliminate the common subexpression for the second use.
+define void @test_GEP_CSE([240 x %struct]* %string, i32* %adj, i32 %lib, i64 %idxprom) { + %liberties = getelementptr [240 x %struct]* %string, i64 1, i64 %idxprom, i32 3 + %1 = load i32* %liberties, align 4 + %cmp = icmp eq i32 %1, %lib + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %origin = getelementptr [240 x %struct]* %string, i64 1, i64 %idxprom, i32 2 + %2 = load i32* %origin, align 4 + store i32 %2, i32* %adj, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +; CHECK-LABEL: test_GEP_CSE: +; CHECK: madd +; CHECK: ldr +; CHECK-NOT: madd +; CHECK:ldr + +; CHECK-NoAA-LABEL: @test_GEP_CSE( +; CHECK-NoAA: [[PTR0:%[a-zA-Z0-9]+]] = ptrtoint [240 x %struct]* %string to i64 +; CHECK-NoAA: [[PTR1:%[a-zA-Z0-9]+]] = mul i64 %idxprom, 96 +; CHECK-NoAA: [[PTR2:%[a-zA-Z0-9]+]] = add i64 [[PTR0]], [[PTR1]] +; CHECK-NoAA: add i64 [[PTR2]], 23052 +; CHECK-NoAA: inttoptr +; CHECK-NoAA: if.then: +; CHECK-NoAA-NOT: ptrtoint +; CHECK-NoAA-NOT: mul +; CHECK-NoAA: add i64 [[PTR2]], 23048 +; CHECK-NoAA: inttoptr + +; CHECK-UseAA-LABEL: @test_GEP_CSE( +; CHECK-UseAA: [[PTR0:%[a-zA-Z0-9]+]] = bitcast [240 x %struct]* %string to i8* +; CHECK-UseAA: [[IDX:%[a-zA-Z0-9]+]] = mul i64 %idxprom, 96 +; CHECK-UseAA: [[PTR1:%[a-zA-Z0-9]+]] = getelementptr i8* [[PTR0]], i64 [[IDX]] +; CHECK-UseAA: getelementptr i8* [[PTR1]], i64 23052 +; CHECK-UseAA: bitcast +; CHECK-UseAA: if.then: +; CHECK-UseAA: getelementptr i8* [[PTR1]], i64 23048 +; CHECK-UseAA: bitcast + +%class.my = type { i32, [128 x i32], i32, [256 x %struct.pt]} +%struct.pt = type { %struct.point*, i32, i32 } +%struct.point = type { i32, i32 } + +; Check that when a GEP is used across two basic blocks, LLVM can sink the address +; calculation and code gen can generate a better addressing mode for the second +; use.
+define void @test_GEP_across_BB(%class.my* %this, i64 %idx) { + %1 = getelementptr %class.my* %this, i64 0, i32 3, i64 %idx, i32 1 + %2 = load i32* %1, align 4 + %3 = getelementptr %class.my* %this, i64 0, i32 3, i64 %idx, i32 2 + %4 = load i32* %3, align 4 + %5 = icmp eq i32 %2, %4 + br i1 %5, label %if.true, label %exit + +if.true: + %6 = shl i32 %4, 1 + store i32 %6, i32* %3, align 4 + br label %exit + +exit: + %7 = add nsw i32 %4, 1 + store i32 %7, i32* %1, align 4 + ret void +} +; CHECK-LABEL: test_GEP_across_BB: +; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #528] +; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #532] +; CHECK-NOT: add +; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, #532] +; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, #528] + +; CHECK-NoAA-LABEL: test_GEP_across_BB( +; CHECK-NoAA: add i64 [[TMP:%[a-zA-Z0-9]+]], 528 +; CHECK-NoAA: add i64 [[TMP]], 532 +; CHECK-NoAA: if.true: +; CHECK-NoAA: {{%sunk[a-zA-Z0-9]+}} = add i64 [[TMP]], 532 +; CHECK-NoAA: exit: +; CHECK-NoAA: {{%sunk[a-zA-Z0-9]+}} = add i64 [[TMP]], 528 + +; CHECK-UseAA-LABEL: test_GEP_across_BB( +; CHECK-UseAA: [[PTR0:%[a-zA-Z0-9]+]] = getelementptr +; CHECK-UseAA: getelementptr i8* [[PTR0]], i64 528 +; CHECK-UseAA: getelementptr i8* [[PTR0]], i64 532 +; CHECK-UseAA: if.true: +; CHECK-UseAA: {{%sunk[a-zA-Z0-9]+}} = getelementptr i8* [[PTR0]], i64 532 +; CHECK-UseAA: exit: +; CHECK-UseAA: {{%sunk[a-zA-Z0-9]+}} = getelementptr i8* [[PTR0]], i64 528 + +%struct.S = type { float, double } +@struct_array = global [1024 x %struct.S] zeroinitializer, align 16 + +; The following two test cases check we can extract constant from indices of +; struct type. +; The constant offsets are from indices "i64 %idxprom" and "i32 1". As the +; alloca size of %struct.S is 16, and "i32 1" is the 2nd element whose field +; offset is 8, the total constant offset is (5 * 16 + 8) = 88.
+define double* @test-struct_1(i32 %i) { +entry: + %add = add nsw i32 %i, 5 + %idxprom = sext i32 %add to i64 + %p = getelementptr [1024 x %struct.S]* @struct_array, i64 0, i64 %idxprom, i32 1 + ret double* %p +} +; CHECK-NoAA-LABEL: @test-struct_1( +; CHECK-NoAA-NOT: getelementptr +; CHECK-NoAA: add i64 %{{[a-zA-Z0-9]+}}, 88 + +; CHECK-UseAA-LABEL: @test-struct_1( +; CHECK-UseAA: getelementptr i8* %{{[a-zA-Z0-9]+}}, i64 88 + +%struct3 = type { i64, i32 } +%struct2 = type { %struct3, i32 } +%struct1 = type { i64, %struct2 } +%struct0 = type { i32, i32, i64*, [100 x %struct1] } + +; The constant offsets are from indices "i32 3", "i64 %arrayidx" and "i32 1". +; "i32 3" is the 4th element whose field offset is 16. The alloca size of +; %struct1 is 32. "i32 1" is the 2nd element whose field offset is 8. So the +; total constant offset is 16 + (-2 * 32) + 8 = -40 +define %struct2* @test-struct_2(%struct0* %ptr, i64 %idx) { +entry: + %arrayidx = add nsw i64 %idx, -2 + %ptr2 = getelementptr %struct0* %ptr, i64 0, i32 3, i64 %arrayidx, i32 1 + ret %struct2* %ptr2 +} +; CHECK-NoAA-LABEL: @test-struct_2( +; CHECK-NoAA-NOT: = getelementptr +; CHECK-NoAA: add i64 %{{[a-zA-Z0-9]+}}, -40 + +; CHECK-UseAA-LABEL: @test-struct_2( +; CHECK-UseAA: getelementptr i8* %{{[a-zA-Z0-9]+}}, i64 -40 + +; Test that when an index is added from two constants, SeparateConstOffsetFromGEP +; pass does not generate incorrect result.
+define void @test_const_add([3 x i32]* %in) { + %inc = add nsw i32 2, 1 + %idxprom = sext i32 %inc to i64 + %arrayidx = getelementptr [3 x i32]* %in, i64 %idxprom, i64 2 + store i32 0, i32* %arrayidx, align 4 + ret void +} +; CHECK-LABEL: test_const_add: +; CHECK: str wzr, [x0, #44] diff --git a/test/CodeGen/AArch64/aarch64-smull.ll b/test/CodeGen/AArch64/aarch64-smull.ll new file mode 100644 index 0000000..92582d7 --- /dev/null +++ b/test/CodeGen/AArch64/aarch64-smull.ll @@ -0,0 +1,332 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s -o -| FileCheck %s + +define <8 x i16> @smull_v8i8_v8i16(<8 x i8>* %A, <8 x i8>* %B) nounwind { +; CHECK-LABEL: smull_v8i8_v8i16: +; CHECK: smull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = sext <8 x i8> %tmp1 to <8 x i16> + %tmp4 = sext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = mul <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 +} + +define <4 x i32> @smull_v4i16_v4i32(<4 x i16>* %A, <4 x i16>* %B) nounwind { +; CHECK-LABEL: smull_v4i16_v4i32: +; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = sext <4 x i16> %tmp1 to <4 x i32> + %tmp4 = sext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = mul <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 +} + +define <2 x i64> @smull_v2i32_v2i64(<2 x i32>* %A, <2 x i32>* %B) nounwind { +; CHECK-LABEL: smull_v2i32_v2i64: +; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = sext <2 x i32> %tmp1 to <2 x i64> + %tmp4 = sext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = mul <2 x i64> %tmp3, %tmp4 + ret <2 x i64> %tmp5 +} + +define <8 x i16> @umull_v8i8_v8i16(<8 x i8>* %A, <8 x i8>* %B) nounwind { +; CHECK-LABEL: umull_v8i8_v8i16: +; CHECK: umull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = zext <8 x i8> %tmp1 to <8 x i16> 
+ %tmp4 = zext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = mul <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 +} + +define <4 x i32> @umull_v4i16_v4i32(<4 x i16>* %A, <4 x i16>* %B) nounwind { +; CHECK-LABEL: umull_v4i16_v4i32: +; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = zext <4 x i16> %tmp1 to <4 x i32> + %tmp4 = zext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = mul <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 +} + +define <2 x i64> @umull_v2i32_v2i64(<2 x i32>* %A, <2 x i32>* %B) nounwind { +; CHECK-LABEL: umull_v2i32_v2i64: +; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = zext <2 x i32> %tmp1 to <2 x i64> + %tmp4 = zext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = mul <2 x i64> %tmp3, %tmp4 + ret <2 x i64> %tmp5 +} + +define <8 x i16> @smlal_v8i8_v8i16(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { +; CHECK-LABEL: smlal_v8i8_v8i16: +; CHECK: smlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = load <8 x i8>* %C + %tmp4 = sext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = sext <8 x i8> %tmp3 to <8 x i16> + %tmp6 = mul <8 x i16> %tmp4, %tmp5 + %tmp7 = add <8 x i16> %tmp1, %tmp6 + ret <8 x i16> %tmp7 +} + +define <4 x i32> @smlal_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { +; CHECK-LABEL: smlal_v4i16_v4i32: +; CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = load <4 x i16>* %C + %tmp4 = sext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = sext <4 x i16> %tmp3 to <4 x i32> + %tmp6 = mul <4 x i32> %tmp4, %tmp5 + %tmp7 = add <4 x i32> %tmp1, %tmp6 + ret <4 x i32> %tmp7 +} + +define <2 x i64> @smlal_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { +; CHECK-LABEL: smlal_v2i32_v2i64: +; CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + 
%tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = load <2 x i32>* %C + %tmp4 = sext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = sext <2 x i32> %tmp3 to <2 x i64> + %tmp6 = mul <2 x i64> %tmp4, %tmp5 + %tmp7 = add <2 x i64> %tmp1, %tmp6 + ret <2 x i64> %tmp7 +} + +define <8 x i16> @umlal_v8i8_v8i16(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { +; CHECK-LABEL: umlal_v8i8_v8i16: +; CHECK: umlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = load <8 x i8>* %C + %tmp4 = zext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = zext <8 x i8> %tmp3 to <8 x i16> + %tmp6 = mul <8 x i16> %tmp4, %tmp5 + %tmp7 = add <8 x i16> %tmp1, %tmp6 + ret <8 x i16> %tmp7 +} + +define <4 x i32> @umlal_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { +; CHECK-LABEL: umlal_v4i16_v4i32: +; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = load <4 x i16>* %C + %tmp4 = zext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = zext <4 x i16> %tmp3 to <4 x i32> + %tmp6 = mul <4 x i32> %tmp4, %tmp5 + %tmp7 = add <4 x i32> %tmp1, %tmp6 + ret <4 x i32> %tmp7 +} + +define <2 x i64> @umlal_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { +; CHECK-LABEL: umlal_v2i32_v2i64: +; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = load <2 x i32>* %C + %tmp4 = zext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = zext <2 x i32> %tmp3 to <2 x i64> + %tmp6 = mul <2 x i64> %tmp4, %tmp5 + %tmp7 = add <2 x i64> %tmp1, %tmp6 + ret <2 x i64> %tmp7 +} + +define <8 x i16> @smlsl_v8i8_v8i16(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { +; CHECK-LABEL: smlsl_v8i8_v8i16: +; CHECK: smlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = load <8 x i8>* %C + %tmp4 = sext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = sext <8 
x i8> %tmp3 to <8 x i16> + %tmp6 = mul <8 x i16> %tmp4, %tmp5 + %tmp7 = sub <8 x i16> %tmp1, %tmp6 + ret <8 x i16> %tmp7 +} + +define <4 x i32> @smlsl_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { +; CHECK-LABEL: smlsl_v4i16_v4i32: +; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = load <4 x i16>* %C + %tmp4 = sext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = sext <4 x i16> %tmp3 to <4 x i32> + %tmp6 = mul <4 x i32> %tmp4, %tmp5 + %tmp7 = sub <4 x i32> %tmp1, %tmp6 + ret <4 x i32> %tmp7 +} + +define <2 x i64> @smlsl_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { +; CHECK-LABEL: smlsl_v2i32_v2i64: +; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = load <2 x i32>* %C + %tmp4 = sext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = sext <2 x i32> %tmp3 to <2 x i64> + %tmp6 = mul <2 x i64> %tmp4, %tmp5 + %tmp7 = sub <2 x i64> %tmp1, %tmp6 + ret <2 x i64> %tmp7 +} + +define <8 x i16> @umlsl_v8i8_v8i16(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { +; CHECK-LABEL: umlsl_v8i8_v8i16: +; CHECK: umlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = load <8 x i8>* %C + %tmp4 = zext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = zext <8 x i8> %tmp3 to <8 x i16> + %tmp6 = mul <8 x i16> %tmp4, %tmp5 + %tmp7 = sub <8 x i16> %tmp1, %tmp6 + ret <8 x i16> %tmp7 +} + +define <4 x i32> @umlsl_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { +; CHECK-LABEL: umlsl_v4i16_v4i32: +; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = load <4 x i16>* %C + %tmp4 = zext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = zext <4 x i16> %tmp3 to <4 x i32> + %tmp6 = mul <4 x i32> %tmp4, %tmp5 + %tmp7 = sub <4 x i32> %tmp1, %tmp6 + ret <4 x i32> %tmp7 +} + +define <2 x 
i64> @umlsl_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { +; CHECK-LABEL: umlsl_v2i32_v2i64: +; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = load <2 x i32>* %C + %tmp4 = zext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = zext <2 x i32> %tmp3 to <2 x i64> + %tmp6 = mul <2 x i64> %tmp4, %tmp5 + %tmp7 = sub <2 x i64> %tmp1, %tmp6 + ret <2 x i64> %tmp7 +} + +; SMULL recognizing BUILD_VECTORs with sign/zero-extended elements. +define <8 x i16> @smull_extvec_v8i8_v8i16(<8 x i8> %arg) nounwind { +; CHECK-LABEL: smull_extvec_v8i8_v8i16: +; CHECK: smull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + %tmp3 = sext <8 x i8> %arg to <8 x i16> + %tmp4 = mul <8 x i16> %tmp3, + ret <8 x i16> %tmp4 +} + +define <8 x i16> @smull_noextvec_v8i8_v8i16(<8 x i8> %arg) nounwind { +; Do not use SMULL if the BUILD_VECTOR element values are too big. +; CHECK-LABEL: smull_noextvec_v8i8_v8i16: +; CHECK: movz +; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h + %tmp3 = sext <8 x i8> %arg to <8 x i16> + %tmp4 = mul <8 x i16> %tmp3, + ret <8 x i16> %tmp4 +} + +define <4 x i32> @smull_extvec_v4i16_v4i32(<4 x i16> %arg) nounwind { +; CHECK-LABEL: smull_extvec_v4i16_v4i32: +; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h + %tmp3 = sext <4 x i16> %arg to <4 x i32> + %tmp4 = mul <4 x i32> %tmp3, + ret <4 x i32> %tmp4 +} + +define <2 x i64> @smull_extvec_v2i32_v2i64(<2 x i32> %arg) nounwind { +; CHECK: smull_extvec_v2i32_v2i64 +; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + %tmp3 = sext <2 x i32> %arg to <2 x i64> + %tmp4 = mul <2 x i64> %tmp3, + ret <2 x i64> %tmp4 +} + +define <8 x i16> @umull_extvec_v8i8_v8i16(<8 x i8> %arg) nounwind { +; CHECK-LABEL: umull_extvec_v8i8_v8i16: +; CHECK: umull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + %tmp3 = zext <8 x i8> %arg to <8 x i16> + %tmp4 = mul <8 x i16> %tmp3, + ret <8 x i16> %tmp4 +} + +define <8 x i16> 
@umull_noextvec_v8i8_v8i16(<8 x i8> %arg) nounwind { +; Do not use UMULL if the BUILD_VECTOR element values are too big. +; CHECK-LABEL: umull_noextvec_v8i8_v8i16: +; CHECK: movz +; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h + %tmp3 = zext <8 x i8> %arg to <8 x i16> + %tmp4 = mul <8 x i16> %tmp3, + ret <8 x i16> %tmp4 +} + +define <4 x i32> @umull_extvec_v4i16_v4i32(<4 x i16> %arg) nounwind { +; CHECK-LABEL: umull_extvec_v4i16_v4i32: +; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h + %tmp3 = zext <4 x i16> %arg to <4 x i32> + %tmp4 = mul <4 x i32> %tmp3, + ret <4 x i32> %tmp4 +} + +define <2 x i64> @umull_extvec_v2i32_v2i64(<2 x i32> %arg) nounwind { +; CHECK-LABEL: umull_extvec_v2i32_v2i64: +; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + %tmp3 = zext <2 x i32> %arg to <2 x i64> + %tmp4 = mul <2 x i64> %tmp3, + ret <2 x i64> %tmp4 +} + +define i16 @smullWithInconsistentExtensions(<8 x i8> %vec) { +; If one operand has a zero-extend and the other a sign-extend, smull +; cannot be used.
+; CHECK-LABEL: smullWithInconsistentExtensions: +; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h + %1 = sext <8 x i8> %vec to <8 x i16> + %2 = mul <8 x i16> %1, + %3 = extractelement <8 x i16> %2, i32 0 + ret i16 %3 +} + +define void @distribute(i16* %dst, i8* %src, i32 %mul) nounwind { +entry: +; CHECK-LABEL: distribute: +; CHECK: umull [[REG1:(v[0-9]+.8h)]], {{v[0-9]+}}.8b, [[REG2:(v[0-9]+.8b)]] +; CHECK: umlal [[REG1]], {{v[0-9]+}}.8b, [[REG2]] + %0 = trunc i32 %mul to i8 + %1 = insertelement <8 x i8> undef, i8 %0, i32 0 + %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer + %3 = tail call <16 x i8> @llvm.aarch64.neon.vld1.v16i8(i8* %src, i32 1) + %4 = bitcast <16 x i8> %3 to <2 x double> + %5 = extractelement <2 x double> %4, i32 1 + %6 = bitcast double %5 to <8 x i8> + %7 = zext <8 x i8> %6 to <8 x i16> + %8 = zext <8 x i8> %2 to <8 x i16> + %9 = extractelement <2 x double> %4, i32 0 + %10 = bitcast double %9 to <8 x i8> + %11 = zext <8 x i8> %10 to <8 x i16> + %12 = add <8 x i16> %7, %11 + %13 = mul <8 x i16> %12, %8 + %14 = bitcast i16* %dst to i8* + tail call void @llvm.aarch64.neon.vst1.v8i16(i8* %14, <8 x i16> %13, i32 2) + ret void +} + +declare <16 x i8> @llvm.aarch64.neon.vld1.v16i8(i8*, i32) nounwind readonly + +declare void @llvm.aarch64.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind + diff --git a/test/CodeGen/AArch64/aarch64-wide-shuffle.ll b/test/CodeGen/AArch64/aarch64-wide-shuffle.ll new file mode 100644 index 0000000..d06df7a --- /dev/null +++ b/test/CodeGen/AArch64/aarch64-wide-shuffle.ll @@ -0,0 +1,22 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +define <4 x i16> @f(<4 x i32> %vqdmlal_v3.i, <8 x i16> %x5) { +entry: + ; Check that we don't just dup the input vector. The code emitted is ext, dup, ext, ext + ; but only match the last three instructions as the first two could be combined to + ; a dup2 at some stage. 
+ ; CHECK: dup + ; CHECK: ext + ; CHECK: ext + %x4 = extractelement <4 x i32> %vqdmlal_v3.i, i32 2 + %vgetq_lane = trunc i32 %x4 to i16 + %vecinit.i = insertelement <4 x i16> undef, i16 %vgetq_lane, i32 0 + %vecinit2.i = insertelement <4 x i16> %vecinit.i, i16 %vgetq_lane, i32 2 + %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vgetq_lane, i32 3 + %vgetq_lane261 = extractelement <8 x i16> %x5, i32 0 + %vset_lane267 = insertelement <4 x i16> %vecinit3.i, i16 %vgetq_lane261, i32 1 + ret <4 x i16> %vset_lane267 +} diff --git a/test/CodeGen/AArch64/aarch64_f16_be.ll b/test/CodeGen/AArch64/aarch64_f16_be.ll new file mode 100644 index 0000000..7504439 --- /dev/null +++ b/test/CodeGen/AArch64/aarch64_f16_be.ll @@ -0,0 +1,67 @@ +; RUN: llc -mtriple=aarch64-linux-gnuabi -O0 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64_be-linux-gnuabi -O0 < %s | FileCheck %s --check-prefix=CHECK-BE + +define void @test_bitcast_v8f16_to_v4f32(<8 x half> %a) { +; CHECK-LABEL: test_bitcast_v8f16_to_v4f32: +; CHECK-NOT: st1 + +; CHECK-BE-LABEL: test_bitcast_v8f16_to_v4f32: +; CHECK-BE: st1 + + %x = alloca <4 x float>, align 16 + %y = bitcast <8 x half> %a to <4 x float> + store <4 x float> %y, <4 x float>* %x, align 16 + ret void +} + +define void @test_bitcast_v8f16_to_v2f64(<8 x half> %a) { +; CHECK-LABEL: test_bitcast_v8f16_to_v2f64: +; CHECK-NOT: st1 + +; CHECK-BE-LABEL: test_bitcast_v8f16_to_v2f64: +; CHECK-BE: st1 + + %x = alloca <2 x double>, align 16 + %y = bitcast <8 x half> %a to <2 x double> + store <2 x double> %y, <2 x double>* %x, align 16 + ret void +} + +define void @test_bitcast_v8f16_to_fp128(<8 x half> %a) { +; CHECK-LABEL: test_bitcast_v8f16_to_fp128: +; CHECK-NOT: st1 + +; CHECK-BE-LABEL: test_bitcast_v8f16_to_fp128: +; CHECK-BE: st1 + + %x = alloca fp128, align 16 + %y = bitcast <8 x half> %a to fp128 + store fp128 %y, fp128* %x, align 16 + ret void +} + +define void @test_bitcast_v4f16_to_v2f32(<4 x half> %a) { +; CHECK-LABEL: test_bitcast_v4f16_to_v2f32: +; 
CHECK-NOT: st1 + +; CHECK-BE-LABEL: test_bitcast_v4f16_to_v2f32: +; CHECK-BE: st1 + + %x = alloca <2 x float>, align 8 + %y = bitcast <4 x half> %a to <2 x float> + store <2 x float> %y, <2 x float>* %x, align 8 + ret void +} + +define void @test_bitcast_v4f16_to_v1f64(<4 x half> %a) { +; CHECK-LABEL: test_bitcast_v4f16_to_v1f64: +; CHECK-NOT: st1 + +; CHECK-BE-LABEL: test_bitcast_v4f16_to_v1f64: +; CHECK-BE: st1 + + %x = alloca <1 x double>, align 8 + %y = bitcast <4 x half> %a to <1 x double> + store <1 x double> %y, <1 x double>* %x, align 8 + ret void +} diff --git a/test/CodeGen/AArch64/aarch64_tree_tests.ll b/test/CodeGen/AArch64/aarch64_tree_tests.ll new file mode 100644 index 0000000..08e506a --- /dev/null +++ b/test/CodeGen/AArch64/aarch64_tree_tests.ll @@ -0,0 +1,42 @@ +; RUN: llc < %s | FileCheck %s + +; ModuleID = 'aarch64_tree_tests.bc' +target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" +target triple = "arm64--linux-gnu" + +; CHECK-LABLE: @aarch64_tree_tests_and +; CHECK: .hword 32768 +; CHECK: .hword 32767 +; CHECK: .hword 4664 +; CHECK: .hword 32767 +; CHECK: .hword 32768 +; CHECK: .hword 32768 +; CHECK: .hword 0 +; CHECK: .hword 0 + +; Function Attrs: nounwind readnone +define <8 x i16> @aarch64_tree_tests_and(<8 x i16> %a) { +entry: + %and = and <8 x i16> , %a + %ret = add <8 x i16> %and, + ret <8 x i16> %ret +} + +; CHECK-LABLE: @aarch64_tree_tests_or +; CHECK: .hword 32768 +; CHECK: .hword 32766 +; CHECK: .hword 4664 +; CHECK: .hword 32766 +; CHECK: .hword 32768 +; CHECK: .hword 32768 +; CHECK: .hword 65535 +; CHECK: .hword 65535 + +; Function Attrs: nounwind readnone +define <8 x i16> @aarch64_tree_tests_or(<8 x i16> %a) { +entry: + %or = or <8 x i16> , %a + %ret = add <8 x i16> %or, + ret <8 x i16> %ret +} + diff --git a/test/CodeGen/AArch64/adc.ll b/test/CodeGen/AArch64/adc.ll index 892573b..0488ee2 100644 --- a/test/CodeGen/AArch64/adc.ll +++ b/test/CodeGen/AArch64/adc.ll @@ -1,5 +1,5 @@ ; RUN: llc -verify-machineinstrs < %s 
-mtriple=arm64-apple-ios7.0 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-LE %s -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s define i128 @test_simple(i128 %a, i128 %b, i128 %c) { ; CHECK-LABEL: test_simple: diff --git a/test/CodeGen/AArch64/analyzecmp.ll b/test/CodeGen/AArch64/analyzecmp.ll new file mode 100644 index 0000000..8962505 --- /dev/null +++ b/test/CodeGen/AArch64/analyzecmp.ll @@ -0,0 +1,32 @@ +; RUN: llc -O3 -mcpu=cortex-a57 < %s | FileCheck %s + +; CHECK-LABLE: @test +; CHECK: tst [[CMP:x[0-9]+]], #0x8000000000000000 +; CHECK: csel [[R0:x[0-9]+]], [[S0:x[0-9]+]], [[S1:x[0-9]+]], eq +; CHECK: csel [[R1:x[0-9]+]], [[S2:x[0-9]+]], [[S3:x[0-9]+]], eq +target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" +target triple = "arm64--linux-gnueabi" + +define void @test(i64 %a, i64* %ptr1, i64* %ptr2) #0 align 2 { +entry: + %conv = and i64 %a, 4294967295 + %add = add nsw i64 %conv, -1 + %div = sdiv i64 %add, 64 + %rem = srem i64 %add, 64 + %cmp = icmp slt i64 %rem, 0 + br i1 %cmp, label %if.then, label %exit + +if.then: + %add2 = add nsw i64 %rem, 64 + %add3 = add i64 %div, -1 + br label %exit + +exit: + %__n = phi i64 [ %add3, %if.then ], [ %div, %entry ] + %__n.0 = phi i64 [ %add2, %if.then ], [ %rem, %entry ] + store i64 %__n, i64* %ptr1 + store i64 %__n.0, i64* %ptr2 + ret void +} + + diff --git a/test/CodeGen/AArch64/and-mask-removal.ll b/test/CodeGen/AArch64/and-mask-removal.ll new file mode 100644 index 0000000..f803b85 --- /dev/null +++ b/test/CodeGen/AArch64/and-mask-removal.ll @@ -0,0 +1,269 @@ +; RUN: llc -O0 -fast-isel=false -mtriple=arm64-apple-darwin < %s | FileCheck %s + +@board = common global [400 x i8] zeroinitializer, align 1 +@next_string = common global i32 0, align 4 +@string_number = common global [400 x i32] 
zeroinitializer, align 4 + +; Function Attrs: nounwind ssp +define void @new_position(i32 %pos) { +entry: + %idxprom = sext i32 %pos to i64 + %arrayidx = getelementptr inbounds [400 x i8]* @board, i64 0, i64 %idxprom + %tmp = load i8* %arrayidx, align 1 + %.off = add i8 %tmp, -1 + %switch = icmp ult i8 %.off, 2 + br i1 %switch, label %if.then, label %if.end + +if.then: ; preds = %entry + %tmp1 = load i32* @next_string, align 4 + %arrayidx8 = getelementptr inbounds [400 x i32]* @string_number, i64 0, i64 %idxprom + store i32 %tmp1, i32* %arrayidx8, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +; CHECK-LABEL: new_position +; CHECK-NOT: and +; CHECK: ret +} + +define zeroext i1 @test8_0(i8 zeroext %x) align 2 { +entry: + %0 = add i8 %x, 74 + %1 = icmp ult i8 %0, -20 + br i1 %1, label %ret_true, label %ret_false +ret_false: + ret i1 false +ret_true: + ret i1 true +; CHECK-LABEL: test8_0 +; CHECK: and +; CHECK: ret +} + +define zeroext i1 @test8_1(i8 zeroext %x) align 2 { +entry: + %0 = add i8 %x, 246 + %1 = icmp uge i8 %0, 90 + br i1 %1, label %ret_true, label %ret_false +ret_false: + ret i1 false +ret_true: + ret i1 true +; CHECK-LABEL: test8_1 +; CHECK-NOT: and +; CHECK: ret +} + +define zeroext i1 @test8_2(i8 zeroext %x) align 2 { +entry: + %0 = add i8 %x, 227 + %1 = icmp ne i8 %0, 179 + br i1 %1, label %ret_true, label %ret_false +ret_false: + ret i1 false +ret_true: + ret i1 true +; CHECK-LABEL: test8_2 +; CHECK-NOT: and +; CHECK: ret +} + +define zeroext i1 @test8_3(i8 zeroext %x) align 2 { +entry: + %0 = add i8 %x, 201 + %1 = icmp eq i8 %0, 154 + br i1 %1, label %ret_true, label %ret_false +ret_false: + ret i1 false +ret_true: + ret i1 true +; CHECK-LABEL: test8_3 +; CHECK-NOT: and +; CHECK: ret +} + +define zeroext i1 @test8_4(i8 zeroext %x) align 2 { +entry: + %0 = add i8 %x, -79 + %1 = icmp ne i8 %0, -40 + br i1 %1, label %ret_true, label %ret_false +ret_false: + ret i1 false +ret_true: + ret i1 true +; CHECK-LABEL: test8_4 +; 
CHECK-NOT: and +; CHECK: ret +} + +define zeroext i1 @test8_5(i8 zeroext %x) align 2 { +entry: + %0 = add i8 %x, 133 + %1 = icmp uge i8 %0, -105 + br i1 %1, label %ret_true, label %ret_false +ret_false: + ret i1 false +ret_true: + ret i1 true +; CHECK-LABEL: test8_5 +; CHECK: and +; CHECK: ret +} + +define zeroext i1 @test8_6(i8 zeroext %x) align 2 { +entry: + %0 = add i8 %x, -58 + %1 = icmp uge i8 %0, 155 + br i1 %1, label %ret_true, label %ret_false +ret_false: + ret i1 false +ret_true: + ret i1 true +; CHECK-LABEL: test8_6 +; CHECK: and +; CHECK: ret +} + +define zeroext i1 @test8_7(i8 zeroext %x) align 2 { +entry: + %0 = add i8 %x, 225 + %1 = icmp ult i8 %0, 124 + br i1 %1, label %ret_true, label %ret_false +ret_false: + ret i1 false +ret_true: + ret i1 true +; CHECK-LABEL: test8_7 +; CHECK-NOT: and +; CHECK: ret +} + + + +define zeroext i1 @test8_8(i8 zeroext %x) align 2 { +entry: + %0 = add i8 %x, 190 + %1 = icmp uge i8 %0, 1 + br i1 %1, label %ret_true, label %ret_false +ret_false: + ret i1 false +ret_true: + ret i1 true +; CHECK-LABEL: test8_8 +; CHECK-NOT: and +; CHECK: ret +} + +define zeroext i1 @test16_0(i16 zeroext %x) align 2 { +entry: + %0 = add i16 %x, -46989 + %1 = icmp ne i16 %0, -41903 + br i1 %1, label %ret_true, label %ret_false +ret_false: + ret i1 false +ret_true: + ret i1 true +; CHECK-LABEL: test16_0 +; CHECK-NOT: and +; CHECK: ret +} + +define zeroext i1 @test16_2(i16 zeroext %x) align 2 { +entry: + %0 = add i16 %x, 16882 + %1 = icmp ule i16 %0, -24837 + br i1 %1, label %ret_true, label %ret_false +ret_false: + ret i1 false +ret_true: + ret i1 true +; CHECK-LABEL: test16_2 +; CHECK: and +; CHECK: ret +} + +define zeroext i1 @test16_3(i16 zeroext %x) align 2 { +entry: + %0 = add i16 %x, 29283 + %1 = icmp ne i16 %0, 16947 + br i1 %1, label %ret_true, label %ret_false +ret_false: + ret i1 false +ret_true: + ret i1 true +; CHECK-LABEL: test16_3 +; CHECK-NOT: and +; CHECK: ret +} + +define zeroext i1 @test16_4(i16 zeroext %x) align 2 { +entry: 
+ %0 = add i16 %x, -35551 + %1 = icmp uge i16 %0, 15677 + br i1 %1, label %ret_true, label %ret_false +ret_false: + ret i1 false +ret_true: + ret i1 true +; CHECK-LABEL: test16_4 +; CHECK: and +; CHECK: ret +} + +define zeroext i1 @test16_5(i16 zeroext %x) align 2 { +entry: + %0 = add i16 %x, -25214 + %1 = icmp ne i16 %0, -1932 + br i1 %1, label %ret_true, label %ret_false +ret_false: + ret i1 false +ret_true: + ret i1 true +; CHECK-LABEL: test16_5 +; CHECK-NOT: and +; CHECK: ret +} + +define zeroext i1 @test16_6(i16 zeroext %x) align 2 { +entry: + %0 = add i16 %x, -32194 + %1 = icmp uge i16 %0, -41215 + br i1 %1, label %ret_true, label %ret_false +ret_false: + ret i1 false +ret_true: + ret i1 true +; CHECK-LABEL: test16_6 +; CHECK-NOT: and +; CHECK: ret +} + +define zeroext i1 @test16_7(i16 zeroext %x) align 2 { +entry: + %0 = add i16 %x, 9272 + %1 = icmp uge i16 %0, -42916 + br i1 %1, label %ret_true, label %ret_false +ret_false: + ret i1 false +ret_true: + ret i1 true +; CHECK-LABEL: test16_7 +; CHECK: and +; CHECK: ret +} + +define zeroext i1 @test16_8(i16 zeroext %x) align 2 { +entry: + %0 = add i16 %x, -63749 + %1 = icmp ne i16 %0, 6706 + br i1 %1, label %ret_true, label %ret_false +ret_false: + ret i1 false +ret_true: + ret i1 true +; CHECK-LABEL: test16_8 +; CHECK-NOT: and +; CHECK: ret +} + diff --git a/test/CodeGen/AArch64/andandshift.ll b/test/CodeGen/AArch64/andandshift.ll new file mode 100644 index 0000000..e2c7a09 --- /dev/null +++ b/test/CodeGen/AArch64/andandshift.ll @@ -0,0 +1,28 @@ +; RUN: llc -O3 < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" +target triple = "arm64--linux-gnu" + +; Function Attrs: nounwind readnone +define i32 @test1(i8 %a) { +; CHECK-LABLE: @test1 +; CHECK: ubfx {{w[0-9]+}}, w0, #3, #5 +entry: + %conv = zext i8 %a to i32 + %shr1 = lshr i32 %conv, 3 + ret i32 %shr1 +} + +; Function Attrs: nounwind readnone +define i32 @test2(i8 %a) { +; CHECK-LABLE: @test2 +; CHECK: and {{w[0-9]+}}, w0, #0xff +; 
CHECK: ubfx {{w[0-9]+}}, w0, #3, #5 +entry: + %conv = zext i8 %a to i32 + %cmp = icmp ugt i8 %a, 47 + %shr5 = lshr i32 %conv, 3 + %retval.0 = select i1 %cmp, i32 %shr5, i32 %conv + ret i32 %retval.0 +} + + diff --git a/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll b/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll index 2b083d8..e57a8c9 100644 --- a/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll +++ b/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll @@ -11,34 +11,34 @@ if.then24: ; preds = %entry unreachable if.else295: ; preds = %entry - call void @llvm.dbg.declare(metadata !{i32* %do_tab_convert}, metadata !16), !dbg !18 + call void @llvm.dbg.declare(metadata !{i32* %do_tab_convert}, metadata !16, metadata !{metadata !"0x102"}), !dbg !18 store i32 0, i32* %do_tab_convert, align 4, !dbg !19 unreachable } -declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone +declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone !llvm.dbg.gv = !{!0} !llvm.dbg.sp = !{!1, !7, !10, !11, !12} -!0 = metadata !{i32 589876, i32 0, metadata !1, metadata !"vsplive", metadata !"vsplive", metadata !"", metadata !2, i32 617, metadata !6, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] -!1 = metadata !{i32 589870, metadata !20, metadata !2, metadata !"drt_vsprintf", metadata !"drt_vsprintf", metadata !"", i32 616, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!2 = metadata !{i32 589865, metadata !20} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 589841, metadata !20, i32 12, metadata !"clang version 3.0 (http://llvm.org/git/clang.git git:/git/puzzlebox/clang.git/ c4d1aea01c4444eb81bdbf391f1be309127c3cf1)", i1 true, metadata !"", i32 0, metadata !21, metadata !21, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!4 = metadata !{i32 589845, metadata !20, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata 
!5, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!0 = metadata !{metadata !"0x34\00vsplive\00vsplive\00\00617\001\001", metadata !1, metadata !2, metadata !6, null, null} ; [ DW_TAG_variable ] +!1 = metadata !{metadata !"0x2e\00drt_vsprintf\00drt_vsprintf\00\00616\000\001\000\006\00256\000\000", metadata !20, metadata !2, metadata !4, null, null, null, null, null} ; [ DW_TAG_subprogram ] +!2 = metadata !{metadata !"0x29", metadata !20} ; [ DW_TAG_file_type ] +!3 = metadata !{metadata !"0x11\0012\00clang version 3.0 (http://llvm.org/git/clang.git git:/git/puzzlebox/clang.git/ c4d1aea01c4444eb81bdbf391f1be309127c3cf1)\001\00\000\00\000", metadata !20, metadata !21, metadata !21, null, null, null} ; [ DW_TAG_compile_unit ] +!4 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", metadata !20, metadata !2, null, metadata !5, i32 0} ; [ DW_TAG_subroutine_type ] !5 = metadata !{metadata !6} -!6 = metadata !{i32 589860, null, metadata !3, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!7 = metadata !{i32 589870, metadata !20, metadata !2, metadata !"putc_mem", metadata !"putc_mem", metadata !"", i32 30, metadata !8, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!8 = metadata !{i32 589845, metadata !20, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !9, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!6 = metadata !{metadata !"0x24\00int\000\0032\0032\000\000\005", null, metadata !3} ; [ DW_TAG_base_type ] +!7 = metadata !{metadata !"0x2e\00putc_mem\00putc_mem\00\0030\001\001\000\006\00256\000\000", metadata !20, metadata !2, metadata !8, null, null, null, null, null} ; [ DW_TAG_subprogram ] +!8 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", metadata !20, metadata !2, null, metadata !9, i32 0} ; [ DW_TAG_subroutine_type ] !9 = metadata !{null} -!10 = metadata !{i32 589870, metadata !20, metadata !2, metadata !"print_double", 
metadata !"print_double", metadata !"", i32 203, metadata !4, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!11 = metadata !{i32 589870, metadata !20, metadata !2, metadata !"print_number", metadata !"print_number", metadata !"", i32 75, metadata !4, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!12 = metadata !{i32 589870, metadata !20, metadata !2, metadata !"get_flags", metadata !"get_flags", metadata !"", i32 508, metadata !8, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] +!10 = metadata !{metadata !"0x2e\00print_double\00print_double\00\00203\001\001\000\006\00256\000\000", metadata !20, metadata !2, metadata !4, null, null, null, null, null} ; [ DW_TAG_subprogram ] +!11 = metadata !{metadata !"0x2e\00print_number\00print_number\00\0075\001\001\000\006\00256\000\000", metadata !20, metadata !2, metadata !4, i32 0, null, null, null, null} ; [ DW_TAG_subprogram ] +!12 = metadata !{metadata !"0x2e\00get_flags\00get_flags\00\00508\001\001\000\006\00256\000\000", metadata !20, metadata !2, metadata !8, null, null, null, null, null} ; [ DW_TAG_subprogram ] !13 = metadata !{i32 653, i32 5, metadata !14, null} -!14 = metadata !{i32 589835, metadata !20, metadata !15, i32 652, i32 35, i32 2} ; [ DW_TAG_lexical_block ] -!15 = metadata !{i32 589835, metadata !20, metadata !1, i32 616, i32 1, i32 0} ; [ DW_TAG_lexical_block ] -!16 = metadata !{i32 590080, metadata !17, metadata !"do_tab_convert", metadata !2, i32 853, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ] -!17 = metadata !{i32 589835, metadata !20, metadata !14, i32 850, i32 12, i32 33} ; [ DW_TAG_lexical_block ] +!14 = metadata !{metadata !"0xb\00652\0035\002", metadata !20, metadata !15} ; [ DW_TAG_lexical_block ] +!15 = metadata !{metadata !"0xb\00616\001\000", metadata !20, metadata !1} ; [ 
DW_TAG_lexical_block ] +!16 = metadata !{metadata !"0x100\00do_tab_convert\00853\000", metadata !17, metadata !2, metadata !6} ; [ DW_TAG_auto_variable ] +!17 = metadata !{metadata !"0xb\00850\0012\0033", metadata !20, metadata !14} ; [ DW_TAG_lexical_block ] !18 = metadata !{i32 853, i32 11, metadata !17, null} !19 = metadata !{i32 853, i32 29, metadata !17, null} !20 = metadata !{metadata !"print.i", metadata !"/Volumes/Ebi/echeng/radars/r9146594"} diff --git a/test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll b/test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll index 8f99bc3..a83f164 100644 --- a/test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll +++ b/test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll @@ -12,7 +12,7 @@ entry: for.body: ; CHECK: for.body -; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}] +; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}, x{{[0-9]+}}] ; CHECK: add x[[REG:[0-9]+]], ; CHECK: x[[REG]], #1, lsl #12 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] diff --git a/test/CodeGen/AArch64/arm64-2012-06-06-FPToUI.ll b/test/CodeGen/AArch64/arm64-2012-06-06-FPToUI.ll index 168e921..7d880f3 100644 --- a/test/CodeGen/AArch64/arm64-2012-06-06-FPToUI.ll +++ b/test/CodeGen/AArch64/arm64-2012-06-06-FPToUI.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=arm64 -O0 < %s | FileCheck %s -; RUN: llc -march=arm64 -O3 < %s | FileCheck %s +; RUN: llc -march=arm64 -O0 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=arm64 -O3 -verify-machineinstrs < %s | FileCheck %s @.str = private unnamed_addr constant [9 x i8] c"%lf %lu\0A\00", align 1 @.str1 = private unnamed_addr constant [8 x i8] c"%lf %u\0A\00", align 1 diff --git a/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll b/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll index c4597d5..6266d1c 100644 --- a/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll +++ b/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll @@ -1,15 +1,36 @@ -; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -aarch64-simd-scalar=true 
-asm-verbose=false | FileCheck %s -; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=generic -aarch64-simd-scalar=true -asm-verbose=false | FileCheck %s -check-prefix=GENERIC +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -aarch64-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=true | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-NOOPT +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -aarch64-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=false | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-OPT +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=generic -aarch64-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=true | FileCheck %s -check-prefix=GENERIC -check-prefix=GENERIC-NOOPT +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=generic -aarch64-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=false | FileCheck %s -check-prefix=GENERIC -check-prefix=GENERIC-OPT define <2 x i64> @bar(<2 x i64> %a, <2 x i64> %b) nounwind readnone { ; CHECK-LABEL: bar: ; CHECK: add.2d v[[REG:[0-9]+]], v0, v1 ; CHECK: add d[[REG3:[0-9]+]], d[[REG]], d1 +; Without advanced copy optimization, we end up with cross register +; banks copies that cannot be coalesced. +; CHECK-NOOPT: fmov [[COPY_REG3:x[0-9]+]], d[[REG3]] +; With advanced copy optimization, we end up with just one copy +; to insert the computed high part into the V register. 
+; CHECK-OPT-NOT: fmov ; CHECK: sub d[[REG2:[0-9]+]], d[[REG]], d1 +; CHECK: fmov [[COPY_REG2:x[0-9]+]], d[[REG2]] +; CHECK-NOOPT: fmov d0, [[COPY_REG3]] +; CHECK-OPT-NOT: fmov +; CHECK: ins.d v0[1], [[COPY_REG2]] +; CHECK-NEXT: ret +; ; GENERIC-LABEL: bar: ; GENERIC: add v[[REG:[0-9]+]].2d, v0.2d, v1.2d ; GENERIC: add d[[REG3:[0-9]+]], d[[REG]], d1 +; GENERIC-NOOPT: fmov [[COPY_REG3:x[0-9]+]], d[[REG3]] +; GENERIC-OPT-NOT: fmov ; GENERIC: sub d[[REG2:[0-9]+]], d[[REG]], d1 +; GENERIC: fmov [[COPY_REG2:x[0-9]+]], d[[REG2]] +; GENERIC-NOOPT: fmov d0, [[COPY_REG3]] +; GENERIC-OPT-NOT: fmov +; GENERIC: ins v0.d[1], [[COPY_REG2]] +; GENERIC-NEXT: ret %add = add <2 x i64> %a, %b %vgetq_lane = extractelement <2 x i64> %add, i32 0 %vgetq_lane2 = extractelement <2 x i64> %b, i32 0 @@ -65,3 +86,44 @@ define double @add_sub_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone { %retval = bitcast i64 %sub.i to double ret double %retval } +define double @and_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone { +; CHECK-LABEL: and_su64: +; CHECK: and.8b v0, v1, v0 +; CHECK-NEXT: ret +; GENERIC-LABEL: and_su64: +; GENERIC: and v0.8b, v1.8b, v0.8b +; GENERIC-NEXT: ret + %vecext = extractelement <2 x i64> %a, i32 0 + %vecext1 = extractelement <2 x i64> %b, i32 0 + %or.i = and i64 %vecext1, %vecext + %retval = bitcast i64 %or.i to double + ret double %retval +} + +define double @orr_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone { +; CHECK-LABEL: orr_su64: +; CHECK: orr.8b v0, v1, v0 +; CHECK-NEXT: ret +; GENERIC-LABEL: orr_su64: +; GENERIC: orr v0.8b, v1.8b, v0.8b +; GENERIC-NEXT: ret + %vecext = extractelement <2 x i64> %a, i32 0 + %vecext1 = extractelement <2 x i64> %b, i32 0 + %or.i = or i64 %vecext1, %vecext + %retval = bitcast i64 %or.i to double + ret double %retval +} + +define double @xorr_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone { +; CHECK-LABEL: xorr_su64: +; CHECK: eor.8b v0, v1, v0 +; CHECK-NEXT: ret +; GENERIC-LABEL: xorr_su64: +; GENERIC: eor v0.8b, 
v1.8b, v0.8b +; GENERIC-NEXT: ret + %vecext = extractelement <2 x i64> %a, i32 0 + %vecext1 = extractelement <2 x i64> %b, i32 0 + %xor.i = xor i64 %vecext1, %vecext + %retval = bitcast i64 %xor.i to double + ret double %retval +} diff --git a/test/CodeGen/AArch64/arm64-EXT-undef-mask.ll b/test/CodeGen/AArch64/arm64-EXT-undef-mask.ll index 1b2d543..1bb47fc 100644 --- a/test/CodeGen/AArch64/arm64-EXT-undef-mask.ll +++ b/test/CodeGen/AArch64/arm64-EXT-undef-mask.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O0 -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s +; RUN: llc -O0 -march=arm64 -aarch64-neon-syntax=apple -verify-machineinstrs < %s | FileCheck %s ; The following 2 test cases test shufflevector with beginning UNDEF mask. define <8 x i16> @test_vext_undef_traverse(<8 x i16> %in) { diff --git a/test/CodeGen/AArch64/arm64-aapcs-be.ll b/test/CodeGen/AArch64/arm64-aapcs-be.ll new file mode 100644 index 0000000..77e2b0f --- /dev/null +++ b/test/CodeGen/AArch64/arm64-aapcs-be.ll @@ -0,0 +1,24 @@ +; RUN: llc -mtriple=aarch64_be-none-eabi -fast-isel=false < %s | FileCheck %s +; RUN: llc -mtriple=aarch64_be-none-eabi -fast-isel=true < %s | FileCheck %s + +; Check narrow argument passing via stack - callee end +define i32 @test_narrow_args_callee(i64 %x0, i64 %x1, i64 %x2, i64 %x3, i64 %x4, i64 %x5, i64 %x6, i64 %x7, i8 %c, i16 %s) #0 { +entry: + %conv = zext i8 %c to i32 + %conv1 = sext i16 %s to i32 + %add = add nsw i32 %conv1, %conv +; CHECK-LABEL: test_narrow_args_callee: +; CHECK-DAG: ldrb w{{[0-9]}}, [sp, #7] +; CHECK-DAG: ldr{{s?}}h w{{[0-9]}}, [sp, #14] + ret i32 %add +} + +; Check narrow argument passing via stack - caller end +define i32 @test_narrow_args_caller() #0 { +entry: + %call = tail call i32 @test_narrow_args_callee(i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i8 8, i16 9) +; CHECK-LABEL: test_narrow_args_caller: +; CHECK-DAG: strh w{{[0-9]}}, [sp, #14] +; CHECK-DAG: strb w{{[0-9]}}, [sp, #7] + ret i32 %call +} \ 
No newline at end of file diff --git a/test/CodeGen/AArch64/arm64-aapcs.ll b/test/CodeGen/AArch64/arm64-aapcs.ll index ccf1371..41c3ad5 100644 --- a/test/CodeGen/AArch64/arm64-aapcs.ll +++ b/test/CodeGen/AArch64/arm64-aapcs.ll @@ -109,3 +109,45 @@ entry: ; CHECK: ldr {{q[0-9]+}}, [sp] ret <2 x double> %varg_stack; } + +; Check that f16 can be passed and returned (ACLE 2.0 extension) +define half @test_half(float, half %arg) { +; CHECK-LABEL: test_half: +; CHECK: mov v0.16b, v1.16b + ret half %arg; +} + +; Check that f16 constants are materialized correctly +define half @test_half_const() { +; CHECK-LABEL: test_half_const: +; CHECK: ldr h0, [x{{[0-9]+}}, :lo12:{{.*}}] + ret half 0xH4248 +} + +; Check that v4f16 can be passed and returned in registers +define <4 x half> @test_v4_half_register(float, <4 x half> %arg) { +; CHECK-LABEL: test_v4_half_register: +; CHECK: mov v0.16b, v1.16b + ret <4 x half> %arg; +} + +; Check that v8f16 can be passed and returned in registers +define <8 x half> @test_v8_half_register(float, <8 x half> %arg) { +; CHECK-LABEL: test_v8_half_register: +; CHECK: mov v0.16b, v1.16b + ret <8 x half> %arg; +} + +; Check that v4f16 can be passed and returned on the stack +define <4 x half> @test_v4_half_stack([8 x <2 x double>], <4 x half> %arg) { +; CHECK-LABEL: test_v4_half_stack: +; CHECK: ldr d0, [sp] + ret <4 x half> %arg; +} + +; Check that v8f16 can be passed and returned on the stack +define <8 x half> @test_v8_half_stack([8 x <2 x double>], <8 x half> %arg) { +; CHECK-LABEL: test_v8_half_stack: +; CHECK: ldr q0, [sp] + ret <8 x half> %arg; +} diff --git a/test/CodeGen/AArch64/arm64-abi.ll b/test/CodeGen/AArch64/arm64-abi.ll index a955029..8a6b64d 100644 --- a/test/CodeGen/AArch64/arm64-abi.ll +++ b/test/CodeGen/AArch64/arm64-abi.ll @@ -1,7 +1,5 @@ -; RUN: llc < %s -debug -march=arm64 -mcpu=cyclone -enable-misched=false | FileCheck %s -; RUN: llc < %s -O0 | FileCheck -check-prefix=FAST %s -; REQUIRES: asserts -target triple = 
"arm64-apple-darwin" +; RUN: llc -mtriple=arm64-apple-darwin -mcpu=cyclone -enable-misched=false < %s | FileCheck %s +; RUN: llc -O0 -mtriple=arm64-apple-darwin < %s | FileCheck --check-prefix=FAST %s ; rdar://9932559 define i64 @i8i16callee(i64 %a1, i64 %a2, i64 %a3, i8 signext %a4, i16 signext %a5, i64 %a6, i64 %a7, i64 %a8, i8 signext %b1, i16 signext %b2, i8 signext %b3, i8 signext %b4) nounwind readnone noinline { @@ -42,7 +40,7 @@ entry: define i32 @i8i16caller() nounwind readnone { entry: -; CHECK: i8i16caller +; CHECK-LABEL: i8i16caller ; The 8th, 9th, 10th and 11th arguments are passed at sp, sp+2, sp+4, sp+5. ; They are i8, i16, i8 and i8. ; CHECK-DAG: strb {{w[0-9]+}}, [sp, #5] @@ -50,7 +48,7 @@ entry: ; CHECK-DAG: strh {{w[0-9]+}}, [sp, #2] ; CHECK-DAG: strb {{w[0-9]+}}, [sp] ; CHECK: bl -; FAST: i8i16caller +; FAST-LABEL: i8i16caller ; FAST: strb {{w[0-9]+}}, [sp] ; FAST: strh {{w[0-9]+}}, [sp, #2] ; FAST: strb {{w[0-9]+}}, [sp, #4] @@ -64,7 +62,7 @@ entry: ; rdar://12651543 define double @circle_center([2 x float] %a) nounwind ssp { %call = tail call double @ext([2 x float] %a) nounwind -; CHECK: circle_center +; CHECK-LABEL: circle_center ; CHECK: bl ret double %call } @@ -75,10 +73,10 @@ declare double @ext([2 x float]) ; A double argument will be passed on stack, so vecotr should be at sp+16. 
define double @fixed_4i(<4 x i32>* nocapture %in) nounwind { entry: -; CHECK: fixed_4i +; CHECK-LABEL: fixed_4i ; CHECK: str [[REG_1:q[0-9]+]], [sp, #16] -; FAST: fixed_4i -; FAST: sub sp, sp, #64 +; FAST-LABEL: fixed_4i +; FAST: sub sp, sp ; FAST: mov x[[ADDR:[0-9]+]], sp ; FAST: str [[REG_1:q[0-9]+]], [x[[ADDR]], #16] %0 = load <4 x i32>* %in, align 16 @@ -93,7 +91,7 @@ declare double @args_vec_4i(double, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, define void @test1(float %f1, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, i32 %i) nounwind ssp { entry: -; CHECK: test1 +; CHECK-LABEL: test1 ; CHECK: ldr [[REG_1:d[0-9]+]], [sp] ; CHECK: scvtf [[REG_2:s[0-9]+]], w0 ; CHECK: fadd s0, [[REG_2]], s0 @@ -110,7 +108,7 @@ entry: define void @test2(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, float %d1) nounwind ssp { entry: -; CHECK: test2 +; CHECK-LABEL: test2 ; CHECK: scvtf [[REG_2:s[0-9]+]], w0 ; CHECK: fadd s0, [[REG_2]], s0 ; CHECK: ldr [[REG_1:s[0-9]+]], [sp] @@ -129,9 +127,9 @@ entry: ; Check alignment on stack for v64, f64, i64, f32, i32. 
define double @test3(<2 x i32>* nocapture %in) nounwind { entry: -; CHECK: test3 +; CHECK-LABEL: test3 ; CHECK: str [[REG_1:d[0-9]+]], [sp, #8] -; FAST: test3 +; FAST-LABEL: test3 ; FAST: sub sp, sp, #32 ; FAST: mov x[[ADDR:[0-9]+]], sp ; FAST: str [[REG_1:d[0-9]+]], [x[[ADDR]], #8] @@ -146,7 +144,7 @@ declare double @args_vec_2i(double, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, define double @test4(double* nocapture %in) nounwind { entry: -; CHECK: test4 +; CHECK-LABEL: test4 ; CHECK: str [[REG_1:d[0-9]+]], [sp, #8] ; CHECK: str [[REG_2:w[0-9]+]], [sp] ; CHECK: orr w0, wzr, #0x3 @@ -161,7 +159,7 @@ declare double @args_f64(double, double, double, double, double, double, double, define i64 @test5(i64* nocapture %in) nounwind { entry: -; CHECK: test5 +; CHECK-LABEL: test5 ; CHECK: strb [[REG_3:w[0-9]+]], [sp, #16] ; CHECK: str [[REG_1:x[0-9]+]], [sp, #8] ; CHECK: str [[REG_2:w[0-9]+]], [sp] @@ -175,7 +173,7 @@ declare i64 @args_i64(i64, i64, i64, i64, i64, i64, i64, i64, i32, i64, define i32 @test6(float* nocapture %in) nounwind { entry: -; CHECK: test6 +; CHECK-LABEL: test6 ; CHECK: strb [[REG_2:w[0-9]+]], [sp, #8] ; CHECK: str [[REG_1:s[0-9]+]], [sp, #4] ; CHECK: strh [[REG_3:w[0-9]+]], [sp] @@ -192,7 +190,7 @@ declare i32 @args_f32(i32, i32, i32, i32, i32, i32, i32, i32, define i32 @test7(i32* nocapture %in) nounwind { entry: -; CHECK: test7 +; CHECK-LABEL: test7 ; CHECK: strb [[REG_2:w[0-9]+]], [sp, #8] ; CHECK: str [[REG_1:w[0-9]+]], [sp, #4] ; CHECK: strh [[REG_3:w[0-9]+]], [sp] @@ -206,13 +204,13 @@ declare i32 @args_i32(i32, i32, i32, i32, i32, i32, i32, i32, i16 signext, i32, define i32 @test8(i32 %argc, i8** nocapture %argv) nounwind { entry: -; CHECK: test8 +; CHECK-LABEL: test8 ; CHECK: strb {{w[0-9]+}}, [sp, #3] ; CHECK: strb wzr, [sp, #2] ; CHECK: strb {{w[0-9]+}}, [sp, #1] ; CHECK: strb wzr, [sp] ; CHECK: bl -; FAST: test8 +; FAST-LABEL: test8 ; FAST: strb {{w[0-9]+}}, [sp] ; FAST: strb {{w[0-9]+}}, [sp, #1] ; FAST: strb {{w[0-9]+}}, [sp, #2] diff 
--git a/test/CodeGen/AArch64/arm64-abi_align.ll b/test/CodeGen/AArch64/arm64-abi_align.ll index 44c5a07..deb740e 100644 --- a/test/CodeGen/AArch64/arm64-abi_align.ll +++ b/test/CodeGen/AArch64/arm64-abi_align.ll @@ -34,7 +34,7 @@ target triple = "arm64-apple-darwin" ; structs with size < 8 bytes, passed via i64 in x1 and x2 define i32 @f38(i32 %i, i64 %s1.coerce, i64 %s2.coerce) #0 { entry: -; CHECK: f38 +; CHECK-LABEL: f38 ; CHECK: add w[[A:[0-9]+]], w1, w0 ; CHECK: add {{w[0-9]+}}, w[[A]], w2 %s1.sroa.0.0.extract.trunc = trunc i64 %s1.coerce to i32 @@ -56,7 +56,7 @@ entry: define i32 @caller38() #1 { entry: -; CHECK: caller38 +; CHECK-LABEL: caller38 ; CHECK: ldr x1, ; CHECK: ldr x2, %0 = load i64* bitcast (%struct.s38* @g38 to i64*), align 4 @@ -72,7 +72,7 @@ declare i32 @f38_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, ; i9 at [sp] define i32 @caller38_stack() #1 { entry: -; CHECK: caller38_stack +; CHECK-LABEL: caller38_stack ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8] ; CHECK: movz w[[C:[0-9]+]], #0x9 ; CHECK: str w[[C]], [sp] @@ -87,7 +87,7 @@ entry: ; passed via i128 in x1 and x3 define i32 @f39(i32 %i, i128 %s1.coerce, i128 %s2.coerce) #0 { entry: -; CHECK: f39 +; CHECK-LABEL: f39 ; CHECK: add w[[A:[0-9]+]], w1, w0 ; CHECK: add {{w[0-9]+}}, w[[A]], w3 %s1.sroa.0.0.extract.trunc = trunc i128 %s1.coerce to i32 @@ -109,7 +109,7 @@ entry: define i32 @caller39() #1 { entry: -; CHECK: caller39 +; CHECK-LABEL: caller39 ; CHECK: ldp x1, x2, ; CHECK: ldp x3, x4, %0 = load i128* bitcast (%struct.s39* @g39 to i128*), align 16 @@ -125,7 +125,7 @@ declare i32 @f39_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, ; passed on stack at [sp+16] and [sp+32] define i32 @caller39_stack() #1 { entry: -; CHECK: caller39_stack +; CHECK-LABEL: caller39_stack ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #32] ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] ; CHECK: movz w[[C:[0-9]+]], #0x9 @@ -141,7 +141,7 @@ entry: ; passed via i128 in x1 and x3 define 
i32 @f40(i32 %i, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) #0 { entry: -; CHECK: f40 +; CHECK-LABEL: f40 ; CHECK: add w[[A:[0-9]+]], w1, w0 ; CHECK: add {{w[0-9]+}}, w[[A]], w3 %s1.coerce.fca.0.extract = extractvalue [2 x i64] %s1.coerce, 0 @@ -165,7 +165,7 @@ entry: define i32 @caller40() #1 { entry: -; CHECK: caller40 +; CHECK-LABEL: caller40 ; CHECK: ldp x1, x2, ; CHECK: ldp x3, x4, %0 = load [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4 @@ -181,7 +181,7 @@ declare i32 @f40_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, ; passed on stack at [sp+8] and [sp+24] define i32 @caller40_stack() #1 { entry: -; CHECK: caller40_stack +; CHECK-LABEL: caller40_stack ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #24] ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8] ; CHECK: movz w[[C:[0-9]+]], #0x9 @@ -197,7 +197,7 @@ entry: ; passed via i128 in x1 and x3 define i32 @f41(i32 %i, i128 %s1.coerce, i128 %s2.coerce) #0 { entry: -; CHECK: f41 +; CHECK-LABEL: f41 ; CHECK: add w[[A:[0-9]+]], w1, w0 ; CHECK: add {{w[0-9]+}}, w[[A]], w3 %s1.sroa.0.0.extract.trunc = trunc i128 %s1.coerce to i32 @@ -219,7 +219,7 @@ entry: define i32 @caller41() #1 { entry: -; CHECK: caller41 +; CHECK-LABEL: caller41 ; CHECK: ldp x1, x2, ; CHECK: ldp x3, x4, %0 = load i128* bitcast (%struct.s41* @g41 to i128*), align 16 @@ -235,7 +235,7 @@ declare i32 @f41_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, ; passed on stack at [sp+16] and [sp+32] define i32 @caller41_stack() #1 { entry: -; CHECK: caller41_stack +; CHECK-LABEL: caller41_stack ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #32] ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] ; CHECK: movz w[[C:[0-9]+]], #0x9 @@ -250,7 +250,7 @@ entry: ; structs with size of 22 bytes, passed indirectly in x1 and x2 define i32 @f42(i32 %i, %struct.s42* nocapture %s1, %struct.s42* nocapture %s2) #2 { entry: -; CHECK: f42 +; CHECK-LABEL: f42 ; CHECK: ldr w[[A:[0-9]+]], [x1] ; CHECK: ldr w[[B:[0-9]+]], [x2] ; CHECK: add 
w[[C:[0-9]+]], w[[A]], w0 @@ -280,7 +280,7 @@ entry: ; For s1, we allocate a 22-byte space, pass its address via x1 define i32 @caller42() #3 { entry: -; CHECK: caller42 +; CHECK-LABEL: caller42 ; CHECK: str {{x[0-9]+}}, [sp, #48] ; CHECK: str {{q[0-9]+}}, [sp, #32] ; CHECK: str {{x[0-9]+}}, [sp, #16] @@ -290,7 +290,7 @@ entry: ; Space for s1 is allocated at sp+32 ; Space for s2 is allocated at sp -; FAST: caller42 +; FAST-LABEL: caller42 ; FAST: sub sp, sp, #96 ; Space for s1 is allocated at fp-24 = sp+72 ; Space for s2 is allocated at sp+48 @@ -316,7 +316,7 @@ declare i32 @f42_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, define i32 @caller42_stack() #3 { entry: -; CHECK: caller42_stack +; CHECK-LABEL: caller42_stack ; CHECK: mov x29, sp ; CHECK: sub sp, sp, #96 ; CHECK: stur {{x[0-9]+}}, [x29, #-16] @@ -333,7 +333,7 @@ entry: ; CHECK: movz w[[C:[0-9]+]], #0x9 ; CHECK: str w[[C]], [sp] -; FAST: caller42_stack +; FAST-LABEL: caller42_stack ; Space for s1 is allocated at fp-24 ; Space for s2 is allocated at fp-48 ; FAST: sub x[[A:[0-9]+]], x29, #24 @@ -359,12 +359,12 @@ entry: ; passed indirectly in x1 and x2 define i32 @f43(i32 %i, %struct.s43* nocapture %s1, %struct.s43* nocapture %s2) #2 { entry: -; CHECK: f43 +; CHECK-LABEL: f43 ; CHECK: ldr w[[A:[0-9]+]], [x1] ; CHECK: ldr w[[B:[0-9]+]], [x2] ; CHECK: add w[[C:[0-9]+]], w[[A]], w0 ; CHECK: add {{w[0-9]+}}, w[[C]], w[[B]] -; FAST: f43 +; FAST-LABEL: f43 ; FAST: ldr w[[A:[0-9]+]], [x1] ; FAST: ldr w[[B:[0-9]+]], [x2] ; FAST: add w[[C:[0-9]+]], w[[A]], w0 @@ -388,7 +388,7 @@ entry: define i32 @caller43() #3 { entry: -; CHECK: caller43 +; CHECK-LABEL: caller43 ; CHECK: str {{q[0-9]+}}, [sp, #48] ; CHECK: str {{q[0-9]+}}, [sp, #32] ; CHECK: str {{q[0-9]+}}, [sp, #16] @@ -398,7 +398,7 @@ entry: ; Space for s1 is allocated at sp+32 ; Space for s2 is allocated at sp -; FAST: caller43 +; FAST-LABEL: caller43 ; FAST: mov x29, sp ; Space for s1 is allocated at sp+32 ; Space for s2 is allocated at sp @@ 
-428,7 +428,7 @@ declare i32 @f43_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, define i32 @caller43_stack() #3 { entry: -; CHECK: caller43_stack +; CHECK-LABEL: caller43_stack ; CHECK: mov x29, sp ; CHECK: sub sp, sp, #96 ; CHECK: stur {{q[0-9]+}}, [x29, #-16] @@ -445,7 +445,7 @@ entry: ; CHECK: movz w[[C:[0-9]+]], #0x9 ; CHECK: str w[[C]], [sp] -; FAST: caller43_stack +; FAST-LABEL: caller43_stack ; FAST: sub sp, sp, #96 ; Space for s1 is allocated at fp-32 = sp+64 ; Space for s2 is allocated at sp+32 @@ -481,13 +481,13 @@ declare i32 @callee_i128_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, define i32 @i128_split() { entry: -; CHECK: i128_split +; CHECK-LABEL: i128_split ; "i128 %0" should be on stack at [sp]. ; "i32 8" should be on stack at [sp, #16]. ; CHECK: str {{w[0-9]+}}, [sp, #16] ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp] -; FAST: i128_split -; FAST: sub sp, sp, #48 +; FAST-LABEL: i128_split +; FAST: sub sp, sp ; FAST: mov x[[ADDR:[0-9]+]], sp ; FAST: str {{w[0-9]+}}, [x[[ADDR]], #16] ; Load/Store opt is disabled with -O0, so the i128 is split. @@ -504,14 +504,16 @@ declare i32 @callee_i64(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, define i32 @i64_split() { entry: -; CHECK: i64_split +; CHECK-LABEL: i64_split ; "i64 %0" should be in register x7. ; "i32 8" should be on stack at [sp]. 
; CHECK: ldr x7, [{{x[0-9]+}}] ; CHECK: str {{w[0-9]+}}, [sp] -; FAST: i64_split +; FAST-LABEL: i64_split ; FAST: ldr x7, [{{x[0-9]+}}] -; FAST: str {{w[0-9]+}}, [sp] +; FAST: mov x[[R0:[0-9]+]], sp +; FAST: orr w[[R1:[0-9]+]], wzr, #0x8 +; FAST: str w[[R1]], {{\[}}x[[R0]]{{\]}} %0 = load i64* bitcast (%struct.s41* @g41 to i64*), align 16 %call = tail call i32 @callee_i64(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i64 %0, i32 8) #5 diff --git a/test/CodeGen/AArch64/arm64-addr-mode-folding.ll b/test/CodeGen/AArch64/arm64-addr-mode-folding.ll index 08fb8c9..74bb398 100644 --- a/test/CodeGen/AArch64/arm64-addr-mode-folding.ll +++ b/test/CodeGen/AArch64/arm64-addr-mode-folding.ll @@ -1,4 +1,4 @@ -; RUN: llc -O3 -mtriple arm64-apple-ios3 %s -o - | FileCheck %s +; RUN: llc -O3 -mtriple arm64-apple-ios3 -aarch64-gep-opt=false %s -o - | FileCheck %s ; @block = common global i8* null, align 8 diff --git a/test/CodeGen/AArch64/arm64-addrmode.ll b/test/CodeGen/AArch64/arm64-addrmode.ll index 700fba8..5433a8c 100644 --- a/test/CodeGen/AArch64/arm64-addrmode.ll +++ b/test/CodeGen/AArch64/arm64-addrmode.ll @@ -37,9 +37,8 @@ define void @t3() { ; base + unsigned offset (> imm12 * size of type in bytes) ; CHECK: @t4 -; CHECK: add [[ADDREG:x[0-9]+]], x{{[0-9]+}}, #8, lsl #12 -; CHECK: ldr xzr, [ -; CHECK: [[ADDREG]]] +; CHECK: orr w[[NUM:[0-9]+]], wzr, #0x8000 +; CHECK: ldr xzr, [x{{[0-9]+}}, x[[NUM]]] ; CHECK: ret define void @t4() { %incdec.ptr = getelementptr inbounds i64* @object, i64 4096 @@ -60,9 +59,8 @@ define void @t5(i64 %a) { ; base + reg + imm ; CHECK: @t6 ; CHECK: add [[ADDREG:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #3 -; CHECK-NEXT: add [[ADDREG]], [[ADDREG]], #8, lsl #12 -; CHECK: ldr xzr, [ -; CHECK: [[ADDREG]]] +; CHECK-NEXT: orr w[[NUM:[0-9]+]], wzr, #0x8000 +; CHECK: ldr xzr, [x{{[0-9]+}}, x[[NUM]]] ; CHECK: ret define void @t6(i64 %a) { %tmp1 = getelementptr inbounds i64* @object, i64 %a @@ -70,3 +68,114 @@ define void @t6(i64 %a) { %tmp = load volatile 
i64* %incdec.ptr, align 8 ret void } + +; Test base + wide immediate +define void @t7(i64 %a) { +; CHECK-LABEL: t7: +; CHECK: orr w[[NUM:[0-9]+]], wzr, #0xffff +; CHECK-NEXT: ldr xzr, [x0, x[[NUM]]] + %1 = add i64 %a, 65535 ;0xffff + %2 = inttoptr i64 %1 to i64* + %3 = load volatile i64* %2, align 8 + ret void +} + +define void @t8(i64 %a) { +; CHECK-LABEL: t8: +; CHECK: movn [[REG:x[0-9]+]], #0x1235 +; CHECK-NEXT: ldr xzr, [x0, [[REG]]] + %1 = sub i64 %a, 4662 ;-4662 is 0xffffffffffffedca + %2 = inttoptr i64 %1 to i64* + %3 = load volatile i64* %2, align 8 + ret void +} + +define void @t9(i64 %a) { +; CHECK-LABEL: t9: +; CHECK: movn [[REG:x[0-9]+]], #0x1235, lsl #16 +; CHECK-NEXT: ldr xzr, [x0, [[REG]]] + %1 = add i64 -305463297, %a ;-305463297 is 0xffffffffedcaffff + %2 = inttoptr i64 %1 to i64* + %3 = load volatile i64* %2, align 8 + ret void +} + +define void @t10(i64 %a) { +; CHECK-LABEL: t10: +; CHECK: movz [[REG:x[0-9]+]], #0x123, lsl #48 +; CHECK-NEXT: ldr xzr, [x0, [[REG]]] + %1 = add i64 %a, 81909218222800896 ;0x123000000000000 + %2 = inttoptr i64 %1 to i64* + %3 = load volatile i64* %2, align 8 + ret void +} + +define void @t11(i64 %a) { +; CHECK-LABEL: t11: +; CHECK: movz w[[NUM:[0-9]+]], #0x123, lsl #16 +; CHECK: movk w[[NUM:[0-9]+]], #0x4567 +; CHECK-NEXT: ldr xzr, [x0, x[[NUM]]] + %1 = add i64 %a, 19088743 ;0x1234567 + %2 = inttoptr i64 %1 to i64* + %3 = load volatile i64* %2, align 8 + ret void +} + +; Test some boundaries that should not use movz/movn/orr +define void @t12(i64 %a) { +; CHECK-LABEL: t12: +; CHECK: add [[REG:x[0-9]+]], x0, #4095 +; CHECK-NEXT: ldr xzr, {{\[}}[[REG]]] + %1 = add i64 %a, 4095 ;0xfff + %2 = inttoptr i64 %1 to i64* + %3 = load volatile i64* %2, align 8 + ret void +} + +define void @t13(i64 %a) { +; CHECK-LABEL: t13: +; CHECK: sub [[REG:x[0-9]+]], x0, #4095 +; CHECK-NEXT: ldr xzr, {{\[}}[[REG]]] + %1 = add i64 %a, -4095 ;-0xfff + %2 = inttoptr i64 %1 to i64* + %3 = load volatile i64* %2, align 8 + ret void +} + +define 
void @t14(i64 %a) { +; CHECK-LABEL: t14: +; CHECK: add [[REG:x[0-9]+]], x0, #291, lsl #12 +; CHECK-NEXT: ldr xzr, {{\[}}[[REG]]] + %1 = add i64 %a, 1191936 ;0x123000 + %2 = inttoptr i64 %1 to i64* + %3 = load volatile i64* %2, align 8 + ret void +} + +define void @t15(i64 %a) { +; CHECK-LABEL: t15: +; CHECK: sub [[REG:x[0-9]+]], x0, #291, lsl #12 +; CHECK-NEXT: ldr xzr, {{\[}}[[REG]]] + %1 = add i64 %a, -1191936 ;0xFFFFFFFFFFEDD000 + %2 = inttoptr i64 %1 to i64* + %3 = load volatile i64* %2, align 8 + ret void +} + +define void @t16(i64 %a) { +; CHECK-LABEL: t16: +; CHECK: ldr xzr, [x0, #28672] + %1 = add i64 %a, 28672 ;0x7000 + %2 = inttoptr i64 %1 to i64* + %3 = load volatile i64* %2, align 8 + ret void +} + +define void @t17(i64 %a) { +; CHECK-LABEL: t17: +; CHECK: ldur xzr, [x0, #-256] + %1 = add i64 %a, -256 ;-0x100 + %2 = inttoptr i64 %1 to i64* + %3 = load volatile i64* %2, align 8 + ret void +} diff --git a/test/CodeGen/AArch64/arm64-bcc.ll b/test/CodeGen/AArch64/arm64-bcc.ll new file mode 100644 index 0000000..138ae90 --- /dev/null +++ b/test/CodeGen/AArch64/arm64-bcc.ll @@ -0,0 +1,60 @@ +; RUN: llc < %s -mtriple=arm64-apple-darwin | FileCheck %s +; Checks for conditional branch b.vs + +; Function Attrs: nounwind +define i32 @add(i32, i32) { +entry: + %2 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %0, i32 %1) + %3 = extractvalue { i32, i1 } %2, 1 + br i1 %3, label %6, label %4 + +;