From 36b56886974eae4f9c5ebc96befd3e7bfe5de338 Mon Sep 17 00:00:00 2001
From: Stephen Hines
Date: Wed, 23 Apr 2014 16:57:46 -0700
Subject: Update to LLVM 3.5a.

Change-Id: Ifadecab779f128e62e430c2b4f6ddd84953ed617
---
 test/CodeGen/AArch64/128bit_load_store.ll | 53 + test/CodeGen/AArch64/adc.ll | 33 +- test/CodeGen/AArch64/assertion-rc-mismatch.ll | 24 + test/CodeGen/AArch64/atomic-ops.ll | 89 +- test/CodeGen/AArch64/concatvector-bugs.ll | 68 + test/CodeGen/AArch64/cpus.ll | 13 + test/CodeGen/AArch64/fcvt-int.ll | 2 +- test/CodeGen/AArch64/fp-dp3.ll | 54 +- test/CodeGen/AArch64/func-argpassing.ll | 13 +- test/CodeGen/AArch64/func-calls.ll | 8 +- test/CodeGen/AArch64/i128-shift.ll | 43 + test/CodeGen/AArch64/init-array.ll | 1 + test/CodeGen/AArch64/inline-asm-constraints.ll | 2 +- test/CodeGen/AArch64/inline-asm-modifiers.ll | 2 +- test/CodeGen/AArch64/jump-table.ll | 16 + test/CodeGen/AArch64/mature-mc-support.ll | 12 + test/CodeGen/AArch64/misched-basic-A53.ll | 112 + test/CodeGen/AArch64/mul-lohi.ll | 19 + test/CodeGen/AArch64/neon-2velem.ll | 303 + test/CodeGen/AArch64/neon-3vdiff.ll | 27 + test/CodeGen/AArch64/neon-across.ll | 20 +- test/CodeGen/AArch64/neon-add-pairwise.ll | 9 + test/CodeGen/AArch64/neon-add-sub.ll | 84 +- test/CodeGen/AArch64/neon-bitcast.ll | 12 +- test/CodeGen/AArch64/neon-bitwise-instructions.ll | 723 +- test/CodeGen/AArch64/neon-bsl.ll | 13 + test/CodeGen/AArch64/neon-copy.ll | 887 +- test/CodeGen/AArch64/neon-copyPhysReg-tuple.ll | 47 + test/CodeGen/AArch64/neon-crypto.ll | 63 +- test/CodeGen/AArch64/neon-extract.ll | 32 + test/CodeGen/AArch64/neon-facge-facgt.ll | 24 +- test/CodeGen/AArch64/neon-fma.ll | 50 +- test/CodeGen/AArch64/neon-fpround_f128.ll | 18 + test/CodeGen/AArch64/neon-load-store-v1i32.ll | 29 + test/CodeGen/AArch64/neon-max-min-pairwise.ll | 36 + test/CodeGen/AArch64/neon-misc.ll | 399 +- test/CodeGen/AArch64/neon-mla-mls.ll | 24 +- test/CodeGen/AArch64/neon-mov.ll | 82 +- test/CodeGen/AArch64/neon-mul-div.ll | 597 +- test/CodeGen/AArch64/neon-or-combine.ll | 29 + test/CodeGen/AArch64/neon-perm.ll | 1441 ++ test/CodeGen/AArch64/neon-scalar-add-sub.ll | 12 +- test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll | 24 +- test/CodeGen/AArch64/neon-scalar-by-elem-mul.ll | 28 +- test/CodeGen/AArch64/neon-scalar-compare.ll | 28 +- test/CodeGen/AArch64/neon-scalar-copy.ll | 43 +- test/CodeGen/AArch64/neon-scalar-cvt.ll | 52 +- test/CodeGen/AArch64/neon-scalar-ext.ll | 113 + test/CodeGen/AArch64/neon-scalar-fabd.ll | 14 +- test/CodeGen/AArch64/neon-scalar-fcvt.ll | 108 +- test/CodeGen/AArch64/neon-scalar-fp-compare.ll | 254 +- test/CodeGen/AArch64/neon-scalar-recip.ll | 72 +- .../CodeGen/AArch64/neon-scalar-reduce-pairwise.ll | 212 +- test/CodeGen/AArch64/neon-scalar-rounding-shift.ll | 8 +- .../AArch64/neon-scalar-saturating-add-sub.ll | 32 +- .../neon-scalar-saturating-rounding-shift.ll | 20 +- .../AArch64/neon-scalar-saturating-shift.ll | 20 +- test/CodeGen/AArch64/neon-scalar-shift.ll | 206 +- test/CodeGen/AArch64/neon-select_cc.ll | 202 + test/CodeGen/AArch64/neon-shift-left-long.ll | 10 + test/CodeGen/AArch64/neon-shl-ashr-lshr.ll | 333 + test/CodeGen/AArch64/neon-simd-ldst-one.ll | 188 +- test/CodeGen/AArch64/neon-simd-tbl.ll | 176 +- test/CodeGen/AArch64/neon-spill-fpr8-fpr16.ll | 30 + test/CodeGen/AArch64/neon-truncStore-extLoad.ll | 57 + test/CodeGen/AArch64/neon-v1i1-setcc.ll | 68 + test/CodeGen/AArch64/neon-vector-list-spill.ll | 175 + test/CodeGen/AArch64/pic-eh-stubs.ll | 5 +- test/CodeGen/AArch64/ragreedy-csr.ll | 297 + 
test/CodeGen/AArch64/sext_inreg.ll | 198 + test/CodeGen/AArch64/sincospow-vector-expansion.ll | 96 + test/CodeGen/AArch64/variadic.ll | 65 +- test/CodeGen/ARM/2006-11-10-CycleInDAG.ll | 2 +- test/CodeGen/ARM/2007-04-03-PEIBug.ll | 5 +- test/CodeGen/ARM/2007-05-14-InlineAsmCstCrash.ll | 2 +- test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll | 5 +- test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll | 5 +- test/CodeGen/ARM/2008-07-17-Fdiv.ll | 2 +- test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll | 2 +- test/CodeGen/ARM/2008-11-18-ScavengerAssert.ll | 2 +- test/CodeGen/ARM/2009-03-09-AddrModeBug.ll | 2 +- test/CodeGen/ARM/2009-04-06-AsmModifier.ll | 5 +- test/CodeGen/ARM/2009-04-08-AggregateAddr.ll | 2 +- test/CodeGen/ARM/2009-04-08-FREM.ll | 2 +- test/CodeGen/ARM/2009-04-08-FloatUndef.ll | 2 +- test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll | 2 +- test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll | 3 +- test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll | 4 +- test/CodeGen/ARM/2009-07-09-asm-p-constraint.ll | 2 +- test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll | 2 +- test/CodeGen/ARM/2009-08-23-linkerprivate.ll | 8 - test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll | 3 +- test/CodeGen/ARM/2009-09-10-postdec.ll | 2 +- test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll | 2 +- test/CodeGen/ARM/2009-09-24-spill-align.ll | 2 +- test/CodeGen/ARM/2010-03-04-stm-undef-addr.ll | 2 +- test/CodeGen/ARM/2010-04-09-NeonSelect.ll | 4 +- test/CodeGen/ARM/2010-04-14-SplitVector.ll | 2 +- test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll | 2 +- test/CodeGen/ARM/2010-05-21-BuildVector.ll | 2 +- test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll | 4 +- test/CodeGen/ARM/2010-06-29-SubregImpDefs.ll | 2 +- test/CodeGen/ARM/2010-07-26-GlobalMerge.ll | 2 +- test/CodeGen/ARM/2010-08-04-StackVariable.ll | 2 +- test/CodeGen/ARM/2010-09-29-mc-asm-header-test.ll | 285 - test/CodeGen/ARM/2010-12-07-PEIBug.ll | 6 +- test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll | 6 +- test/CodeGen/ARM/2011-04-12-AlignBug.ll | 8 +- test/CodeGen/ARM/2011-06-09-TailCallByVal.ll | 2 +- test/CodeGen/ARM/2011-06-16-TailCallByVal.ll | 4 +- test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll | 6 +- test/CodeGen/ARM/2011-10-26-memset-inline.ll | 8 +- test/CodeGen/ARM/2011-10-26-memset-with-neon.ll | 2 +- .../ARM/2011-11-07-PromoteVectorLoadStore.ll | 2 +- test/CodeGen/ARM/2011-11-09-BitcastVectorDouble.ll | 2 +- .../ARM/2011-11-09-IllegalVectorFPIntConvert.ll | 2 +- test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll | 2 +- test/CodeGen/ARM/2012-04-10-DAGCombine.ll | 2 +- test/CodeGen/ARM/2012-05-04-vmov.ll | 8 +- test/CodeGen/ARM/2012-05-10-PreferVMOVtoVDUP32.ll | 2 +- test/CodeGen/ARM/2012-08-23-legalize-vmull.ll | 2 +- test/CodeGen/ARM/2012-09-18-ARMv4ISelBug.ll | 2 +- .../ARM/2012-09-25-InlineAsmScalarToVectorConv.ll | 2 +- .../ARM/2012-09-25-InlineAsmScalarToVectorConv2.ll | 2 +- .../ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll | 4 +- test/CodeGen/ARM/2013-04-16-AAPCS-C4-vs-VFP.ll | 2 +- .../2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll | 6 +- test/CodeGen/ARM/2013-05-05-IfConvertBug.ll | 21 + .../ARM/2014-01-09-pseudo_expand_implicit_reg.ll | 56 + .../CodeGen/ARM/2014-02-05-vfp-regs-after-stack.ll | 22 + .../ARM/2014-02-21-byval-reg-split-alignment.ll | 114 + test/CodeGen/ARM/DbgValueOtherTargets.test | 2 +- test/CodeGen/ARM/Windows/aapcs.ll | 16 + test/CodeGen/ARM/Windows/hard-float.ll | 10 + test/CodeGen/ARM/Windows/mangling.ll | 9 + test/CodeGen/ARM/Windows/no-aeabi.ll | 10 + test/CodeGen/ARM/Windows/no-arm-mode.ll | 5 + test/CodeGen/ARM/Windows/no-ehabi.ll | 21 + 
test/CodeGen/ARM/a15-SD-dep.ll | 59 + test/CodeGen/ARM/a15-mla.ll | 3 +- test/CodeGen/ARM/a15.ll | 2 +- test/CodeGen/ARM/addrmode.ll | 5 +- test/CodeGen/ARM/addrspacecast.ll | 2 +- test/CodeGen/ARM/arm-abi-attr.ll | 28 + test/CodeGen/ARM/arm-and-tst-peephole.ll | 9 +- test/CodeGen/ARM/arm-asm.ll | 2 +- test/CodeGen/ARM/arm-modifier.ll | 2 +- test/CodeGen/ARM/arm-negative-stride.ll | 2 +- test/CodeGen/ARM/arm-ttype-target2.ll | 2 +- test/CodeGen/ARM/atomic-64bit.ll | 157 +- test/CodeGen/ARM/atomic-cmp.ll | 2 +- test/CodeGen/ARM/atomic-load-store.ll | 2 +- test/CodeGen/ARM/atomic-op.ll | 37 + test/CodeGen/ARM/atomic-ops-v8.ll | 413 +- test/CodeGen/ARM/atomicrmw_minmax.ll | 4 +- test/CodeGen/ARM/bfc.ll | 2 +- test/CodeGen/ARM/bfi.ll | 2 +- test/CodeGen/ARM/bfx.ll | 2 +- test/CodeGen/ARM/bic.ll | 2 +- test/CodeGen/ARM/bits.ll | 2 +- test/CodeGen/ARM/build-attributes-encoding.s | 10 +- test/CodeGen/ARM/build-attributes.ll | 458 + test/CodeGen/ARM/cache-intrinsic.ll | 26 + test/CodeGen/ARM/call-tc.ll | 6 +- test/CodeGen/ARM/call.ll | 12 +- test/CodeGen/ARM/carry.ll | 2 +- test/CodeGen/ARM/clz.ll | 2 +- test/CodeGen/ARM/coalesce-dbgvalue.ll | 2 +- test/CodeGen/ARM/compare-call.ll | 6 +- test/CodeGen/ARM/constantfp.ll | 12 +- test/CodeGen/ARM/crash-O0.ll | 2 +- test/CodeGen/ARM/cse-ldrlit.ll | 61 + test/CodeGen/ARM/ctz.ll | 2 +- test/CodeGen/ARM/debug-frame-large-stack.ll | 99 + test/CodeGen/ARM/debug-frame-no-debug.ll | 97 + test/CodeGen/ARM/debug-frame-vararg.ll | 141 + test/CodeGen/ARM/debug-frame.ll | 574 + test/CodeGen/ARM/debug-info-qreg.ll | 10 +- test/CodeGen/ARM/debug-info-s16-reg.ll | 6 +- test/CodeGen/ARM/debug-info-sreg2.ll | 14 +- test/CodeGen/ARM/debug-segmented-stacks.ll | 80 + test/CodeGen/ARM/default-float-abi.ll | 22 + test/CodeGen/ARM/divmod-eabi.ll | 5 +- test/CodeGen/ARM/dyn-stackalloc.ll | 2 +- test/CodeGen/ARM/ehabi-filters.ll | 2 +- test/CodeGen/ARM/ehabi-no-landingpad.ll | 3 +- test/CodeGen/ARM/ehabi-unwind.ll | 5 +- test/CodeGen/ARM/ehabi.ll | 22 +- test/CodeGen/ARM/extload-knownzero.ll | 2 +- test/CodeGen/ARM/extloadi1.ll | 3 +- test/CodeGen/ARM/fadds.ll | 23 +- test/CodeGen/ARM/fast-isel-call.ll | 2 - test/CodeGen/ARM/fast-isel-crash2.ll | 4 +- test/CodeGen/ARM/fast-isel-frameaddr.ll | 16 +- test/CodeGen/ARM/fast-isel-intrinsic.ll | 42 +- test/CodeGen/ARM/fast-isel-static.ll | 8 +- test/CodeGen/ARM/fast-tail-call.ll | 2 +- test/CodeGen/ARM/fastcc-vfp.ll | 40 + test/CodeGen/ARM/fastisel-thumb-litpool.ll | 11 + test/CodeGen/ARM/fdivs.ll | 8 +- test/CodeGen/ARM/fixunsdfdi.ll | 7 +- test/CodeGen/ARM/fmacs.ll | 10 +- test/CodeGen/ARM/fmdrr-fmrrd.ll | 8 +- test/CodeGen/ARM/fmscs.ll | 6 +- test/CodeGen/ARM/fmuls.ll | 23 +- test/CodeGen/ARM/fnegs.ll | 23 +- test/CodeGen/ARM/fnmacs.ll | 6 +- test/CodeGen/ARM/fnmscs.ll | 23 +- test/CodeGen/ARM/fnmul.ll | 10 +- test/CodeGen/ARM/fnmuls.ll | 8 +- test/CodeGen/ARM/fold-const.ll | 2 +- test/CodeGen/ARM/fold-stack-adjust.ll | 76 +- test/CodeGen/ARM/formal.ll | 2 +- test/CodeGen/ARM/fp-arg-shuffle.ll | 2 +- test/CodeGen/ARM/fp-fast.ll | 3 +- test/CodeGen/ARM/fp.ll | 2 +- test/CodeGen/ARM/fp16.ll | 2 +- test/CodeGen/ARM/fp_convert.ll | 23 +- test/CodeGen/ARM/fpcmp-opt.ll | 4 +- test/CodeGen/ARM/fpcmp.ll | 2 +- test/CodeGen/ARM/fpconsts.ll | 2 +- test/CodeGen/ARM/fpconv.ll | 4 +- test/CodeGen/ARM/fpmem.ll | 2 +- test/CodeGen/ARM/fpow.ll | 2 +- test/CodeGen/ARM/fptoint.ll | 2 +- test/CodeGen/ARM/fsubs.ll | 19 +- test/CodeGen/ARM/hello.ll | 21 +- test/CodeGen/ARM/iabs.ll | 2 +- test/CodeGen/ARM/ifconv-kills.ll | 2 +- 
test/CodeGen/ARM/ifcvt-branch-weight-bug.ll | 62 + test/CodeGen/ARM/ifcvt-branch-weight.ll | 42 + test/CodeGen/ARM/ifcvt1.ll | 4 +- test/CodeGen/ARM/ifcvt2.ll | 2 +- test/CodeGen/ARM/ifcvt3.ll | 14 +- test/CodeGen/ARM/ifcvt4.ll | 2 +- test/CodeGen/ARM/ifcvt9.ll | 2 +- test/CodeGen/ARM/illegal-vector-bitcast.ll | 4 +- test/CodeGen/ARM/imm.ll | 5 +- test/CodeGen/ARM/indirect-reg-input.ll | 2 +- test/CodeGen/ARM/indirectbr.ll | 11 +- test/CodeGen/ARM/inline-diagnostics.ll | 16 + test/CodeGen/ARM/inlineasm-64bit.ll | 4 +- test/CodeGen/ARM/inlineasm-imm-arm.ll | 2 +- test/CodeGen/ARM/inlineasm-ldr-pseudo.ll | 17 + .../ARM/inlineasm-switch-mode-oneway-from-arm.ll | 18 + .../ARM/inlineasm-switch-mode-oneway-from-thumb.ll | 18 + test/CodeGen/ARM/inlineasm-switch-mode.ll | 22 + test/CodeGen/ARM/inlineasm.ll | 2 +- test/CodeGen/ARM/inlineasm2.ll | 2 +- test/CodeGen/ARM/inlineasm3.ll | 3 +- test/CodeGen/ARM/inlineasm4.ll | 2 +- test/CodeGen/ARM/insn-sched1.ll | 11 +- test/CodeGen/ARM/integer_insertelement.ll | 2 +- test/CodeGen/ARM/interrupt-attr.ll | 30 +- test/CodeGen/ARM/intrinsics-crypto.ll | 58 +- test/CodeGen/ARM/ispositive.ll | 2 +- test/CodeGen/ARM/large-stack.ll | 2 +- test/CodeGen/ARM/ldaex-stlex.ll | 92 + test/CodeGen/ARM/ldm.ll | 4 +- test/CodeGen/ARM/ldr.ll | 2 +- test/CodeGen/ARM/ldr_ext.ll | 2 +- test/CodeGen/ARM/ldr_frame.ll | 5 +- test/CodeGen/ARM/ldr_post.ll | 4 +- test/CodeGen/ARM/ldr_pre.ll | 4 +- test/CodeGen/ARM/ldrd.ll | 8 +- test/CodeGen/ARM/ldstrex.ll | 28 +- test/CodeGen/ARM/load.ll | 13 +- test/CodeGen/ARM/long-setcc.ll | 7 +- test/CodeGen/ARM/long.ll | 2 +- test/CodeGen/ARM/longMAC.ll | 28 +- test/CodeGen/ARM/long_shift.ll | 2 +- test/CodeGen/ARM/lsr-scale-addr-mode.ll | 5 +- test/CodeGen/ARM/lsr-unfolded-offset.ll | 2 +- test/CodeGen/ARM/machine-licm.ll | 10 +- test/CodeGen/ARM/mature-mc-support.ll | 12 + test/CodeGen/ARM/mem.ll | 8 +- test/CodeGen/ARM/memcpy-inline.ll | 6 +- test/CodeGen/ARM/memfunc.ll | 3 +- test/CodeGen/ARM/minsize-imms.ll | 57 + test/CodeGen/ARM/minsize-litpools.ll | 26 + test/CodeGen/ARM/mls.ll | 5 +- test/CodeGen/ARM/movt-movw-global.ll | 8 +- test/CodeGen/ARM/movt.ll | 2 +- test/CodeGen/ARM/mul_const.ll | 2 +- test/CodeGen/ARM/mulhi.ll | 6 +- test/CodeGen/ARM/mult-alt-generic-arm.ll | 2 +- test/CodeGen/ARM/mvn.ll | 15 +- test/CodeGen/ARM/neon_arith1.ll | 5 +- test/CodeGen/ARM/neon_cmp.ll | 3 +- test/CodeGen/ARM/neon_div.ll | 3 +- test/CodeGen/ARM/neon_fpconv.ll | 2 +- test/CodeGen/ARM/neon_ld1.ll | 2 +- test/CodeGen/ARM/neon_ld2.ll | 4 +- test/CodeGen/ARM/neon_minmax.ll | 2 +- test/CodeGen/ARM/neon_shift.ll | 2 +- test/CodeGen/ARM/neon_vabs.ll | 2 +- test/CodeGen/ARM/none-macho.ll | 101 + test/CodeGen/ARM/noreturn.ll | 17 + test/CodeGen/ARM/optimize-dmbs-v7.ll | 74 + test/CodeGen/ARM/optselect-regclass.ll | 3 +- test/CodeGen/ARM/pack.ll | 2 +- test/CodeGen/ARM/phi.ll | 3 +- test/CodeGen/ARM/popcnt.ll | 2 +- test/CodeGen/ARM/prefetch-thumb.ll | 22 - test/CodeGen/ARM/prefetch.ll | 28 +- test/CodeGen/ARM/reg_sequence.ll | 8 +- test/CodeGen/ARM/ret0.ll | 2 +- test/CodeGen/ARM/ret_arg1.ll | 2 +- test/CodeGen/ARM/ret_arg2.ll | 2 +- test/CodeGen/ARM/ret_arg3.ll | 3 +- test/CodeGen/ARM/ret_arg4.ll | 2 +- test/CodeGen/ARM/ret_arg5.ll | 2 +- test/CodeGen/ARM/ret_f32_arg2.ll | 2 +- test/CodeGen/ARM/ret_f32_arg5.ll | 2 +- test/CodeGen/ARM/ret_f64_arg2.ll | 2 +- test/CodeGen/ARM/ret_f64_arg_reg_split.ll | 2 +- test/CodeGen/ARM/ret_f64_arg_split.ll | 2 +- test/CodeGen/ARM/ret_f64_arg_stack.ll | 2 +- test/CodeGen/ARM/ret_i128_arg2.ll | 2 +- 
test/CodeGen/ARM/ret_i64_arg2.ll | 2 +- test/CodeGen/ARM/ret_i64_arg3.ll | 2 +- test/CodeGen/ARM/ret_i64_arg_split.ll | 2 +- test/CodeGen/ARM/ret_void.ll | 2 +- test/CodeGen/ARM/returned-ext.ll | 4 +- test/CodeGen/ARM/returned-trunc-tail-calls.ll | 2 +- test/CodeGen/ARM/rev.ll | 2 +- test/CodeGen/ARM/saxpy10-a9.ll | 135 + test/CodeGen/ARM/sbfx.ll | 2 +- test/CodeGen/ARM/segmented-stacks-dynamic.ll | 62 + test/CodeGen/ARM/segmented-stacks.ll | 235 + test/CodeGen/ARM/select-imm.ll | 10 +- test/CodeGen/ARM/select-undef.ll | 3 +- test/CodeGen/ARM/select.ll | 10 +- test/CodeGen/ARM/setcc-sentinals.ll | 4 +- test/CodeGen/ARM/smul.ll | 4 +- test/CodeGen/ARM/ssp-data-layout.ll | 528 + test/CodeGen/ARM/stack-frame.ll | 5 +- test/CodeGen/ARM/str_post.ll | 2 +- test/CodeGen/ARM/str_pre.ll | 8 +- test/CodeGen/ARM/str_trunc.ll | 12 +- test/CodeGen/ARM/struct_byval_arm_t1_t2.ll | 2 +- test/CodeGen/ARM/sub.ll | 2 +- test/CodeGen/ARM/subreg-remat.ll | 2 +- test/CodeGen/ARM/sxt_rot.ll | 2 +- test/CodeGen/ARM/t2-imm.ll | 2 +- test/CodeGen/ARM/tail-call.ll | 21 + test/CodeGen/ARM/taildup-branch-weight.ll | 54 + test/CodeGen/ARM/this-return.ll | 4 +- test/CodeGen/ARM/thumb-litpool.ll | 15 + test/CodeGen/ARM/thumb2-it-block.ll | 2 +- test/CodeGen/ARM/tls-models.ll | 32 +- test/CodeGen/ARM/tls1.ll | 2 +- test/CodeGen/ARM/tls2.ll | 4 +- test/CodeGen/ARM/trunc_ldr.ll | 10 +- test/CodeGen/ARM/truncstore-dag-combine.ll | 7 +- test/CodeGen/ARM/tst_teq.ll | 7 +- test/CodeGen/ARM/twoaddrinstr.ll | 2 +- test/CodeGen/ARM/unaligned_load_store.ll | 11 +- test/CodeGen/ARM/unaligned_load_store_vector.ll | 2 +- test/CodeGen/ARM/unord.ll | 10 +- test/CodeGen/ARM/uxt_rot.ll | 14 +- test/CodeGen/ARM/v1-constant-fold.ll | 4 +- test/CodeGen/ARM/vaba.ll | 2 +- test/CodeGen/ARM/vabd.ll | 2 +- test/CodeGen/ARM/vabs.ll | 10 +- test/CodeGen/ARM/vadd.ll | 2 +- test/CodeGen/ARM/varargs-spill-stack-align-nacl.ll | 31 + test/CodeGen/ARM/vargs.ll | 3 +- test/CodeGen/ARM/vbits.ll | 2 +- test/CodeGen/ARM/vbsl.ll | 2 +- test/CodeGen/ARM/vceq.ll | 2 +- test/CodeGen/ARM/vcge.ll | 10 +- test/CodeGen/ARM/vcgt.ll | 12 +- test/CodeGen/ARM/vcnt.ll | 2 +- test/CodeGen/ARM/vcombine.ll | 2 +- test/CodeGen/ARM/vcvt.ll | 2 +- test/CodeGen/ARM/vdup.ll | 35 +- test/CodeGen/ARM/vext.ll | 2 +- test/CodeGen/ARM/vfcmp.ll | 2 +- test/CodeGen/ARM/vfp-regs-dwarf.ll | 44 + test/CodeGen/ARM/vhadd.ll | 2 +- test/CodeGen/ARM/vhsub.ll | 2 +- test/CodeGen/ARM/vicmp.ll | 2 +- test/CodeGen/ARM/vld1.ll | 6 +- test/CodeGen/ARM/vld2.ll | 2 +- test/CodeGen/ARM/vld3.ll | 17 +- test/CodeGen/ARM/vld4.ll | 15 +- test/CodeGen/ARM/vlddup.ll | 2 +- test/CodeGen/ARM/vldlane.ll | 6 +- test/CodeGen/ARM/vminmax.ll | 2 +- test/CodeGen/ARM/vmla.ll | 2 +- test/CodeGen/ARM/vmls.ll | 2 +- test/CodeGen/ARM/vmov.ll | 2 +- test/CodeGen/ARM/vmul.ll | 2 +- test/CodeGen/ARM/vneg.ll | 2 +- test/CodeGen/ARM/vpadal.ll | 2 +- test/CodeGen/ARM/vpadd.ll | 13 +- test/CodeGen/ARM/vpminmax.ll | 2 +- test/CodeGen/ARM/vqadd.ll | 2 +- test/CodeGen/ARM/vqshl.ll | 2 +- test/CodeGen/ARM/vqshrn.ll | 2 +- test/CodeGen/ARM/vqsub.ll | 2 +- test/CodeGen/ARM/vrec.ll | 2 +- test/CodeGen/ARM/vrev.ll | 2 +- test/CodeGen/ARM/vsel.ll | 8 +- test/CodeGen/ARM/vselect_imax.ll | 2 +- test/CodeGen/ARM/vshift.ll | 34 +- test/CodeGen/ARM/vshiftins.ll | 2 +- test/CodeGen/ARM/vshl.ll | 2 +- test/CodeGen/ARM/vshll.ll | 101 +- test/CodeGen/ARM/vshrn.ll | 49 +- test/CodeGen/ARM/vsra.ll | 36 +- test/CodeGen/ARM/vst1.ll | 2 +- test/CodeGen/ARM/vst2.ll | 2 +- test/CodeGen/ARM/vst3.ll | 14 +- test/CodeGen/ARM/vst4.ll | 14 +- 
test/CodeGen/ARM/vstlane.ll | 2 +- test/CodeGen/ARM/vsub.ll | 2 +- test/CodeGen/ARM/vtbl.ll | 2 +- test/CodeGen/ARM/vtrn.ll | 2 +- test/CodeGen/ARM/vuzp.ll | 2 +- test/CodeGen/ARM/vzip.ll | 2 +- test/CodeGen/ARM/warn-stack.ll | 2 +- test/CodeGen/ARM/weak.ll | 6 +- test/CodeGen/ARM/weak2.ll | 5 +- test/CodeGen/ARM/zero-cycle-zero.ll | 70 + test/CodeGen/ARM64/2011-03-09-CPSRSpill.ll | 47 + test/CodeGen/ARM64/2011-03-17-AsmPrinterCrash.ll | 45 + .../ARM64/2011-03-21-Unaligned-Frame-Index.ll | 12 + test/CodeGen/ARM64/2011-04-21-CPSRBug.ll | 26 + test/CodeGen/ARM64/2011-10-18-LdStOptBug.ll | 31 + .../CodeGen/ARM64/2012-01-11-ComparisonDAGCrash.ll | 40 + .../ARM64/2012-05-07-DAGCombineVectorExtract.ll | 20 + test/CodeGen/ARM64/2012-05-07-MemcpyAlignBug.ll | 21 + test/CodeGen/ARM64/2012-05-09-LOADgot-bug.ll | 22 + test/CodeGen/ARM64/2012-05-22-LdStOptBug.ll | 50 + test/CodeGen/ARM64/2012-06-06-FPToUI.ll | 65 + test/CodeGen/ARM64/2012-07-11-InstrEmitterBug.ll | 56 + test/CodeGen/ARM64/2013-01-13-ffast-fcmp.ll | 19 + test/CodeGen/ARM64/2013-01-23-frem-crash.ll | 15 + test/CodeGen/ARM64/2013-01-23-sext-crash.ll | 37 + test/CodeGen/ARM64/2013-02-12-shufv8i8.ll | 11 + test/CodeGen/ARM64/AdvSIMD-Scalar.ll | 38 + test/CodeGen/ARM64/aapcs.ll | 86 + test/CodeGen/ARM64/abi-varargs.ll | 191 + test/CodeGen/ARM64/abi.ll | 236 + test/CodeGen/ARM64/abi_align.ll | 529 + test/CodeGen/ARM64/addp.ll | 32 + test/CodeGen/ARM64/addr-mode-folding.ll | 171 + test/CodeGen/ARM64/addr-type-promotion.ll | 82 + test/CodeGen/ARM64/addrmode.ll | 72 + test/CodeGen/ARM64/alloc-no-stack-realign.ll | 21 + test/CodeGen/ARM64/alloca-frame-pointer-offset.ll | 29 + test/CodeGen/ARM64/andCmpBrToTBZ.ll | 72 + test/CodeGen/ARM64/anyregcc-crash.ll | 19 + test/CodeGen/ARM64/anyregcc.ll | 363 + test/CodeGen/ARM64/arith-saturating.ll | 153 + test/CodeGen/ARM64/arith.ll | 262 + test/CodeGen/ARM64/atomic-128.ll | 213 + test/CodeGen/ARM64/atomic.ll | 343 + test/CodeGen/ARM64/basic-pic.ll | 54 + test/CodeGen/ARM64/big-imm-offsets.ll | 14 + test/CodeGen/ARM64/big-stack.ll | 21 + test/CodeGen/ARM64/bitfield-extract.ll | 406 + test/CodeGen/ARM64/blockaddress.ll | 30 + test/CodeGen/ARM64/build-vector.ll | 35 + test/CodeGen/ARM64/call-tailcalls.ll | 91 + test/CodeGen/ARM64/cast-opt.ll | 31 + test/CodeGen/ARM64/ccmp-heuristics.ll | 190 + test/CodeGen/ARM64/ccmp.ll | 289 + test/CodeGen/ARM64/coalesce-ext.ll | 17 + test/CodeGen/ARM64/code-model-large-abs.ll | 72 + test/CodeGen/ARM64/collect-loh-garbage-crash.ll | 37 + test/CodeGen/ARM64/collect-loh-str.ll | 23 + test/CodeGen/ARM64/collect-loh.ll | 47 + test/CodeGen/ARM64/compact-unwind-unhandled-cfi.S | 17 + test/CodeGen/ARM64/complex-ret.ll | 7 + test/CodeGen/ARM64/convert-v2f64-v2i32.ll | 24 + test/CodeGen/ARM64/convert-v2i32-v2f64.ll | 29 + test/CodeGen/ARM64/copy-tuple.ll | 146 + test/CodeGen/ARM64/crc32.ll | 71 + test/CodeGen/ARM64/crypto.ll | 135 + test/CodeGen/ARM64/cse.ll | 59 + test/CodeGen/ARM64/csel.ll | 222 + test/CodeGen/ARM64/cvt.ll | 401 + test/CodeGen/ARM64/dagcombiner-convergence.ll | 19 + test/CodeGen/ARM64/dagcombiner-load-slicing.ll | 102 + test/CodeGen/ARM64/dup.ll | 322 + test/CodeGen/ARM64/early-ifcvt.ll | 423 + test/CodeGen/ARM64/elf-calls.ll | 20 + test/CodeGen/ARM64/elf-constpool.ll | 13 + test/CodeGen/ARM64/elf-globals.ll | 115 + test/CodeGen/ARM64/ext.ll | 101 + test/CodeGen/ARM64/extend-int-to-fp.ll | 19 + test/CodeGen/ARM64/extend.ll | 15 + test/CodeGen/ARM64/extern-weak.ll | 51 + test/CodeGen/ARM64/extload-knownzero.ll | 28 + test/CodeGen/ARM64/extract.ll | 58 + 
test/CodeGen/ARM64/extract_subvector.ll | 51 + test/CodeGen/ARM64/fast-isel-addr-offset.ll | 47 + test/CodeGen/ARM64/fast-isel-alloca.ll | 24 + test/CodeGen/ARM64/fast-isel-br.ll | 155 + test/CodeGen/ARM64/fast-isel-call.ll | 91 + test/CodeGen/ARM64/fast-isel-conversion.ll | 416 + test/CodeGen/ARM64/fast-isel-fcmp.ll | 146 + test/CodeGen/ARM64/fast-isel-gv.ll | 38 + test/CodeGen/ARM64/fast-isel-icmp.ll | 214 + test/CodeGen/ARM64/fast-isel-indirectbr.ll | 36 + test/CodeGen/ARM64/fast-isel-intrinsic.ll | 135 + test/CodeGen/ARM64/fast-isel-materialize.ll | 27 + test/CodeGen/ARM64/fast-isel-noconvert.ll | 36 + test/CodeGen/ARM64/fast-isel-rem.ll | 33 + test/CodeGen/ARM64/fast-isel-ret.ll | 63 + test/CodeGen/ARM64/fast-isel-select.ll | 63 + test/CodeGen/ARM64/fast-isel.ll | 95 + test/CodeGen/ARM64/fastcc-tailcall.ll | 24 + .../ARM64/fastisel-gep-promote-before-add.ll | 18 + test/CodeGen/ARM64/fcmp-opt.ll | 173 + test/CodeGen/ARM64/fcopysign.ll | 51 + .../ARM64/fixed-point-scalar-cvt-dagcombine.ll | 15 + test/CodeGen/ARM64/fmadd.ll | 92 + test/CodeGen/ARM64/fmax.ll | 21 + test/CodeGen/ARM64/fminv.ll | 101 + test/CodeGen/ARM64/fmuladd.ll | 88 + test/CodeGen/ARM64/fold-address.ll | 79 + test/CodeGen/ARM64/fold-lsl.ll | 79 + test/CodeGen/ARM64/fp-imm.ll | 32 + test/CodeGen/ARM64/fp.ll | 8 + test/CodeGen/ARM64/fp128-folding.ll | 17 + test/CodeGen/ARM64/fp128.ll | 274 + test/CodeGen/ARM64/frame-index.ll | 11 + test/CodeGen/ARM64/frameaddr.ll | 15 + test/CodeGen/ARM64/global-address.ll | 14 + test/CodeGen/ARM64/hello.ll | 38 + test/CodeGen/ARM64/i16-subreg-extract.ll | 12 + test/CodeGen/ARM64/icmp-opt.ll | 17 + test/CodeGen/ARM64/illegal-float-ops.ll | 295 + test/CodeGen/ARM64/indexed-memory.ll | 351 + test/CodeGen/ARM64/inline-asm-error-I.ll | 11 + test/CodeGen/ARM64/inline-asm-error-J.ll | 11 + test/CodeGen/ARM64/inline-asm-error-K.ll | 11 + test/CodeGen/ARM64/inline-asm-error-L.ll | 11 + test/CodeGen/ARM64/inline-asm-error-M.ll | 11 + test/CodeGen/ARM64/inline-asm-error-N.ll | 11 + test/CodeGen/ARM64/inline-asm-zero-reg-error.ll | 11 + test/CodeGen/ARM64/inline-asm.ll | 230 + test/CodeGen/ARM64/join-reserved.ll | 17 + test/CodeGen/ARM64/jumptable.ll | 35 + test/CodeGen/ARM64/ld1.ll | 1345 ++ test/CodeGen/ARM64/ldp.ll | 149 + test/CodeGen/ARM64/ldur.ll | 67 + test/CodeGen/ARM64/ldxr-stxr.ll | 143 + test/CodeGen/ARM64/leaf-compact-unwind.ll | 161 + test/CodeGen/ARM64/leaf.ll | 13 + test/CodeGen/ARM64/lit.local.cfg | 11 + test/CodeGen/ARM64/long-shift.ll | 59 + test/CodeGen/ARM64/memcpy-inline.ll | 112 + test/CodeGen/ARM64/memset-inline.ll | 27 + test/CodeGen/ARM64/memset-to-bzero.ll | 101 + test/CodeGen/ARM64/movi.ll | 202 + test/CodeGen/ARM64/mul.ll | 90 + test/CodeGen/ARM64/neg.ll | 71 + test/CodeGen/ARM64/neon-compare-instructions.ll | 1191 ++ test/CodeGen/ARM64/patchpoint.ll | 163 + test/CodeGen/ARM64/platform-reg.ll | 26 + test/CodeGen/ARM64/popcnt.ll | 43 + test/CodeGen/ARM64/prefetch.ll | 88 + test/CodeGen/ARM64/promote-const.ll | 255 + test/CodeGen/ARM64/redzone.ll | 18 + test/CodeGen/ARM64/register-offset-addressing.ll | 12 + test/CodeGen/ARM64/register-pairing.ll | 53 + test/CodeGen/ARM64/regress-f128csel-flags.ll | 27 + test/CodeGen/ARM64/regress-interphase-shift.ll | 29 + test/CodeGen/ARM64/return-vector.ll | 11 + test/CodeGen/ARM64/returnaddr.ll | 26 + test/CodeGen/ARM64/rev.ll | 221 + test/CodeGen/ARM64/rounding.ll | 208 + test/CodeGen/ARM64/scaled_iv.ll | 38 + test/CodeGen/ARM64/scvt.ll | 830 + test/CodeGen/ARM64/shifted-sext.ll | 277 + test/CodeGen/ARM64/simd-scalar-to-vector.ll | 22 
+ test/CodeGen/ARM64/simplest-elf.ll | 18 + test/CodeGen/ARM64/sincos.ll | 42 + test/CodeGen/ARM64/sitofp-combine-chains.ll | 22 + test/CodeGen/ARM64/sli-sri-opt.ll | 41 + test/CodeGen/ARM64/smaxv.ll | 74 + test/CodeGen/ARM64/sminv.ll | 74 + test/CodeGen/ARM64/spill-lr.ll | 74 + test/CodeGen/ARM64/spill.ll | 15 + test/CodeGen/ARM64/st1.ll | 676 + test/CodeGen/ARM64/stack-no-frame.ll | 20 + test/CodeGen/ARM64/stackmap.ll | 288 + test/CodeGen/ARM64/stacksave.ll | 20 + test/CodeGen/ARM64/stp.ll | 101 + test/CodeGen/ARM64/strict-align.ll | 25 + test/CodeGen/ARM64/stur.ll | 98 + test/CodeGen/ARM64/subvector-extend.ll | 141 + test/CodeGen/ARM64/swizzle-tbl-i16-layout.ll | 36 + test/CodeGen/ARM64/tbl.ll | 132 + test/CodeGen/ARM64/this-return.ll | 83 + test/CodeGen/ARM64/tls-darwin.ll | 18 + test/CodeGen/ARM64/tls-dynamic-together.ll | 18 + test/CodeGen/ARM64/tls-dynamics.ll | 135 + test/CodeGen/ARM64/tls-execs.ll | 63 + test/CodeGen/ARM64/trap.ll | 8 + test/CodeGen/ARM64/trn.ll | 134 + test/CodeGen/ARM64/trunc-store.ll | 75 + test/CodeGen/ARM64/umaxv.ll | 92 + test/CodeGen/ARM64/uminv.ll | 92 + test/CodeGen/ARM64/umov.ll | 33 + test/CodeGen/ARM64/unaligned_ldst.ll | 41 + test/CodeGen/ARM64/uzp.ll | 107 + test/CodeGen/ARM64/vaargs.ll | 20 + test/CodeGen/ARM64/vabs.ll | 804 + test/CodeGen/ARM64/vadd.ll | 941 ++ test/CodeGen/ARM64/vaddlv.ll | 26 + test/CodeGen/ARM64/vaddv.ll | 233 + test/CodeGen/ARM64/variadic-aapcs.ll | 143 + test/CodeGen/ARM64/vbitwise.ll | 91 + test/CodeGen/ARM64/vclz.ll | 109 + test/CodeGen/ARM64/vcmp.ll | 227 + test/CodeGen/ARM64/vcnt.ll | 56 + test/CodeGen/ARM64/vcombine.ll | 17 + test/CodeGen/ARM64/vcvt.ll | 686 + test/CodeGen/ARM64/vcvt_f.ll | 82 + test/CodeGen/ARM64/vcvt_f32_su32.ll | 73 + test/CodeGen/ARM64/vcvt_n.ll | 49 + test/CodeGen/ARM64/vcvt_su32_f32.ll | 34 + test/CodeGen/ARM64/vcvtxd_f32_f64.ll | 11 + test/CodeGen/ARM64/vecCmpBr.ll | 207 + test/CodeGen/ARM64/vecFold.ll | 145 + test/CodeGen/ARM64/vector-ext.ll | 16 + test/CodeGen/ARM64/vector-imm.ll | 134 + test/CodeGen/ARM64/vector-ldst.ll | 601 + test/CodeGen/ARM64/vext.ll | 464 + test/CodeGen/ARM64/vfloatintrinsics.ll | 375 + test/CodeGen/ARM64/vhadd.ll | 249 + test/CodeGen/ARM64/vhsub.ll | 125 + test/CodeGen/ARM64/virtual_base.ll | 51 + test/CodeGen/ARM64/vmax.ll | 679 + test/CodeGen/ARM64/vminmaxnm.ll | 68 + test/CodeGen/ARM64/vmovn.ll | 242 + test/CodeGen/ARM64/vmul.ll | 2003 +++ test/CodeGen/ARM64/volatile.ll | 27 + test/CodeGen/ARM64/vqadd.ll | 332 + test/CodeGen/ARM64/vqsub.ll | 147 + test/CodeGen/ARM64/vselect.ll | 18 + test/CodeGen/ARM64/vsetcc_fp.ll | 11 + test/CodeGen/ARM64/vshift.ll | 1909 +++ test/CodeGen/ARM64/vshr.ll | 63 + test/CodeGen/ARM64/vshuffle.ll | 115 + test/CodeGen/ARM64/vsqrt.ll | 232 + test/CodeGen/ARM64/vsra.ll | 150 + test/CodeGen/ARM64/vsub.ll | 417 + test/CodeGen/ARM64/weak-reference.ll | 10 + test/CodeGen/ARM64/xaluo.ll | 524 + test/CodeGen/ARM64/zero-cycle-regmov.ll | 17 + test/CodeGen/ARM64/zero-cycle-zeroing.ll | 49 + test/CodeGen/ARM64/zext.ll | 11 + test/CodeGen/ARM64/zextload-unscaled.ll | 40 + test/CodeGen/ARM64/zip.ll | 107 + test/CodeGen/CPP/attributes.ll | 7 + .../Generic/2007-04-08-MultipleFrameIndices.ll | 2 +- .../CodeGen/Generic/2007-04-27-InlineAsm-X-Dest.ll | 2 +- test/CodeGen/Generic/2007-04-27-LargeMemObject.ll | 2 +- test/CodeGen/Generic/2007-12-17-InvokeAsm.ll | 2 +- test/CodeGen/Generic/2008-02-20-MatchingMem.ll | 2 +- test/CodeGen/Generic/2014-02-05-OpaqueConstants.ll | 19 + test/CodeGen/Generic/asm-large-immediate.ll | 2 +- 
test/CodeGen/Generic/inline-asm-mem-clobber.ll | 2 +- test/CodeGen/Generic/inline-asm-special-strings.ll | 2 +- test/CodeGen/Generic/no-target.ll | 3 + test/CodeGen/Generic/print-after.ll | 2 +- test/CodeGen/Hexagon/hwloop-dbg.ll | 5 +- test/CodeGen/Hexagon/packetize_cond_inst.ll | 2 +- test/CodeGen/MSP430/fp.ll | 12 + test/CodeGen/MSP430/misched-msp430.ll | 20 + test/CodeGen/Mips/2008-07-16-SignExtInReg.ll | 6 +- test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll | 8 +- test/CodeGen/Mips/abicalls.ll | 15 + test/CodeGen/Mips/addi.ll | 2 +- test/CodeGen/Mips/align16.ll | 8 +- test/CodeGen/Mips/alloca16.ll | 4 +- test/CodeGen/Mips/atomic.ll | 6 +- test/CodeGen/Mips/atomicops.ll | 2 +- test/CodeGen/Mips/blez_bgez.ll | 2 +- test/CodeGen/Mips/blockaddr.ll | 16 +- test/CodeGen/Mips/bswap.ll | 83 +- test/CodeGen/Mips/buildpairextractelementf64.ll | 8 +- test/CodeGen/Mips/cache-intrinsic.ll | 26 + test/CodeGen/Mips/call-optimization.ll | 91 + test/CodeGen/Mips/ci2.ll | 39 + test/CodeGen/Mips/cmov.ll | 59 +- test/CodeGen/Mips/const-mult.ll | 3 +- test/CodeGen/Mips/const4a.ll | 4 +- test/CodeGen/Mips/const6.ll | 2 +- test/CodeGen/Mips/const6a.ll | 2 +- test/CodeGen/Mips/ctlz.ll | 2 +- test/CodeGen/Mips/elf_eflags.ll | 83 + test/CodeGen/Mips/elf_st_other.ll | 12 + test/CodeGen/Mips/ex2.ll | 9 +- test/CodeGen/Mips/f16abs.ll | 2 +- test/CodeGen/Mips/fastcc.ll | 13 + test/CodeGen/Mips/fixdfsf.ll | 4 +- test/CodeGen/Mips/fmadd1.ll | 8 +- test/CodeGen/Mips/fp-indexed-ls.ll | 6 + test/CodeGen/Mips/fp16instrinsmc.ll | 4 +- test/CodeGen/Mips/fp16mix.ll | 20 +- test/CodeGen/Mips/fp16static.ll | 2 +- test/CodeGen/Mips/fpneeded.ll | 16 +- test/CodeGen/Mips/fpnotneeded.ll | 16 +- test/CodeGen/Mips/fptr2.ll | 2 +- test/CodeGen/Mips/global-address.ll | 8 +- test/CodeGen/Mips/helloworld.ll | 9 +- test/CodeGen/Mips/hf16_1.ll | 4 +- test/CodeGen/Mips/hf16call32.ll | 2 +- test/CodeGen/Mips/hf16call32_body.ll | 2 +- test/CodeGen/Mips/hf1_body.ll | 2 +- test/CodeGen/Mips/hfptrcall.ll | 2 +- test/CodeGen/Mips/i32k.ll | 2 +- test/CodeGen/Mips/l3mc.ll | 114 + test/CodeGen/Mips/largefr1.ll | 74 - test/CodeGen/Mips/lcb2.ll | 133 + test/CodeGen/Mips/lcb3c.ll | 59 + test/CodeGen/Mips/lcb4a.ll | 69 + test/CodeGen/Mips/lcb5.ll | 240 + test/CodeGen/Mips/mature-mc-support.ll | 32 + test/CodeGen/Mips/mbrsize4a.ll | 37 + test/CodeGen/Mips/micromips-atomic.ll | 18 + test/CodeGen/Mips/micromips-jal.ll | 48 + .../Mips/micromips-load-effective-address.ll | 29 + test/CodeGen/Mips/micromips-long-branch.ll | 16437 +++++++++++++++++++ test/CodeGen/Mips/mips16-hf-attr.ll | 45 + test/CodeGen/Mips/mips16_32_1.ll | 5 +- test/CodeGen/Mips/mips16_32_10.ll | 9 +- test/CodeGen/Mips/mips16_32_3.ll | 21 +- test/CodeGen/Mips/mips16_32_4.ll | 24 +- test/CodeGen/Mips/mips16_32_5.ll | 18 +- test/CodeGen/Mips/mips16_32_6.ll | 15 +- test/CodeGen/Mips/mips16_32_7.ll | 21 +- test/CodeGen/Mips/mips16_32_8.ll | 9 +- test/CodeGen/Mips/mips16_32_9.ll | 12 +- test/CodeGen/Mips/mips16_fpret.ll | 8 +- test/CodeGen/Mips/mips16fpe.ll | 2 +- test/CodeGen/Mips/mips64fpldst.ll | 4 +- test/CodeGen/Mips/mips64intldst.ll | 4 +- test/CodeGen/Mips/msa/2r_vector_scalar.ll | 69 +- test/CodeGen/Mips/msa/3r-s.ll | 86 +- test/CodeGen/Mips/msa/arithmetic_float.ll | 3 +- test/CodeGen/Mips/msa/basic_operations.ll | 30 +- test/CodeGen/Mips/msa/basic_operations_float.ll | 45 +- test/CodeGen/Mips/msa/bitwise.ll | 5 +- test/CodeGen/Mips/msa/compare.ll | 34 +- test/CodeGen/Mips/msa/compare_float.ll | 28 +- test/CodeGen/Mips/msa/elm_copy.ll | 136 +- test/CodeGen/Mips/msa/elm_insv.ll | 138 +- 
test/CodeGen/Mips/msa/elm_shift_slide.ll | 32 +- test/CodeGen/Mips/msa/frameindex.ll | 309 + .../Mips/msa/llvm-stress-s449609655-simplified.ll | 2 +- test/CodeGen/Mips/msa/shift-dagcombine.ll | 3 +- test/CodeGen/Mips/msa/shuffle.ll | 46 +- test/CodeGen/Mips/msa/special.ll | 40 +- test/CodeGen/Mips/msa/vec.ll | 168 +- test/CodeGen/Mips/nacl-align.ll | 96 + test/CodeGen/Mips/nacl-branch-delay.ll | 71 + test/CodeGen/Mips/nacl-reserved-regs.ll | 51 + test/CodeGen/Mips/nomips16.ll | 2 +- test/CodeGen/Mips/null.ll | 2 +- test/CodeGen/Mips/octeon.ll | 29 + test/CodeGen/Mips/octeon_popcnt.ll | 47 + test/CodeGen/Mips/optimize-pic-o0.ll | 33 + test/CodeGen/Mips/powif64_16.ll | 2 +- test/CodeGen/Mips/rotate.ll | 2 +- test/CodeGen/Mips/s2rem.ll | 92 + test/CodeGen/Mips/sel1c.ll | 2 +- test/CodeGen/Mips/sel2c.ll | 2 +- test/CodeGen/Mips/sr1.ll | 60 + test/CodeGen/Mips/tail16.ll | 20 + test/CodeGen/Mips/tls.ll | 6 +- test/CodeGen/Mips/trap1.ll | 2 +- test/CodeGen/NVPTX/addrspacecast.ll | 99 + test/CodeGen/NVPTX/aggr-param.ll | 20 + test/CodeGen/NVPTX/bug17709.ll | 2 +- test/CodeGen/NVPTX/call-with-alloca-buffer.ll | 66 + test/CodeGen/NVPTX/div-ri.ll | 8 + test/CodeGen/NVPTX/ldparam-v4.ll | 10 + test/CodeGen/NVPTX/noduplicate-syncthreads.ll | 74 + test/CodeGen/NVPTX/symbol-naming.ll | 31 + test/CodeGen/NVPTX/vec-param-load.ll | 6 +- .../PowerPC/2007-04-24-InlineAsm-I-Modifier.ll | 4 +- .../PowerPC/2007-05-03-InlineAsm-S-Constraint.ll | 2 +- .../CodeGen/PowerPC/2007-11-16-landingpad-split.ll | 2 +- test/CodeGen/PowerPC/2008-12-12-EH.ll | 9 - test/CodeGen/PowerPC/2009-08-23-linkerprivate.ll | 8 - test/CodeGen/PowerPC/Atomics-32.ll | 32 +- test/CodeGen/PowerPC/Atomics-64.ll | 32 +- test/CodeGen/PowerPC/aa-tbaa.ll | 41 + test/CodeGen/PowerPC/anon_aggr.ll | 6 +- test/CodeGen/PowerPC/atomic-1.ll | 2 +- test/CodeGen/PowerPC/atomic-2.ll | 2 +- test/CodeGen/PowerPC/bdzlr.ll | 9 +- test/CodeGen/PowerPC/byval-agg-info.ll | 17 + test/CodeGen/PowerPC/coalesce-ext.ll | 2 +- test/CodeGen/PowerPC/complex-return.ll | 2 +- test/CodeGen/PowerPC/crash.ll | 17 + test/CodeGen/PowerPC/crbit-asm.ll | 59 + test/CodeGen/PowerPC/crbits.ll | 192 + test/CodeGen/PowerPC/ctrloop-large-ec.ll | 2 +- test/CodeGen/PowerPC/ctrloop-udivti3.ll | 31 + test/CodeGen/PowerPC/dbg.ll | 2 +- test/CodeGen/PowerPC/early-ret2.ll | 6 +- test/CodeGen/PowerPC/fast-isel-conversion-p5.ll | 153 + test/CodeGen/PowerPC/float-to-int.ll | 49 + test/CodeGen/PowerPC/fold-zero.ll | 13 +- test/CodeGen/PowerPC/hello-reloc.s | 50 +- test/CodeGen/PowerPC/i1-to-double.ll | 21 + test/CodeGen/PowerPC/i32-to-float.ll | 25 + test/CodeGen/PowerPC/i64-to-float.ll | 25 + test/CodeGen/PowerPC/inlineasm-copy.ll | 2 +- test/CodeGen/PowerPC/jaggedstructs.ll | 2 +- test/CodeGen/PowerPC/lsa.ll | 2 +- test/CodeGen/PowerPC/mature-mc-support.ll | 27 + test/CodeGen/PowerPC/optcmp.ll | 2 +- test/CodeGen/PowerPC/ppc32-i1-vaarg.ll | 20 + test/CodeGen/PowerPC/ppc32-vacopy.ll | 2 +- test/CodeGen/PowerPC/pr17168.ll | 2 +- test/CodeGen/PowerPC/private.ll | 28 +- test/CodeGen/PowerPC/pwr7-gt-nop.ll | 31 + test/CodeGen/PowerPC/rlwimi-and.ll | 2 +- test/CodeGen/PowerPC/sdag-ppcf128.ll | 2 +- test/CodeGen/PowerPC/setcc_no_zext.ll | 4 + test/CodeGen/PowerPC/seteq-0.ll | 7 +- test/CodeGen/PowerPC/sjlj.ll | 4 +- test/CodeGen/PowerPC/spill-nor0.ll | 23 + test/CodeGen/PowerPC/srl-mask.ll | 16 + test/CodeGen/PowerPC/stfiwx.ll | 35 +- test/CodeGen/PowerPC/structsinmem.ll | 2 +- test/CodeGen/PowerPC/structsinregs.ll | 2 +- test/CodeGen/PowerPC/subsumes-pred-regs.ll | 2 +- test/CodeGen/PowerPC/tls-2.ll | 
15 - test/CodeGen/PowerPC/tls-gd.ll | 23 - test/CodeGen/PowerPC/tls-ie.ll | 22 - test/CodeGen/PowerPC/tls-ld-2.ll | 24 - test/CodeGen/PowerPC/tls-ld.ll | 24 - test/CodeGen/PowerPC/tls-pic.ll | 55 + test/CodeGen/PowerPC/tls.ll | 33 +- test/CodeGen/PowerPC/unaligned.ll | 8 +- test/CodeGen/PowerPC/unwind-dw2-g.ll | 2 +- test/CodeGen/PowerPC/vec_cmp.ll | 2 +- test/CodeGen/PowerPC/vsx-args.ll | 26 + test/CodeGen/PowerPC/vsx-fma-m.ll | 238 + test/CodeGen/PowerPC/vsx-self-copy.ll | 27 + test/CodeGen/PowerPC/vsx-spill.ll | 49 + test/CodeGen/PowerPC/vsx.ll | 651 + test/CodeGen/PowerPC/vtable-reloc.ll | 11 + test/CodeGen/PowerPC/weak_def_can_be_hidden.ll | 50 + test/CodeGen/R600/32-bit-local-address-space.ll | 64 +- test/CodeGen/R600/add.ll | 102 +- test/CodeGen/R600/add_i64.ll | 39 +- test/CodeGen/R600/address-space.ll | 9 +- test/CodeGen/R600/anyext.ll | 14 + test/CodeGen/R600/array-ptr-calc-i32.ll | 31 + test/CodeGen/R600/atomic_load_add.ll | 4 +- test/CodeGen/R600/atomic_load_sub.ll | 4 +- test/CodeGen/R600/basic-branch.ll | 15 + test/CodeGen/R600/basic-loop.ll | 18 + test/CodeGen/R600/bfe_uint.ll | 2 + test/CodeGen/R600/bitcast.ll | 9 + test/CodeGen/R600/cayman-loop-bug.ll | 32 + test/CodeGen/R600/cf-stack-bug.ll | 227 + test/CodeGen/R600/codegen-prepare-addrmode-sext.ll | 19 + test/CodeGen/R600/elf.r600.ll | 2 +- test/CodeGen/R600/extload.ll | 91 +- test/CodeGen/R600/fabs.ll | 14 +- test/CodeGen/R600/fadd.ll | 37 +- test/CodeGen/R600/fceil.ll | 84 + test/CodeGen/R600/ffloor.ll | 84 + test/CodeGen/R600/fneg-fabs.ll | 55 + test/CodeGen/R600/fneg.ll | 14 +- test/CodeGen/R600/ftrunc.ll | 84 + test/CodeGen/R600/gep-address-space.ll | 14 +- test/CodeGen/R600/gv-const-addrspace.ll | 41 + test/CodeGen/R600/icmp64.ll | 92 + test/CodeGen/R600/indirect-private-64.ll | 75 + test/CodeGen/R600/infinite-loop-evergreen.ll | 10 + test/CodeGen/R600/infinite-loop.ll | 17 + test/CodeGen/R600/insert_vector_elt.ll | 179 +- test/CodeGen/R600/insert_vector_elt_f64.ll | 36 + test/CodeGen/R600/jump-address.ll | 2 +- test/CodeGen/R600/lds-oqap-crash.ll | 28 + test/CodeGen/R600/lds-output-queue.ll | 2 +- test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll | 40 + test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll | 40 + test/CodeGen/R600/llvm.AMDGPU.bfi.ll | 41 + test/CodeGen/R600/llvm.AMDGPU.bfm.ll | 40 + test/CodeGen/R600/llvm.AMDGPU.imax.ll | 29 +- test/CodeGen/R600/llvm.AMDGPU.imin.ll | 29 +- test/CodeGen/R600/llvm.AMDGPU.kill.ll | 22 + test/CodeGen/R600/llvm.AMDGPU.umax.ll | 44 +- test/CodeGen/R600/llvm.AMDGPU.umin.ll | 44 +- test/CodeGen/R600/llvm.SI.load.dword.ll | 40 + test/CodeGen/R600/llvm.SI.sample-masked.ll | 16 +- test/CodeGen/R600/llvm.SI.sample.ll | 6 +- test/CodeGen/R600/llvm.SI.sampled.ll | 4 +- test/CodeGen/R600/llvm.SI.sendmsg.ll | 21 + test/CodeGen/R600/llvm.SI.tbuffer.store.ll | 18 +- test/CodeGen/R600/llvm.exp2.ll | 26 + test/CodeGen/R600/llvm.pow.ll | 29 +- test/CodeGen/R600/llvm.trunc.ll | 13 + test/CodeGen/R600/load.ll | 197 +- test/CodeGen/R600/load64.ll | 20 +- test/CodeGen/R600/local-64.ll | 158 + test/CodeGen/R600/local-memory-two-objects.ll | 11 +- test/CodeGen/R600/local-memory.ll | 6 +- test/CodeGen/R600/loop-idiom.ll | 54 + test/CodeGen/R600/mad_uint24.ll | 16 +- test/CodeGen/R600/mubuf.ll | 98 + test/CodeGen/R600/mul.ll | 12 + test/CodeGen/R600/mul_uint24.ll | 16 +- test/CodeGen/R600/or.ll | 134 +- test/CodeGen/R600/private-memory.ll | 121 +- ...-infinite-loop-bug-while-reorganizing-vector.ll | 59 + test/CodeGen/R600/register-count-comments.ll | 20 + test/CodeGen/R600/salu-to-valu.ll | 48 + 
.../R600/schedule-vs-if-nested-loop-failure.ll | 162 + test/CodeGen/R600/select-vectors.ll | 155 + test/CodeGen/R600/select64.ll | 15 + test/CodeGen/R600/setcc-equivalent.ll | 30 + test/CodeGen/R600/sext-in-reg.ll | 271 + test/CodeGen/R600/si-annotate-cf-assertion.ll | 1 + test/CodeGen/R600/si-sgpr-spill.ll | 880 +- test/CodeGen/R600/smrd.ll | 80 + test/CodeGen/R600/store-v3i32.ll | 12 + test/CodeGen/R600/store-v3i64.ll | 28 + test/CodeGen/R600/store-vector-ptrs.ll | 1 + test/CodeGen/R600/store.ll | 23 +- test/CodeGen/R600/trunc-store-i1.ll | 32 + test/CodeGen/R600/trunc.ll | 36 +- .../R600/unhandled-loop-condition-assertion.ll | 114 + test/CodeGen/R600/unroll.ll | 37 + test/CodeGen/R600/v1i64-kernel-arg.ll | 17 + test/CodeGen/R600/v_cndmask.ll | 13 + test/CodeGen/R600/vtx-fetch-branch.ll | 29 + test/CodeGen/R600/vtx-schedule.ll | 4 +- test/CodeGen/R600/xor.ll | 18 + test/CodeGen/R600/zero_extend.ll | 10 + test/CodeGen/SPARC/2009-08-28-PIC.ll | 40 +- test/CodeGen/SPARC/2011-01-11-Call.ll | 8 +- test/CodeGen/SPARC/2011-01-11-FrameAddr.ll | 40 +- test/CodeGen/SPARC/2011-01-19-DelaySlot.ll | 24 +- test/CodeGen/SPARC/64abi.ll | 59 +- test/CodeGen/SPARC/64bit.ll | 20 +- test/CodeGen/SPARC/64cond.ll | 7 +- test/CodeGen/SPARC/64spill.ll | 116 + test/CodeGen/SPARC/atomics.ll | 153 + test/CodeGen/SPARC/constpool.ll | 16 +- test/CodeGen/SPARC/ctpop.ll | 25 +- test/CodeGen/SPARC/exception.ll | 119 +- test/CodeGen/SPARC/fp128.ll | 19 +- test/CodeGen/SPARC/globals.ll | 10 +- test/CodeGen/SPARC/inlineasm.ll | 45 + test/CodeGen/SPARC/leafproc.ll | 12 +- test/CodeGen/SPARC/mature-mc-support.ll | 20 + test/CodeGen/SPARC/missinglabel.ll | 23 + test/CodeGen/SPARC/obj-relocs.ll | 31 + test/CodeGen/SPARC/parts.ll | 14 + test/CodeGen/SPARC/rem.ll | 4 +- test/CodeGen/SPARC/setjmp.ll | 2 +- test/CodeGen/SPARC/spillsize.ll | 25 + test/CodeGen/SPARC/tls.ll | 54 +- test/CodeGen/SPARC/trap.ll | 11 + test/CodeGen/SystemZ/Large/branch-range-01.py | 4 +- test/CodeGen/SystemZ/Large/branch-range-02.py | 2 +- test/CodeGen/SystemZ/Large/branch-range-03.py | 4 +- test/CodeGen/SystemZ/Large/branch-range-04.py | 4 +- test/CodeGen/SystemZ/Large/branch-range-05.py | 4 +- test/CodeGen/SystemZ/Large/branch-range-06.py | 4 +- test/CodeGen/SystemZ/Large/branch-range-09.py | 4 +- test/CodeGen/SystemZ/Large/branch-range-10.py | 4 +- test/CodeGen/SystemZ/Large/branch-range-11.py | 8 +- test/CodeGen/SystemZ/Large/branch-range-12.py | 8 +- test/CodeGen/SystemZ/alias-01.ll | 3 + test/CodeGen/SystemZ/atomic-load-01.ll | 5 +- test/CodeGen/SystemZ/atomic-load-02.ll | 5 +- test/CodeGen/SystemZ/atomic-load-03.ll | 8 +- test/CodeGen/SystemZ/atomic-load-04.ll | 8 +- test/CodeGen/SystemZ/atomic-store-01.ll | 5 +- test/CodeGen/SystemZ/atomic-store-02.ll | 5 +- test/CodeGen/SystemZ/atomic-store-03.ll | 8 +- test/CodeGen/SystemZ/atomic-store-04.ll | 8 +- test/CodeGen/SystemZ/atomicrmw-add-05.ll | 64 + test/CodeGen/SystemZ/atomicrmw-add-06.ll | 64 + test/CodeGen/SystemZ/atomicrmw-and-05.ll | 64 + test/CodeGen/SystemZ/atomicrmw-and-06.ll | 64 + test/CodeGen/SystemZ/atomicrmw-or-05.ll | 64 + test/CodeGen/SystemZ/atomicrmw-or-06.ll | 64 + test/CodeGen/SystemZ/atomicrmw-sub-05.ll | 69 + test/CodeGen/SystemZ/atomicrmw-sub-06.ll | 69 + test/CodeGen/SystemZ/atomicrmw-xor-05.ll | 64 + test/CodeGen/SystemZ/atomicrmw-xor-06.ll | 64 + test/CodeGen/SystemZ/cmpxchg-01.ll | 4 +- test/CodeGen/SystemZ/cmpxchg-02.ll | 4 +- test/CodeGen/SystemZ/cmpxchg-03.ll | 24 +- test/CodeGen/SystemZ/cmpxchg-04.ll | 18 +- test/CodeGen/SystemZ/cond-store-01.ll | 7 +- 
test/CodeGen/SystemZ/cond-store-02.ll | 7 +- test/CodeGen/SystemZ/cond-store-03.ll | 4 +- test/CodeGen/SystemZ/cond-store-04.ll | 4 +- test/CodeGen/SystemZ/fp-cmp-04.ll | 61 +- test/CodeGen/SystemZ/fp-conv-06.ll | 4 +- test/CodeGen/SystemZ/fp-conv-08.ll | 4 +- test/CodeGen/SystemZ/fp-conv-10.ll | 4 +- test/CodeGen/SystemZ/fp-conv-12.ll | 4 +- test/CodeGen/SystemZ/fp-conv-13.ll | 64 + test/CodeGen/SystemZ/fp-conv-14.ll | 63 + test/CodeGen/SystemZ/frame-08.ll | 4 +- test/CodeGen/SystemZ/frame-11.ll | 13 +- test/CodeGen/SystemZ/frame-13.ll | 13 +- test/CodeGen/SystemZ/frame-14.ll | 14 +- test/CodeGen/SystemZ/insert-06.ll | 14 + test/CodeGen/SystemZ/int-abs-01.ll | 64 + test/CodeGen/SystemZ/int-cmp-05.ll | 17 +- test/CodeGen/SystemZ/int-cmp-06.ll | 30 +- test/CodeGen/SystemZ/int-cmp-44.ll | 92 +- test/CodeGen/SystemZ/int-cmp-45.ll | 2 +- test/CodeGen/SystemZ/int-cmp-47.ll | 109 + test/CodeGen/SystemZ/int-neg-02.ll | 133 + test/CodeGen/SystemZ/mature-mc-support.ll | 15 + test/CodeGen/SystemZ/risbg-01.ll | 26 +- test/CodeGen/SystemZ/rnsbg-01.ll | 11 + test/CodeGen/SystemZ/rosbg-01.ll | 11 + test/CodeGen/SystemZ/rxsbg-01.ll | 11 + test/CodeGen/SystemZ/selectcc-01.ll | 178 + test/CodeGen/SystemZ/selectcc-02.ll | 178 + test/CodeGen/SystemZ/selectcc-03.ll | 187 + test/CodeGen/SystemZ/serialize-01.ll | 21 + test/CodeGen/SystemZ/shift-04.ll | 101 + test/CodeGen/SystemZ/shift-10.ll | 19 +- test/CodeGen/SystemZ/spill-01.ll | 2 +- test/CodeGen/Thumb/cortex-m0-unaligned-access.ll | 13 + test/CodeGen/Thumb/inlineasm-imm-thumb.ll | 2 +- test/CodeGen/Thumb/mature-mc-support.ll | 12 + test/CodeGen/Thumb/segmented-stacks-dynamic.ll | 63 + test/CodeGen/Thumb/segmented-stacks.ll | 247 + test/CodeGen/Thumb/sjljehprepare-lower-vector.ll | 23 + test/CodeGen/Thumb/triple.ll | 7 + test/CodeGen/Thumb/unord.ll | 11 +- .../Thumb2/2011-12-16-T2SizeReduceAssert.ll | 23 +- test/CodeGen/Thumb2/bfx.ll | 2 +- test/CodeGen/Thumb2/carry.ll | 2 +- test/CodeGen/Thumb2/cortex-fp.ll | 10 +- test/CodeGen/Thumb2/div.ll | 2 +- test/CodeGen/Thumb2/large-stack.ll | 6 +- test/CodeGen/Thumb2/longMACt.ll | 2 +- test/CodeGen/Thumb2/mul_const.ll | 2 +- test/CodeGen/Thumb2/segmented-stacks.ll | 32 + test/CodeGen/Thumb2/tail-call-r9.ll | 2 +- test/CodeGen/Thumb2/thumb2-adc.ll | 2 +- test/CodeGen/Thumb2/thumb2-add.ll | 2 +- test/CodeGen/Thumb2/thumb2-add2.ll | 2 +- test/CodeGen/Thumb2/thumb2-add3.ll | 2 +- test/CodeGen/Thumb2/thumb2-add4.ll | 2 +- test/CodeGen/Thumb2/thumb2-add5.ll | 2 +- test/CodeGen/Thumb2/thumb2-add6.ll | 2 +- test/CodeGen/Thumb2/thumb2-and.ll | 2 +- test/CodeGen/Thumb2/thumb2-and2.ll | 2 +- test/CodeGen/Thumb2/thumb2-asr.ll | 2 +- test/CodeGen/Thumb2/thumb2-asr2.ll | 2 +- test/CodeGen/Thumb2/thumb2-bcc.ll | 2 +- test/CodeGen/Thumb2/thumb2-bfc.ll | 2 +- test/CodeGen/Thumb2/thumb2-bic.ll | 2 +- test/CodeGen/Thumb2/thumb2-clz.ll | 2 +- test/CodeGen/Thumb2/thumb2-cmn.ll | 2 +- test/CodeGen/Thumb2/thumb2-cmn2.ll | 2 +- test/CodeGen/Thumb2/thumb2-cmp.ll | 2 +- test/CodeGen/Thumb2/thumb2-cmp2.ll | 2 +- test/CodeGen/Thumb2/thumb2-eor.ll | 2 +- test/CodeGen/Thumb2/thumb2-eor2.ll | 2 +- test/CodeGen/Thumb2/thumb2-jtb.ll | 2 +- test/CodeGen/Thumb2/thumb2-ldr.ll | 2 +- test/CodeGen/Thumb2/thumb2-ldr_ext.ll | 8 +- test/CodeGen/Thumb2/thumb2-ldr_post.ll | 2 +- test/CodeGen/Thumb2/thumb2-ldr_pre.ll | 4 +- test/CodeGen/Thumb2/thumb2-ldrb.ll | 2 +- test/CodeGen/Thumb2/thumb2-ldrh.ll | 2 +- test/CodeGen/Thumb2/thumb2-lsl.ll | 2 +- test/CodeGen/Thumb2/thumb2-lsl2.ll | 2 +- test/CodeGen/Thumb2/thumb2-lsr.ll | 2 +- 
test/CodeGen/Thumb2/thumb2-lsr2.ll | 2 +- test/CodeGen/Thumb2/thumb2-lsr3.ll | 2 +- test/CodeGen/Thumb2/thumb2-mla.ll | 5 +- test/CodeGen/Thumb2/thumb2-mls.ll | 2 +- test/CodeGen/Thumb2/thumb2-mov.ll | 2 +- test/CodeGen/Thumb2/thumb2-mul.ll | 2 +- test/CodeGen/Thumb2/thumb2-mulhi.ll | 2 +- test/CodeGen/Thumb2/thumb2-mvn2.ll | 2 +- test/CodeGen/Thumb2/thumb2-neg.ll | 2 +- test/CodeGen/Thumb2/thumb2-orn.ll | 2 +- test/CodeGen/Thumb2/thumb2-orn2.ll | 2 +- test/CodeGen/Thumb2/thumb2-orr.ll | 2 +- test/CodeGen/Thumb2/thumb2-orr2.ll | 2 +- test/CodeGen/Thumb2/thumb2-pack.ll | 2 +- test/CodeGen/Thumb2/thumb2-rev.ll | 2 +- test/CodeGen/Thumb2/thumb2-rev16.ll | 2 +- test/CodeGen/Thumb2/thumb2-ror.ll | 2 +- test/CodeGen/Thumb2/thumb2-rsb.ll | 2 +- test/CodeGen/Thumb2/thumb2-rsb2.ll | 2 +- test/CodeGen/Thumb2/thumb2-sbc.ll | 2 +- test/CodeGen/Thumb2/thumb2-select.ll | 2 +- test/CodeGen/Thumb2/thumb2-select_xform.ll | 2 +- test/CodeGen/Thumb2/thumb2-smla.ll | 4 +- test/CodeGen/Thumb2/thumb2-smul.ll | 2 +- test/CodeGen/Thumb2/thumb2-str.ll | 2 +- test/CodeGen/Thumb2/thumb2-str_post.ll | 2 +- test/CodeGen/Thumb2/thumb2-str_pre.ll | 2 +- test/CodeGen/Thumb2/thumb2-strb.ll | 2 +- test/CodeGen/Thumb2/thumb2-strh.ll | 2 +- test/CodeGen/Thumb2/thumb2-sub.ll | 2 +- test/CodeGen/Thumb2/thumb2-sub2.ll | 2 +- test/CodeGen/Thumb2/thumb2-sub3.ll | 2 +- test/CodeGen/Thumb2/thumb2-sub4.ll | 2 +- test/CodeGen/Thumb2/thumb2-sub5.ll | 3 +- test/CodeGen/Thumb2/thumb2-sxt_rot.ll | 2 +- test/CodeGen/Thumb2/thumb2-teq.ll | 2 +- test/CodeGen/Thumb2/thumb2-teq2.ll | 2 +- test/CodeGen/Thumb2/thumb2-tst.ll | 2 +- test/CodeGen/Thumb2/thumb2-tst2.ll | 2 +- test/CodeGen/Thumb2/tls1.ll | 2 +- test/CodeGen/Thumb2/tls2.ll | 4 +- test/CodeGen/Thumb2/v8_IT_5.ll | 3 +- test/CodeGen/Thumb2/v8_IT_6.ll | 100 + test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll | 2 +- test/CodeGen/X86/2006-07-20-InlineAsm.ll | 2 +- test/CodeGen/X86/2006-07-31-SingleRegClass.ll | 2 +- test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll | 2 +- test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll | 2 +- test/CodeGen/X86/2007-05-05-Personality.ll | 13 +- test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll | 67 - test/CodeGen/X86/2007-10-17-IllegalAsm.ll | 87 - .../CodeGen/X86/2007-10-28-inlineasm-q-modifier.ll | 2 +- test/CodeGen/X86/2007-11-04-LiveVariablesBug.ll | 2 +- .../X86/2007-11-04-rip-immediate-constant.ll | 2 +- test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll | 2 +- test/CodeGen/X86/2008-02-26-AsmDirectMemOp.ll | 2 +- test/CodeGen/X86/2008-03-14-SpillerCrash.ll | 2 +- test/CodeGen/X86/2008-04-02-unnamedEH.ll | 6 +- test/CodeGen/X86/2008-04-08-CoalescerCrash.ll | 2 +- test/CodeGen/X86/2008-04-26-Asm-Optimize-Imm.ll | 2 +- test/CodeGen/X86/2008-08-31-EH_RETURN64.ll | 2 +- test/CodeGen/X86/2008-09-18-inline-asm-2.ll | 6 +- test/CodeGen/X86/2008-10-17-Asm64bitRConstraint.ll | 4 +- test/CodeGen/X86/2008-10-20-AsmDoubleInI32.ll | 4 +- test/CodeGen/X86/2008-12-12-PrivateEHSymbol.ll | 10 - .../X86/2009-02-12-InlineAsm-nieZ-constraints.ll | 2 +- test/CodeGen/X86/2009-04-13-2AddrAssert-2.ll | 2 +- test/CodeGen/X86/2009-05-08-InlineAsmIOffset.ll | 2 +- test/CodeGen/X86/2009-06-05-VZextByteShort.ll | 2 +- test/CodeGen/X86/2009-08-23-linkerprivate.ll | 8 - test/CodeGen/X86/2009-09-19-earlyclobber.ll | 2 +- test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll | 2 +- test/CodeGen/X86/2009-12-01-EarlyClobberBug.ll | 2 +- .../X86/2010-05-05-LocalAllocEarlyClobber.ll | 2 +- test/CodeGen/X86/2010-05-26-DotDebugLoc.ll | 4 +- .../X86/2010-06-15-FastAllocEarlyCLobber.ll | 2 +- 
test/CodeGen/X86/2010-06-25-asm-RA-crash.ll | 2 +- .../CodeGen/X86/2010-06-28-FastAllocTiedOperand.ll | 2 +- .../CodeGen/X86/2010-06-28-matched-g-constraint.ll | 2 +- test/CodeGen/X86/2010-07-02-asm-alignstack.ll | 2 +- test/CodeGen/X86/2010-07-06-asm-RIP.ll | 2 +- test/CodeGen/X86/2010-07-13-indirectXconstraint.ll | 2 +- test/CodeGen/X86/2010-09-16-EmptyFilename.ll | 2 +- test/CodeGen/X86/2010-10-08-cmpxchg8b.ll | 2 +- test/CodeGen/X86/2010-12-02-MC-Set.ll | 27 - test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll | 20 +- test/CodeGen/X86/2011-05-09-loaduse.ll | 2 +- test/CodeGen/X86/2011-10-11-SpillDead.ll | 2 +- test/CodeGen/X86/2011-10-19-widen_vselect.ll | 18 +- test/CodeGen/X86/2011-12-28-vselecti8.ll | 19 +- test/CodeGen/X86/2012-08-17-legalizer-crash.ll | 2 +- test/CodeGen/X86/2012-11-30-handlemove-dbg.ll | 6 +- test/CodeGen/X86/2012-11-30-misched-dbg.ll | 7 +- .../X86/2013-10-14-FastISel-incorrect-vreg.ll | 8 +- test/CodeGen/X86/3addr-16bit.ll | 6 +- test/CodeGen/X86/Atomics-64.ll | 40 +- test/CodeGen/X86/GC/ocaml-gc.ll | 6 +- test/CodeGen/X86/MachineBranchProb.ll | 34 + test/CodeGen/X86/MachineSink-DbgValue.ll | 4 +- test/CodeGen/X86/alias-error.ll | 5 - test/CodeGen/X86/anyregcc-crash.ll | 4 +- test/CodeGen/X86/anyregcc.ll | 184 +- test/CodeGen/X86/asm-block-labels.ll | 2 +- test/CodeGen/X86/asm-global-imm.ll | 2 +- test/CodeGen/X86/atom-cmpb.ll | 36 + test/CodeGen/X86/atomic16.ll | 2 +- test/CodeGen/X86/atomic32.ll | 2 +- test/CodeGen/X86/atomic64.ll | 2 +- test/CodeGen/X86/atomic6432.ll | 2 +- test/CodeGen/X86/atomic8.ll | 2 +- test/CodeGen/X86/atomic_op.ll | 6 +- test/CodeGen/X86/avx-blend.ll | 21 +- test/CodeGen/X86/avx-cvt-2.ll | 43 + test/CodeGen/X86/avx-shift.ll | 4 +- test/CodeGen/X86/avx-shuffle.ll | 9 + test/CodeGen/X86/avx-trunc.ll | 8 +- test/CodeGen/X86/avx-vbroadcast.ll | 63 + test/CodeGen/X86/avx-vzeroupper.ll | 37 +- test/CodeGen/X86/avx2-gather.ll | 16 + test/CodeGen/X86/avx2-intrinsics-x86.ll | 8 +- test/CodeGen/X86/avx2-shift.ll | 33 + test/CodeGen/X86/avx2-vbroadcast.ll | 228 +- test/CodeGen/X86/avx2-vector-shifts.ll | 55 +- test/CodeGen/X86/avx512-arith.ll | 139 +- test/CodeGen/X86/avx512-build-vector.ll | 12 + test/CodeGen/X86/avx512-cmp.ll | 67 +- test/CodeGen/X86/avx512-cvt.ll | 34 +- test/CodeGen/X86/avx512-gather-scatter-intrin.ll | 78 + test/CodeGen/X86/avx512-insert-extract.ll | 75 +- test/CodeGen/X86/avx512-intrinsics.ll | 435 +- test/CodeGen/X86/avx512-mask-op.ll | 49 +- test/CodeGen/X86/avx512-mov.ll | 32 +- test/CodeGen/X86/avx512-select.ll | 19 + test/CodeGen/X86/avx512-shuffle.ll | 29 +- test/CodeGen/X86/avx512-trunc-ext.ll | 31 +- test/CodeGen/X86/avx512-vbroadcast.ll | 19 +- test/CodeGen/X86/avx512-vec-cmp.ll | 87 +- test/CodeGen/X86/avx512-vselect-crash.ll | 11 + test/CodeGen/X86/avx512-zext-load-crash.ll | 14 + test/CodeGen/X86/barrier-sse.ll | 11 +- test/CodeGen/X86/blend-msb.ll | 21 +- test/CodeGen/X86/block-placement.ll | 2 +- test/CodeGen/X86/bswap-vector.ll | 19 + test/CodeGen/X86/bt.ll | 2 +- test/CodeGen/X86/cache-intrinsic.ll | 26 + test/CodeGen/X86/call-imm.ll | 2 + test/CodeGen/X86/cas.ll | 2 +- test/CodeGen/X86/catch.ll | 21 + test/CodeGen/X86/cdecl-method-return.ll | 69 + test/CodeGen/X86/cfstring.ll | 6 +- test/CodeGen/X86/cmov.ll | 4 +- test/CodeGen/X86/cmpxchg16b.ll | 2 +- test/CodeGen/X86/coalescer-remat.ll | 2 +- test/CodeGen/X86/codegen-prepare-addrmode-sext.ll | 303 + test/CodeGen/X86/codegen-prepare-cast.ll | 4 + test/CodeGen/X86/codegen-prepare-extload.ll | 2 +- test/CodeGen/X86/combine-or.ll | 269 + 
test/CodeGen/X86/combine-vec-shuffle.ll | 253 + test/CodeGen/X86/const-base-addr.ll | 24 + test/CodeGen/X86/crash.ll | 6 +- test/CodeGen/X86/cse-add-with-overflow.ll | 43 + test/CodeGen/X86/ctpop-combine.ll | 2 +- test/CodeGen/X86/darwin-no-dead-strip.ll | 12 +- .../X86/dbg-changes-codegen-branch-folding.ll | 109 + test/CodeGen/X86/dbg-changes-codegen.ll | 83 + test/CodeGen/X86/dll-linkage.ll | 14 - test/CodeGen/X86/dllexport-x86_64.ll | 104 + test/CodeGen/X86/dllexport.ll | 125 +- test/CodeGen/X86/dllimport-x86_64.ll | 48 + test/CodeGen/X86/dllimport.ll | 59 + test/CodeGen/X86/dwarf-comp-dir.ll | 6 +- test/CodeGen/X86/dynamic-alloca-in-entry.ll | 19 + test/CodeGen/X86/exedepsfix-broadcast.ll | 128 + test/CodeGen/X86/extract-store.ll | 22 + test/CodeGen/X86/fast-isel-args-fail.ll | 2 +- test/CodeGen/X86/fast-isel-select.ll | 16 + test/CodeGen/X86/fast-isel-x86.ll | 6 +- test/CodeGen/X86/fast-isel.ll | 4 +- test/CodeGen/X86/fastcall-correct-mangling.ll | 25 +- test/CodeGen/X86/fma.ll | 33 + test/CodeGen/X86/fma3-intrinsics.ll | 4 +- test/CodeGen/X86/fma4-intrinsics-x86_64.ll | 2 +- test/CodeGen/X86/fold-call-oper.ll | 48 + test/CodeGen/X86/fold-vector-sext-crash.ll | 12 + test/CodeGen/X86/fold-vector-sext-zext.ll | 291 + test/CodeGen/X86/fold-xmm-zero.ll | 2 +- test/CodeGen/X86/fp-fast.ll | 2 +- test/CodeGen/X86/gcc_except_table.ll | 35 +- test/CodeGen/X86/global-sections.ll | 36 + test/CodeGen/X86/hidden-vis-pic.ll | 7 +- test/CodeGen/X86/i64-mem-copy.ll | 2 +- test/CodeGen/X86/inalloca-ctor.ll | 34 + test/CodeGen/X86/inalloca-invoke.ll | 54 + test/CodeGen/X86/inalloca-stdcall.ll | 26 + test/CodeGen/X86/inalloca.ll | 65 + test/CodeGen/X86/inline-asm-flag-clobber.ll | 2 +- test/CodeGen/X86/inline-asm-fpstack.ll | 2 +- test/CodeGen/X86/inline-asm-h.ll | 2 +- test/CodeGen/X86/inline-asm-modifier-n.ll | 2 +- test/CodeGen/X86/inline-asm-modifier-q.ll | 12 + test/CodeGen/X86/inline-asm-mrv.ll | 8 +- test/CodeGen/X86/inline-asm-q-regs.ll | 2 +- test/CodeGen/X86/inline-asm-sp-clobber-memcpy.ll | 17 + test/CodeGen/X86/inline-asm-stack-realign.ll | 16 + test/CodeGen/X86/inline-asm-stack-realign2.ll | 16 + test/CodeGen/X86/inline-asm-stack-realign3.ll | 29 + test/CodeGen/X86/inline-asm-tied.ll | 2 +- test/CodeGen/X86/inline-asm-x-scalar.ll | 2 +- test/CodeGen/X86/inline-asm.ll | 17 +- test/CodeGen/X86/ins_split_regalloc.ll | 33 + test/CodeGen/X86/isint.ll | 26 +- test/CodeGen/X86/large-constants.ll | 67 + test/CodeGen/X86/load-slice.ll | 2 +- test/CodeGen/X86/lsr-interesting-step.ll | 14 +- test/CodeGen/X86/lsr-normalization.ll | 9 +- test/CodeGen/X86/machine-cp.ll | 26 +- test/CodeGen/X86/mature-mc-support.ll | 18 + test/CodeGen/X86/memcmp.ll | 22 +- test/CodeGen/X86/memset-2.ll | 4 +- test/CodeGen/X86/misched-aa-colored.ll | 189 + test/CodeGen/X86/misched-aa-mmos.ll | 37 + test/CodeGen/X86/misched-matmul.ll | 2 +- test/CodeGen/X86/movbe.ll | 45 +- test/CodeGen/X86/ms-inline-asm.ll | 35 +- test/CodeGen/X86/mul128_sext_loop.ll | 32 + test/CodeGen/X86/mult-alt-generic-i686.ll | 2 +- test/CodeGen/X86/mult-alt-generic-x86_64.ll | 2 +- test/CodeGen/X86/mult-alt-x86.ll | 2 +- test/CodeGen/X86/multiple-loop-post-inc.ll | 2 +- test/CodeGen/X86/negate-add-zero.ll | 17 +- test/CodeGen/X86/no-elf-compact-unwind.ll | 48 - test/CodeGen/X86/nocx16.ll | 2 +- test/CodeGen/X86/opaque-constant-asm.ll | 13 + test/CodeGen/X86/osx-private-labels.ll | 71 + test/CodeGen/X86/patchpoint.ll | 76 +- test/CodeGen/X86/peephole-multiple-folds.ll | 29 + test/CodeGen/X86/personality.ll | 15 +- test/CodeGen/X86/personality_size.ll 
| 4 +- test/CodeGen/X86/pic.ll | 3 +- test/CodeGen/X86/pr10420.ll | 21 - test/CodeGen/X86/pr14090.ll | 70 - test/CodeGen/X86/pr1462.ll | 3 +- test/CodeGen/X86/pr16031.ll | 2 +- test/CodeGen/X86/pr19049.ll | 7 + test/CodeGen/X86/preserve_allcc64.ll | 104 + test/CodeGen/X86/preserve_mostcc64.ll | 86 + test/CodeGen/X86/private-2.ll | 2 +- test/CodeGen/X86/ragreedy-bug.ll | 292 + test/CodeGen/X86/ragreedy-hoist-spill.ll | 389 + .../CodeGen/X86/ragreedy-last-chance-recoloring.ll | 168 + test/CodeGen/X86/rot16.ll | 2 +- test/CodeGen/X86/rotate3.ll | 76 + test/CodeGen/X86/rotate4.ll | 134 + test/CodeGen/X86/saddo-redundant-add.ll | 34 + test/CodeGen/X86/segmented-stacks.ll | 57 +- test/CodeGen/X86/setjmp-spills.ll | 141 + test/CodeGen/X86/shift-combine-crash.ll | 57 + test/CodeGen/X86/shift-double.ll | 2 +- test/CodeGen/X86/shift-pcmp.ll | 30 + test/CodeGen/X86/shl_undef.ll | 6 +- test/CodeGen/X86/shrink-compare.ll | 8 +- test/CodeGen/X86/sibcall-5.ll | 9 +- test/CodeGen/X86/sibcall.ll | 4 +- test/CodeGen/X86/sse-scalar-fp-arith-2.ll | 423 + test/CodeGen/X86/sse-scalar-fp-arith.ll | 310 + test/CodeGen/X86/sse1.ll | 14 + test/CodeGen/X86/sse2-blend.ll | 22 +- test/CodeGen/X86/sse2-intrinsics-x86.ll | 7 + test/CodeGen/X86/sse2-vector-shifts.ll | 180 +- test/CodeGen/X86/sse2.ll | 6 +- test/CodeGen/X86/sse41-blend.ll | 12 +- test/CodeGen/X86/ssp-data-layout.ll | 510 + test/CodeGen/X86/stack-align-memcpy.ll | 27 +- test/CodeGen/X86/stack-protector-dbginfo.ll | 2 +- test/CodeGen/X86/stackmap-liveness.ll | 245 + test/CodeGen/X86/stackmap-nops.ll | 230 + test/CodeGen/X86/stackmap.ll | 333 +- test/CodeGen/X86/stdcall-notailcall.ll | 10 +- test/CodeGen/X86/stdcall.ll | 4 +- test/CodeGen/X86/stores-merging.ll | 23 + test/CodeGen/X86/sunkaddr-ext.ll | 26 + test/CodeGen/X86/tbm-intrinsics-x86_64.ll | 2 +- test/CodeGen/X86/v2f32.ll | 2 +- test/CodeGen/X86/v4i32load-crash.ll | 5 +- test/CodeGen/X86/vaargs.ll | 67 + test/CodeGen/X86/vastart-defs-eflags.ll | 23 + test/CodeGen/X86/vbinop-simplify-bug.ll | 23 + test/CodeGen/X86/vec_round.ll | 2 +- test/CodeGen/X86/vec_setcc-2.ll | 96 + test/CodeGen/X86/vec_setcc.ll | 18 +- test/CodeGen/X86/vec_shift4.ll | 2 +- test/CodeGen/X86/vec_shift5.ll | 160 + test/CodeGen/X86/vec_shift6.ll | 134 + test/CodeGen/X86/vec_shuf-insert.ll | 29 + test/CodeGen/X86/vec_shuffle-40.ll | 22 + test/CodeGen/X86/vector-gep.ll | 31 +- test/CodeGen/X86/viabs.ll | 87 + test/CodeGen/X86/vselect-2.ll | 33 + test/CodeGen/X86/vselect.ll | 264 + test/CodeGen/X86/vshift-4.ll | 2 +- test/CodeGen/X86/vshift-6.ll | 36 + test/CodeGen/X86/warn-stack.ll | 2 +- test/CodeGen/X86/weak_def_can_be_hidden.ll | 37 +- test/CodeGen/X86/widen_load-2.ll | 2 +- test/CodeGen/X86/win32_sret.ll | 98 +- test/CodeGen/X86/win64_alloca_dynalloca.ll | 6 +- test/CodeGen/X86/win_chkstk.ll | 6 +- .../X86/x86-64-double-precision-shift-left.ll | 77 + .../X86/x86-64-double-precision-shift-right.ll | 74 + test/CodeGen/X86/x86-64-double-shifts-Oz-Os-O2.ll | 67 + test/CodeGen/X86/x86-64-double-shifts-var.ll | 57 + test/CodeGen/X86/x86-shifts.ll | 2 +- test/CodeGen/X86/zlib-longest-match.ll | 240 + test/CodeGen/XCore/align.ll | 15 + test/CodeGen/XCore/atomic.ll | 76 + test/CodeGen/XCore/bigstructret.ll | 39 +- test/CodeGen/XCore/byVal.ll | 2 +- test/CodeGen/XCore/call.ll | 10 + test/CodeGen/XCore/codemodel.ll | 213 + test/CodeGen/XCore/epilogue_prologue.ll | 222 +- test/CodeGen/XCore/exception.ll | 3 +- test/CodeGen/XCore/globals.ll | 53 +- test/CodeGen/XCore/inline-asm.ll | 21 + test/CodeGen/XCore/linkage.ll | 12 + 
test/CodeGen/XCore/llvm-intrinsics.ll | 363 + test/CodeGen/XCore/load.ll | 2 +- test/CodeGen/XCore/memcpy.ll | 32 + test/CodeGen/XCore/resources.ll | 16 + test/CodeGen/XCore/resources_combine.ll | 93 + test/CodeGen/XCore/scavenging.ll | 69 +- 1418 files changed, 87495 insertions(+), 5076 deletions(-) create mode 100644 test/CodeGen/AArch64/128bit_load_store.ll create mode 100644 test/CodeGen/AArch64/assertion-rc-mismatch.ll create mode 100644 test/CodeGen/AArch64/concatvector-bugs.ll create mode 100644 test/CodeGen/AArch64/cpus.ll create mode 100644 test/CodeGen/AArch64/i128-shift.ll create mode 100644 test/CodeGen/AArch64/mature-mc-support.ll create mode 100644 test/CodeGen/AArch64/misched-basic-A53.ll create mode 100644 test/CodeGen/AArch64/mul-lohi.ll create mode 100644 test/CodeGen/AArch64/neon-copyPhysReg-tuple.ll create mode 100644 test/CodeGen/AArch64/neon-fpround_f128.ll create mode 100644 test/CodeGen/AArch64/neon-load-store-v1i32.ll create mode 100644 test/CodeGen/AArch64/neon-or-combine.ll create mode 100644 test/CodeGen/AArch64/neon-scalar-ext.ll create mode 100644 test/CodeGen/AArch64/neon-select_cc.ll create mode 100644 test/CodeGen/AArch64/neon-shl-ashr-lshr.ll create mode 100644 test/CodeGen/AArch64/neon-spill-fpr8-fpr16.ll create mode 100644 test/CodeGen/AArch64/neon-truncStore-extLoad.ll create mode 100644 test/CodeGen/AArch64/neon-v1i1-setcc.ll create mode 100644 test/CodeGen/AArch64/neon-vector-list-spill.ll create mode 100644 test/CodeGen/AArch64/ragreedy-csr.ll create mode 100644 test/CodeGen/AArch64/sext_inreg.ll create mode 100644 test/CodeGen/AArch64/sincospow-vector-expansion.ll delete mode 100644 test/CodeGen/ARM/2009-08-23-linkerprivate.ll delete mode 100644 test/CodeGen/ARM/2010-09-29-mc-asm-header-test.ll create mode 100644 test/CodeGen/ARM/2014-01-09-pseudo_expand_implicit_reg.ll create mode 100644 test/CodeGen/ARM/2014-02-05-vfp-regs-after-stack.ll create mode 100644 test/CodeGen/ARM/2014-02-21-byval-reg-split-alignment.ll create mode 100644 test/CodeGen/ARM/Windows/aapcs.ll create mode 100644 test/CodeGen/ARM/Windows/hard-float.ll create mode 100644 test/CodeGen/ARM/Windows/mangling.ll create mode 100644 test/CodeGen/ARM/Windows/no-aeabi.ll create mode 100644 test/CodeGen/ARM/Windows/no-arm-mode.ll create mode 100644 test/CodeGen/ARM/Windows/no-ehabi.ll create mode 100644 test/CodeGen/ARM/arm-abi-attr.ll create mode 100644 test/CodeGen/ARM/build-attributes.ll create mode 100644 test/CodeGen/ARM/cache-intrinsic.ll create mode 100644 test/CodeGen/ARM/cse-ldrlit.ll create mode 100644 test/CodeGen/ARM/debug-frame-large-stack.ll create mode 100644 test/CodeGen/ARM/debug-frame-no-debug.ll create mode 100644 test/CodeGen/ARM/debug-frame-vararg.ll create mode 100644 test/CodeGen/ARM/debug-frame.ll create mode 100644 test/CodeGen/ARM/debug-segmented-stacks.ll create mode 100644 test/CodeGen/ARM/default-float-abi.ll create mode 100644 test/CodeGen/ARM/fastcc-vfp.ll create mode 100644 test/CodeGen/ARM/fastisel-thumb-litpool.ll create mode 100644 test/CodeGen/ARM/ifcvt-branch-weight-bug.ll create mode 100644 test/CodeGen/ARM/ifcvt-branch-weight.ll create mode 100644 test/CodeGen/ARM/inline-diagnostics.ll create mode 100644 test/CodeGen/ARM/inlineasm-ldr-pseudo.ll create mode 100644 test/CodeGen/ARM/inlineasm-switch-mode-oneway-from-arm.ll create mode 100644 test/CodeGen/ARM/inlineasm-switch-mode-oneway-from-thumb.ll create mode 100644 test/CodeGen/ARM/inlineasm-switch-mode.ll create mode 100644 test/CodeGen/ARM/ldaex-stlex.ll create mode 100644 
test/CodeGen/ARM/mature-mc-support.ll create mode 100644 test/CodeGen/ARM/minsize-imms.ll create mode 100644 test/CodeGen/ARM/minsize-litpools.ll create mode 100644 test/CodeGen/ARM/none-macho.ll create mode 100644 test/CodeGen/ARM/optimize-dmbs-v7.ll delete mode 100644 test/CodeGen/ARM/prefetch-thumb.ll create mode 100644 test/CodeGen/ARM/saxpy10-a9.ll create mode 100644 test/CodeGen/ARM/segmented-stacks-dynamic.ll create mode 100644 test/CodeGen/ARM/segmented-stacks.ll create mode 100644 test/CodeGen/ARM/ssp-data-layout.ll create mode 100644 test/CodeGen/ARM/tail-call.ll create mode 100644 test/CodeGen/ARM/taildup-branch-weight.ll create mode 100644 test/CodeGen/ARM/thumb-litpool.ll create mode 100644 test/CodeGen/ARM/varargs-spill-stack-align-nacl.ll create mode 100644 test/CodeGen/ARM/vfp-regs-dwarf.ll create mode 100644 test/CodeGen/ARM/zero-cycle-zero.ll create mode 100644 test/CodeGen/ARM64/2011-03-09-CPSRSpill.ll create mode 100644 test/CodeGen/ARM64/2011-03-17-AsmPrinterCrash.ll create mode 100644 test/CodeGen/ARM64/2011-03-21-Unaligned-Frame-Index.ll create mode 100644 test/CodeGen/ARM64/2011-04-21-CPSRBug.ll create mode 100644 test/CodeGen/ARM64/2011-10-18-LdStOptBug.ll create mode 100644 test/CodeGen/ARM64/2012-01-11-ComparisonDAGCrash.ll create mode 100644 test/CodeGen/ARM64/2012-05-07-DAGCombineVectorExtract.ll create mode 100644 test/CodeGen/ARM64/2012-05-07-MemcpyAlignBug.ll create mode 100644 test/CodeGen/ARM64/2012-05-09-LOADgot-bug.ll create mode 100644 test/CodeGen/ARM64/2012-05-22-LdStOptBug.ll create mode 100644 test/CodeGen/ARM64/2012-06-06-FPToUI.ll create mode 100644 test/CodeGen/ARM64/2012-07-11-InstrEmitterBug.ll create mode 100644 test/CodeGen/ARM64/2013-01-13-ffast-fcmp.ll create mode 100644 test/CodeGen/ARM64/2013-01-23-frem-crash.ll create mode 100644 test/CodeGen/ARM64/2013-01-23-sext-crash.ll create mode 100644 test/CodeGen/ARM64/2013-02-12-shufv8i8.ll create mode 100644 test/CodeGen/ARM64/AdvSIMD-Scalar.ll create mode 100644 test/CodeGen/ARM64/aapcs.ll create mode 100644 test/CodeGen/ARM64/abi-varargs.ll create mode 100644 test/CodeGen/ARM64/abi.ll create mode 100644 test/CodeGen/ARM64/abi_align.ll create mode 100644 test/CodeGen/ARM64/addp.ll create mode 100644 test/CodeGen/ARM64/addr-mode-folding.ll create mode 100644 test/CodeGen/ARM64/addr-type-promotion.ll create mode 100644 test/CodeGen/ARM64/addrmode.ll create mode 100644 test/CodeGen/ARM64/alloc-no-stack-realign.ll create mode 100644 test/CodeGen/ARM64/alloca-frame-pointer-offset.ll create mode 100644 test/CodeGen/ARM64/andCmpBrToTBZ.ll create mode 100644 test/CodeGen/ARM64/anyregcc-crash.ll create mode 100644 test/CodeGen/ARM64/anyregcc.ll create mode 100644 test/CodeGen/ARM64/arith-saturating.ll create mode 100644 test/CodeGen/ARM64/arith.ll create mode 100644 test/CodeGen/ARM64/atomic-128.ll create mode 100644 test/CodeGen/ARM64/atomic.ll create mode 100644 test/CodeGen/ARM64/basic-pic.ll create mode 100644 test/CodeGen/ARM64/big-imm-offsets.ll create mode 100644 test/CodeGen/ARM64/big-stack.ll create mode 100644 test/CodeGen/ARM64/bitfield-extract.ll create mode 100644 test/CodeGen/ARM64/blockaddress.ll create mode 100644 test/CodeGen/ARM64/build-vector.ll create mode 100644 test/CodeGen/ARM64/call-tailcalls.ll create mode 100644 test/CodeGen/ARM64/cast-opt.ll create mode 100644 test/CodeGen/ARM64/ccmp-heuristics.ll create mode 100644 test/CodeGen/ARM64/ccmp.ll create mode 100644 test/CodeGen/ARM64/coalesce-ext.ll create mode 100644 test/CodeGen/ARM64/code-model-large-abs.ll create mode 100644 
test/CodeGen/ARM64/collect-loh-garbage-crash.ll create mode 100644 test/CodeGen/ARM64/collect-loh-str.ll create mode 100644 test/CodeGen/ARM64/collect-loh.ll create mode 100644 test/CodeGen/ARM64/compact-unwind-unhandled-cfi.S create mode 100644 test/CodeGen/ARM64/complex-ret.ll create mode 100644 test/CodeGen/ARM64/convert-v2f64-v2i32.ll create mode 100644 test/CodeGen/ARM64/convert-v2i32-v2f64.ll create mode 100644 test/CodeGen/ARM64/copy-tuple.ll create mode 100644 test/CodeGen/ARM64/crc32.ll create mode 100644 test/CodeGen/ARM64/crypto.ll create mode 100644 test/CodeGen/ARM64/cse.ll create mode 100644 test/CodeGen/ARM64/csel.ll create mode 100644 test/CodeGen/ARM64/cvt.ll create mode 100644 test/CodeGen/ARM64/dagcombiner-convergence.ll create mode 100644 test/CodeGen/ARM64/dagcombiner-load-slicing.ll create mode 100644 test/CodeGen/ARM64/dup.ll create mode 100644 test/CodeGen/ARM64/early-ifcvt.ll create mode 100644 test/CodeGen/ARM64/elf-calls.ll create mode 100644 test/CodeGen/ARM64/elf-constpool.ll create mode 100644 test/CodeGen/ARM64/elf-globals.ll create mode 100644 test/CodeGen/ARM64/ext.ll create mode 100644 test/CodeGen/ARM64/extend-int-to-fp.ll create mode 100644 test/CodeGen/ARM64/extend.ll create mode 100644 test/CodeGen/ARM64/extern-weak.ll create mode 100644 test/CodeGen/ARM64/extload-knownzero.ll create mode 100644 test/CodeGen/ARM64/extract.ll create mode 100644 test/CodeGen/ARM64/extract_subvector.ll create mode 100644 test/CodeGen/ARM64/fast-isel-addr-offset.ll create mode 100644 test/CodeGen/ARM64/fast-isel-alloca.ll create mode 100644 test/CodeGen/ARM64/fast-isel-br.ll create mode 100644 test/CodeGen/ARM64/fast-isel-call.ll create mode 100644 test/CodeGen/ARM64/fast-isel-conversion.ll create mode 100644 test/CodeGen/ARM64/fast-isel-fcmp.ll create mode 100644 test/CodeGen/ARM64/fast-isel-gv.ll create mode 100644 test/CodeGen/ARM64/fast-isel-icmp.ll create mode 100644 test/CodeGen/ARM64/fast-isel-indirectbr.ll create mode 100644 test/CodeGen/ARM64/fast-isel-intrinsic.ll create mode 100644 test/CodeGen/ARM64/fast-isel-materialize.ll create mode 100644 test/CodeGen/ARM64/fast-isel-noconvert.ll create mode 100644 test/CodeGen/ARM64/fast-isel-rem.ll create mode 100644 test/CodeGen/ARM64/fast-isel-ret.ll create mode 100644 test/CodeGen/ARM64/fast-isel-select.ll create mode 100644 test/CodeGen/ARM64/fast-isel.ll create mode 100644 test/CodeGen/ARM64/fastcc-tailcall.ll create mode 100644 test/CodeGen/ARM64/fastisel-gep-promote-before-add.ll create mode 100644 test/CodeGen/ARM64/fcmp-opt.ll create mode 100644 test/CodeGen/ARM64/fcopysign.ll create mode 100644 test/CodeGen/ARM64/fixed-point-scalar-cvt-dagcombine.ll create mode 100644 test/CodeGen/ARM64/fmadd.ll create mode 100644 test/CodeGen/ARM64/fmax.ll create mode 100644 test/CodeGen/ARM64/fminv.ll create mode 100644 test/CodeGen/ARM64/fmuladd.ll create mode 100644 test/CodeGen/ARM64/fold-address.ll create mode 100644 test/CodeGen/ARM64/fold-lsl.ll create mode 100644 test/CodeGen/ARM64/fp-imm.ll create mode 100644 test/CodeGen/ARM64/fp.ll create mode 100644 test/CodeGen/ARM64/fp128-folding.ll create mode 100644 test/CodeGen/ARM64/fp128.ll create mode 100644 test/CodeGen/ARM64/frame-index.ll create mode 100644 test/CodeGen/ARM64/frameaddr.ll create mode 100644 test/CodeGen/ARM64/global-address.ll create mode 100644 test/CodeGen/ARM64/hello.ll create mode 100644 test/CodeGen/ARM64/i16-subreg-extract.ll create mode 100644 test/CodeGen/ARM64/icmp-opt.ll create mode 100644 test/CodeGen/ARM64/illegal-float-ops.ll create mode 
100644 test/CodeGen/ARM64/indexed-memory.ll create mode 100644 test/CodeGen/ARM64/inline-asm-error-I.ll create mode 100644 test/CodeGen/ARM64/inline-asm-error-J.ll create mode 100644 test/CodeGen/ARM64/inline-asm-error-K.ll create mode 100644 test/CodeGen/ARM64/inline-asm-error-L.ll create mode 100644 test/CodeGen/ARM64/inline-asm-error-M.ll create mode 100644 test/CodeGen/ARM64/inline-asm-error-N.ll create mode 100644 test/CodeGen/ARM64/inline-asm-zero-reg-error.ll create mode 100644 test/CodeGen/ARM64/inline-asm.ll create mode 100644 test/CodeGen/ARM64/join-reserved.ll create mode 100644 test/CodeGen/ARM64/jumptable.ll create mode 100644 test/CodeGen/ARM64/ld1.ll create mode 100644 test/CodeGen/ARM64/ldp.ll create mode 100644 test/CodeGen/ARM64/ldur.ll create mode 100644 test/CodeGen/ARM64/ldxr-stxr.ll create mode 100644 test/CodeGen/ARM64/leaf-compact-unwind.ll create mode 100644 test/CodeGen/ARM64/leaf.ll create mode 100644 test/CodeGen/ARM64/lit.local.cfg create mode 100644 test/CodeGen/ARM64/long-shift.ll create mode 100644 test/CodeGen/ARM64/memcpy-inline.ll create mode 100644 test/CodeGen/ARM64/memset-inline.ll create mode 100644 test/CodeGen/ARM64/memset-to-bzero.ll create mode 100644 test/CodeGen/ARM64/movi.ll create mode 100644 test/CodeGen/ARM64/mul.ll create mode 100644 test/CodeGen/ARM64/neg.ll create mode 100644 test/CodeGen/ARM64/neon-compare-instructions.ll create mode 100644 test/CodeGen/ARM64/patchpoint.ll create mode 100644 test/CodeGen/ARM64/platform-reg.ll create mode 100644 test/CodeGen/ARM64/popcnt.ll create mode 100644 test/CodeGen/ARM64/prefetch.ll create mode 100644 test/CodeGen/ARM64/promote-const.ll create mode 100644 test/CodeGen/ARM64/redzone.ll create mode 100644 test/CodeGen/ARM64/register-offset-addressing.ll create mode 100644 test/CodeGen/ARM64/register-pairing.ll create mode 100644 test/CodeGen/ARM64/regress-f128csel-flags.ll create mode 100644 test/CodeGen/ARM64/regress-interphase-shift.ll create mode 100644 test/CodeGen/ARM64/return-vector.ll create mode 100644 test/CodeGen/ARM64/returnaddr.ll create mode 100644 test/CodeGen/ARM64/rev.ll create mode 100644 test/CodeGen/ARM64/rounding.ll create mode 100644 test/CodeGen/ARM64/scaled_iv.ll create mode 100644 test/CodeGen/ARM64/scvt.ll create mode 100644 test/CodeGen/ARM64/shifted-sext.ll create mode 100644 test/CodeGen/ARM64/simd-scalar-to-vector.ll create mode 100644 test/CodeGen/ARM64/simplest-elf.ll create mode 100644 test/CodeGen/ARM64/sincos.ll create mode 100644 test/CodeGen/ARM64/sitofp-combine-chains.ll create mode 100644 test/CodeGen/ARM64/sli-sri-opt.ll create mode 100644 test/CodeGen/ARM64/smaxv.ll create mode 100644 test/CodeGen/ARM64/sminv.ll create mode 100644 test/CodeGen/ARM64/spill-lr.ll create mode 100644 test/CodeGen/ARM64/spill.ll create mode 100644 test/CodeGen/ARM64/st1.ll create mode 100644 test/CodeGen/ARM64/stack-no-frame.ll create mode 100644 test/CodeGen/ARM64/stackmap.ll create mode 100644 test/CodeGen/ARM64/stacksave.ll create mode 100644 test/CodeGen/ARM64/stp.ll create mode 100644 test/CodeGen/ARM64/strict-align.ll create mode 100644 test/CodeGen/ARM64/stur.ll create mode 100644 test/CodeGen/ARM64/subvector-extend.ll create mode 100644 test/CodeGen/ARM64/swizzle-tbl-i16-layout.ll create mode 100644 test/CodeGen/ARM64/tbl.ll create mode 100644 test/CodeGen/ARM64/this-return.ll create mode 100644 test/CodeGen/ARM64/tls-darwin.ll create mode 100644 test/CodeGen/ARM64/tls-dynamic-together.ll create mode 100644 test/CodeGen/ARM64/tls-dynamics.ll create mode 100644 
test/CodeGen/ARM64/tls-execs.ll create mode 100644 test/CodeGen/ARM64/trap.ll create mode 100644 test/CodeGen/ARM64/trn.ll create mode 100644 test/CodeGen/ARM64/trunc-store.ll create mode 100644 test/CodeGen/ARM64/umaxv.ll create mode 100644 test/CodeGen/ARM64/uminv.ll create mode 100644 test/CodeGen/ARM64/umov.ll create mode 100644 test/CodeGen/ARM64/unaligned_ldst.ll create mode 100644 test/CodeGen/ARM64/uzp.ll create mode 100644 test/CodeGen/ARM64/vaargs.ll create mode 100644 test/CodeGen/ARM64/vabs.ll create mode 100644 test/CodeGen/ARM64/vadd.ll create mode 100644 test/CodeGen/ARM64/vaddlv.ll create mode 100644 test/CodeGen/ARM64/vaddv.ll create mode 100644 test/CodeGen/ARM64/variadic-aapcs.ll create mode 100644 test/CodeGen/ARM64/vbitwise.ll create mode 100644 test/CodeGen/ARM64/vclz.ll create mode 100644 test/CodeGen/ARM64/vcmp.ll create mode 100644 test/CodeGen/ARM64/vcnt.ll create mode 100644 test/CodeGen/ARM64/vcombine.ll create mode 100644 test/CodeGen/ARM64/vcvt.ll create mode 100644 test/CodeGen/ARM64/vcvt_f.ll create mode 100644 test/CodeGen/ARM64/vcvt_f32_su32.ll create mode 100644 test/CodeGen/ARM64/vcvt_n.ll create mode 100644 test/CodeGen/ARM64/vcvt_su32_f32.ll create mode 100644 test/CodeGen/ARM64/vcvtxd_f32_f64.ll create mode 100644 test/CodeGen/ARM64/vecCmpBr.ll create mode 100644 test/CodeGen/ARM64/vecFold.ll create mode 100644 test/CodeGen/ARM64/vector-ext.ll create mode 100644 test/CodeGen/ARM64/vector-imm.ll create mode 100644 test/CodeGen/ARM64/vector-ldst.ll create mode 100644 test/CodeGen/ARM64/vext.ll create mode 100644 test/CodeGen/ARM64/vfloatintrinsics.ll create mode 100644 test/CodeGen/ARM64/vhadd.ll create mode 100644 test/CodeGen/ARM64/vhsub.ll create mode 100644 test/CodeGen/ARM64/virtual_base.ll create mode 100644 test/CodeGen/ARM64/vmax.ll create mode 100644 test/CodeGen/ARM64/vminmaxnm.ll create mode 100644 test/CodeGen/ARM64/vmovn.ll create mode 100644 test/CodeGen/ARM64/vmul.ll create mode 100644 test/CodeGen/ARM64/volatile.ll create mode 100644 test/CodeGen/ARM64/vqadd.ll create mode 100644 test/CodeGen/ARM64/vqsub.ll create mode 100644 test/CodeGen/ARM64/vselect.ll create mode 100644 test/CodeGen/ARM64/vsetcc_fp.ll create mode 100644 test/CodeGen/ARM64/vshift.ll create mode 100644 test/CodeGen/ARM64/vshr.ll create mode 100644 test/CodeGen/ARM64/vshuffle.ll create mode 100644 test/CodeGen/ARM64/vsqrt.ll create mode 100644 test/CodeGen/ARM64/vsra.ll create mode 100644 test/CodeGen/ARM64/vsub.ll create mode 100644 test/CodeGen/ARM64/weak-reference.ll create mode 100644 test/CodeGen/ARM64/xaluo.ll create mode 100644 test/CodeGen/ARM64/zero-cycle-regmov.ll create mode 100644 test/CodeGen/ARM64/zero-cycle-zeroing.ll create mode 100644 test/CodeGen/ARM64/zext.ll create mode 100644 test/CodeGen/ARM64/zextload-unscaled.ll create mode 100644 test/CodeGen/ARM64/zip.ll create mode 100644 test/CodeGen/CPP/attributes.ll create mode 100644 test/CodeGen/Generic/2014-02-05-OpaqueConstants.ll create mode 100644 test/CodeGen/Generic/no-target.ll create mode 100644 test/CodeGen/MSP430/misched-msp430.ll create mode 100644 test/CodeGen/Mips/abicalls.ll create mode 100644 test/CodeGen/Mips/cache-intrinsic.ll create mode 100644 test/CodeGen/Mips/call-optimization.ll create mode 100644 test/CodeGen/Mips/ci2.ll create mode 100644 test/CodeGen/Mips/elf_eflags.ll create mode 100644 test/CodeGen/Mips/elf_st_other.ll create mode 100644 test/CodeGen/Mips/l3mc.ll delete mode 100644 test/CodeGen/Mips/largefr1.ll create mode 100644 test/CodeGen/Mips/lcb2.ll create mode 100644 
test/CodeGen/Mips/lcb3c.ll create mode 100644 test/CodeGen/Mips/lcb4a.ll create mode 100644 test/CodeGen/Mips/lcb5.ll create mode 100644 test/CodeGen/Mips/mature-mc-support.ll create mode 100644 test/CodeGen/Mips/mbrsize4a.ll create mode 100644 test/CodeGen/Mips/micromips-atomic.ll create mode 100644 test/CodeGen/Mips/micromips-jal.ll create mode 100644 test/CodeGen/Mips/micromips-load-effective-address.ll create mode 100644 test/CodeGen/Mips/micromips-long-branch.ll create mode 100644 test/CodeGen/Mips/mips16-hf-attr.ll create mode 100644 test/CodeGen/Mips/nacl-align.ll create mode 100644 test/CodeGen/Mips/nacl-branch-delay.ll create mode 100644 test/CodeGen/Mips/nacl-reserved-regs.ll create mode 100644 test/CodeGen/Mips/octeon.ll create mode 100644 test/CodeGen/Mips/octeon_popcnt.ll create mode 100644 test/CodeGen/Mips/optimize-pic-o0.ll create mode 100644 test/CodeGen/Mips/s2rem.ll create mode 100644 test/CodeGen/Mips/sr1.ll create mode 100644 test/CodeGen/Mips/tail16.ll create mode 100644 test/CodeGen/NVPTX/addrspacecast.ll create mode 100644 test/CodeGen/NVPTX/aggr-param.ll create mode 100644 test/CodeGen/NVPTX/call-with-alloca-buffer.ll create mode 100644 test/CodeGen/NVPTX/div-ri.ll create mode 100644 test/CodeGen/NVPTX/ldparam-v4.ll create mode 100644 test/CodeGen/NVPTX/noduplicate-syncthreads.ll create mode 100644 test/CodeGen/NVPTX/symbol-naming.ll delete mode 100644 test/CodeGen/PowerPC/2008-12-12-EH.ll delete mode 100644 test/CodeGen/PowerPC/2009-08-23-linkerprivate.ll create mode 100644 test/CodeGen/PowerPC/aa-tbaa.ll create mode 100644 test/CodeGen/PowerPC/byval-agg-info.ll create mode 100644 test/CodeGen/PowerPC/crash.ll create mode 100644 test/CodeGen/PowerPC/crbit-asm.ll create mode 100644 test/CodeGen/PowerPC/crbits.ll create mode 100644 test/CodeGen/PowerPC/ctrloop-udivti3.ll create mode 100644 test/CodeGen/PowerPC/fast-isel-conversion-p5.ll create mode 100644 test/CodeGen/PowerPC/i1-to-double.ll create mode 100644 test/CodeGen/PowerPC/mature-mc-support.ll create mode 100644 test/CodeGen/PowerPC/ppc32-i1-vaarg.ll create mode 100644 test/CodeGen/PowerPC/pwr7-gt-nop.ll create mode 100644 test/CodeGen/PowerPC/spill-nor0.ll create mode 100644 test/CodeGen/PowerPC/srl-mask.ll delete mode 100644 test/CodeGen/PowerPC/tls-2.ll delete mode 100644 test/CodeGen/PowerPC/tls-gd.ll delete mode 100644 test/CodeGen/PowerPC/tls-ie.ll delete mode 100644 test/CodeGen/PowerPC/tls-ld-2.ll delete mode 100644 test/CodeGen/PowerPC/tls-ld.ll create mode 100644 test/CodeGen/PowerPC/tls-pic.ll create mode 100644 test/CodeGen/PowerPC/vsx-args.ll create mode 100644 test/CodeGen/PowerPC/vsx-fma-m.ll create mode 100644 test/CodeGen/PowerPC/vsx-self-copy.ll create mode 100644 test/CodeGen/PowerPC/vsx-spill.ll create mode 100644 test/CodeGen/PowerPC/vsx.ll create mode 100644 test/CodeGen/PowerPC/vtable-reloc.ll create mode 100644 test/CodeGen/PowerPC/weak_def_can_be_hidden.ll create mode 100644 test/CodeGen/R600/anyext.ll create mode 100644 test/CodeGen/R600/array-ptr-calc-i32.ll create mode 100644 test/CodeGen/R600/basic-branch.ll create mode 100644 test/CodeGen/R600/basic-loop.ll create mode 100644 test/CodeGen/R600/cayman-loop-bug.ll create mode 100644 test/CodeGen/R600/cf-stack-bug.ll create mode 100644 test/CodeGen/R600/codegen-prepare-addrmode-sext.ll create mode 100644 test/CodeGen/R600/fceil.ll create mode 100644 test/CodeGen/R600/ffloor.ll create mode 100644 test/CodeGen/R600/fneg-fabs.ll create mode 100644 test/CodeGen/R600/ftrunc.ll create mode 100644 test/CodeGen/R600/gv-const-addrspace.ll 
create mode 100644 test/CodeGen/R600/icmp64.ll create mode 100644 test/CodeGen/R600/indirect-private-64.ll create mode 100644 test/CodeGen/R600/infinite-loop-evergreen.ll create mode 100644 test/CodeGen/R600/infinite-loop.ll create mode 100644 test/CodeGen/R600/insert_vector_elt_f64.ll create mode 100644 test/CodeGen/R600/lds-oqap-crash.ll create mode 100644 test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll create mode 100644 test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll create mode 100644 test/CodeGen/R600/llvm.AMDGPU.bfi.ll create mode 100644 test/CodeGen/R600/llvm.AMDGPU.bfm.ll create mode 100644 test/CodeGen/R600/llvm.AMDGPU.kill.ll create mode 100644 test/CodeGen/R600/llvm.SI.load.dword.ll create mode 100644 test/CodeGen/R600/llvm.SI.sendmsg.ll create mode 100644 test/CodeGen/R600/llvm.exp2.ll create mode 100644 test/CodeGen/R600/llvm.trunc.ll create mode 100644 test/CodeGen/R600/local-64.ll create mode 100644 test/CodeGen/R600/loop-idiom.ll create mode 100644 test/CodeGen/R600/mubuf.ll create mode 100644 test/CodeGen/R600/r600-infinite-loop-bug-while-reorganizing-vector.ll create mode 100644 test/CodeGen/R600/register-count-comments.ll create mode 100644 test/CodeGen/R600/salu-to-valu.ll create mode 100644 test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll create mode 100644 test/CodeGen/R600/select-vectors.ll create mode 100644 test/CodeGen/R600/select64.ll create mode 100644 test/CodeGen/R600/setcc-equivalent.ll create mode 100644 test/CodeGen/R600/sext-in-reg.ll create mode 100644 test/CodeGen/R600/smrd.ll create mode 100644 test/CodeGen/R600/store-v3i32.ll create mode 100644 test/CodeGen/R600/store-v3i64.ll create mode 100644 test/CodeGen/R600/trunc-store-i1.ll create mode 100644 test/CodeGen/R600/unhandled-loop-condition-assertion.ll create mode 100644 test/CodeGen/R600/unroll.ll create mode 100644 test/CodeGen/R600/v1i64-kernel-arg.ll create mode 100644 test/CodeGen/R600/v_cndmask.ll create mode 100644 test/CodeGen/R600/vtx-fetch-branch.ll create mode 100644 test/CodeGen/SPARC/64spill.ll create mode 100644 test/CodeGen/SPARC/atomics.ll create mode 100644 test/CodeGen/SPARC/inlineasm.ll create mode 100644 test/CodeGen/SPARC/mature-mc-support.ll create mode 100644 test/CodeGen/SPARC/missinglabel.ll create mode 100644 test/CodeGen/SPARC/obj-relocs.ll create mode 100644 test/CodeGen/SPARC/parts.ll create mode 100644 test/CodeGen/SPARC/spillsize.ll create mode 100644 test/CodeGen/SPARC/trap.ll create mode 100644 test/CodeGen/SystemZ/atomicrmw-add-05.ll create mode 100644 test/CodeGen/SystemZ/atomicrmw-add-06.ll create mode 100644 test/CodeGen/SystemZ/atomicrmw-and-05.ll create mode 100644 test/CodeGen/SystemZ/atomicrmw-and-06.ll create mode 100644 test/CodeGen/SystemZ/atomicrmw-or-05.ll create mode 100644 test/CodeGen/SystemZ/atomicrmw-or-06.ll create mode 100644 test/CodeGen/SystemZ/atomicrmw-sub-05.ll create mode 100644 test/CodeGen/SystemZ/atomicrmw-sub-06.ll create mode 100644 test/CodeGen/SystemZ/atomicrmw-xor-05.ll create mode 100644 test/CodeGen/SystemZ/atomicrmw-xor-06.ll create mode 100644 test/CodeGen/SystemZ/fp-conv-13.ll create mode 100644 test/CodeGen/SystemZ/fp-conv-14.ll create mode 100644 test/CodeGen/SystemZ/mature-mc-support.ll create mode 100644 test/CodeGen/SystemZ/selectcc-01.ll create mode 100644 test/CodeGen/SystemZ/selectcc-02.ll create mode 100644 test/CodeGen/SystemZ/selectcc-03.ll create mode 100644 test/CodeGen/SystemZ/serialize-01.ll create mode 100644 test/CodeGen/Thumb/cortex-m0-unaligned-access.ll create mode 100644 test/CodeGen/Thumb/mature-mc-support.ll 
create mode 100644 test/CodeGen/Thumb/segmented-stacks-dynamic.ll create mode 100644 test/CodeGen/Thumb/segmented-stacks.ll create mode 100644 test/CodeGen/Thumb/sjljehprepare-lower-vector.ll create mode 100644 test/CodeGen/Thumb/triple.ll create mode 100644 test/CodeGen/Thumb2/segmented-stacks.ll create mode 100644 test/CodeGen/Thumb2/v8_IT_6.ll delete mode 100644 test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll delete mode 100644 test/CodeGen/X86/2007-10-17-IllegalAsm.ll delete mode 100644 test/CodeGen/X86/2008-12-12-PrivateEHSymbol.ll delete mode 100644 test/CodeGen/X86/2009-08-23-linkerprivate.ll delete mode 100644 test/CodeGen/X86/2010-12-02-MC-Set.ll create mode 100644 test/CodeGen/X86/MachineBranchProb.ll delete mode 100644 test/CodeGen/X86/alias-error.ll create mode 100644 test/CodeGen/X86/atom-cmpb.ll create mode 100644 test/CodeGen/X86/avx-cvt-2.ll create mode 100644 test/CodeGen/X86/avx512-vselect-crash.ll create mode 100644 test/CodeGen/X86/avx512-zext-load-crash.ll create mode 100644 test/CodeGen/X86/bswap-vector.ll create mode 100644 test/CodeGen/X86/cache-intrinsic.ll create mode 100644 test/CodeGen/X86/catch.ll create mode 100644 test/CodeGen/X86/cdecl-method-return.ll create mode 100644 test/CodeGen/X86/codegen-prepare-addrmode-sext.ll create mode 100644 test/CodeGen/X86/combine-or.ll create mode 100644 test/CodeGen/X86/combine-vec-shuffle.ll create mode 100644 test/CodeGen/X86/const-base-addr.ll create mode 100644 test/CodeGen/X86/cse-add-with-overflow.ll create mode 100644 test/CodeGen/X86/dbg-changes-codegen-branch-folding.ll create mode 100644 test/CodeGen/X86/dbg-changes-codegen.ll delete mode 100644 test/CodeGen/X86/dll-linkage.ll create mode 100644 test/CodeGen/X86/dllexport-x86_64.ll create mode 100644 test/CodeGen/X86/dllimport-x86_64.ll create mode 100644 test/CodeGen/X86/dllimport.ll create mode 100644 test/CodeGen/X86/dynamic-alloca-in-entry.ll create mode 100644 test/CodeGen/X86/exedepsfix-broadcast.ll create mode 100644 test/CodeGen/X86/extract-store.ll create mode 100644 test/CodeGen/X86/fast-isel-select.ll create mode 100644 test/CodeGen/X86/fold-call-oper.ll create mode 100644 test/CodeGen/X86/fold-vector-sext-crash.ll create mode 100644 test/CodeGen/X86/fold-vector-sext-zext.ll create mode 100644 test/CodeGen/X86/inalloca-ctor.ll create mode 100644 test/CodeGen/X86/inalloca-invoke.ll create mode 100644 test/CodeGen/X86/inalloca-stdcall.ll create mode 100644 test/CodeGen/X86/inalloca.ll create mode 100644 test/CodeGen/X86/inline-asm-modifier-q.ll create mode 100644 test/CodeGen/X86/inline-asm-sp-clobber-memcpy.ll create mode 100644 test/CodeGen/X86/inline-asm-stack-realign.ll create mode 100644 test/CodeGen/X86/inline-asm-stack-realign2.ll create mode 100644 test/CodeGen/X86/inline-asm-stack-realign3.ll create mode 100644 test/CodeGen/X86/ins_split_regalloc.ll create mode 100644 test/CodeGen/X86/large-constants.ll create mode 100644 test/CodeGen/X86/mature-mc-support.ll create mode 100644 test/CodeGen/X86/misched-aa-colored.ll create mode 100644 test/CodeGen/X86/misched-aa-mmos.ll create mode 100644 test/CodeGen/X86/mul128_sext_loop.ll delete mode 100644 test/CodeGen/X86/no-elf-compact-unwind.ll create mode 100644 test/CodeGen/X86/opaque-constant-asm.ll create mode 100644 test/CodeGen/X86/osx-private-labels.ll create mode 100644 test/CodeGen/X86/peephole-multiple-folds.ll delete mode 100644 test/CodeGen/X86/pr10420.ll delete mode 100644 test/CodeGen/X86/pr14090.ll create mode 100644 test/CodeGen/X86/pr19049.ll create mode 100644 
test/CodeGen/X86/preserve_allcc64.ll
 create mode 100644 test/CodeGen/X86/preserve_mostcc64.ll
 create mode 100644 test/CodeGen/X86/ragreedy-bug.ll
 create mode 100644 test/CodeGen/X86/ragreedy-hoist-spill.ll
 create mode 100644 test/CodeGen/X86/ragreedy-last-chance-recoloring.ll
 create mode 100644 test/CodeGen/X86/rotate3.ll
 create mode 100644 test/CodeGen/X86/rotate4.ll
 create mode 100644 test/CodeGen/X86/saddo-redundant-add.ll
 create mode 100644 test/CodeGen/X86/setjmp-spills.ll
 create mode 100644 test/CodeGen/X86/shift-combine-crash.ll
 create mode 100644 test/CodeGen/X86/shift-pcmp.ll
 create mode 100644 test/CodeGen/X86/sse-scalar-fp-arith-2.ll
 create mode 100644 test/CodeGen/X86/sse-scalar-fp-arith.ll
 create mode 100644 test/CodeGen/X86/ssp-data-layout.ll
 create mode 100644 test/CodeGen/X86/stackmap-liveness.ll
 create mode 100644 test/CodeGen/X86/stackmap-nops.ll
 create mode 100644 test/CodeGen/X86/stores-merging.ll
 create mode 100644 test/CodeGen/X86/sunkaddr-ext.ll
 create mode 100644 test/CodeGen/X86/vaargs.ll
 create mode 100644 test/CodeGen/X86/vastart-defs-eflags.ll
 create mode 100644 test/CodeGen/X86/vbinop-simplify-bug.ll
 create mode 100644 test/CodeGen/X86/vec_setcc-2.ll
 create mode 100644 test/CodeGen/X86/vec_shift5.ll
 create mode 100644 test/CodeGen/X86/vec_shift6.ll
 create mode 100644 test/CodeGen/X86/vec_shuf-insert.ll
 create mode 100644 test/CodeGen/X86/vec_shuffle-40.ll
 create mode 100644 test/CodeGen/X86/vselect-2.ll
 create mode 100644 test/CodeGen/X86/vselect.ll
 create mode 100644 test/CodeGen/X86/vshift-6.ll
 create mode 100644 test/CodeGen/X86/x86-64-double-precision-shift-left.ll
 create mode 100644 test/CodeGen/X86/x86-64-double-precision-shift-right.ll
 create mode 100644 test/CodeGen/X86/x86-64-double-shifts-Oz-Os-O2.ll
 create mode 100644 test/CodeGen/X86/x86-64-double-shifts-var.ll
 create mode 100644 test/CodeGen/X86/zlib-longest-match.ll
 create mode 100644 test/CodeGen/XCore/align.ll
 create mode 100644 test/CodeGen/XCore/call.ll
 create mode 100644 test/CodeGen/XCore/codemodel.ll
 create mode 100644 test/CodeGen/XCore/llvm-intrinsics.ll
 create mode 100644 test/CodeGen/XCore/memcpy.ll
 create mode 100644 test/CodeGen/XCore/resources_combine.ll

diff --git a/test/CodeGen/AArch64/128bit_load_store.ll b/test/CodeGen/AArch64/128bit_load_store.ll
new file mode 100644
index 0000000..502fd70
--- /dev/null
+++ b/test/CodeGen/AArch64/128bit_load_store.ll
@@ -0,0 +1,53 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=neon | FileCheck %s
+
+define void @test_store_f128(fp128* %ptr, fp128 %val) #0 {
+; CHECK: test_store_f128
+; CHECK: str {{q[0-9]+}}, [{{x[0-9]+}}]
+entry:
+  store fp128 %val, fp128* %ptr, align 16
+  ret void
+}
+
+define fp128 @test_load_f128(fp128* readonly %ptr) #2 {
+; CHECK: test_load_f128
+; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}]
+entry:
+  %0 = load fp128* %ptr, align 16
+  ret fp128 %0
+}
+
+define void @test_vstrq_p128(i128* %ptr, i128 %val) #0 {
+; CHECK: test_vstrq_p128
+; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, #8]
+; CHECK-NEXT: str {{x[0-9]+}}, [{{x[0-9]+}}]
+entry:
+  %0 = bitcast i128* %ptr to fp128*
+  %1 = bitcast i128 %val to fp128
+  store fp128 %1, fp128* %0, align 16
+  ret void
+}
+
+define i128 @test_vldrq_p128(i128* readonly %ptr) #2 {
+; CHECK: test_vldrq_p128
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}]
+; CHECK-NEXT: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #8]
+entry:
+  %0 = bitcast i128* %ptr to fp128*
+  %1 = load fp128* %0, align 16
+  %2 = bitcast fp128 %1 to i128
+  ret i128 %2
+}
+
+define void
@test_ld_st_p128(i128* nocapture %ptr) #0 { +; CHECK: test_ld_st_p128 +; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}] +; CHECK-NEXT: str {{q[0-9]+}}, [{{x[0-9]+}}, #16] +entry: + %0 = bitcast i128* %ptr to fp128* + %1 = load fp128* %0, align 16 + %add.ptr = getelementptr inbounds i128* %ptr, i64 1 + %2 = bitcast i128* %add.ptr to fp128* + store fp128 %1, fp128* %2, align 16 + ret void +} + diff --git a/test/CodeGen/AArch64/adc.ll b/test/CodeGen/AArch64/adc.ll index 26fd3e6..29637d3 100644 --- a/test/CodeGen/AArch64/adc.ll +++ b/test/CodeGen/AArch64/adc.ll @@ -1,15 +1,20 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-LE %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s define i128 @test_simple(i128 %a, i128 %b, i128 %c) { ; CHECK-LABEL: test_simple: %valadd = add i128 %a, %b -; CHECK: adds [[ADDLO:x[0-9]+]], x0, x2 -; CHECK-NEXT: adcs [[ADDHI:x[0-9]+]], x1, x3 +; CHECK-LE: adds [[ADDLO:x[0-9]+]], x0, x2 +; CHECK-LE-NEXT: adcs [[ADDHI:x[0-9]+]], x1, x3 +; CHECK-BE: adds [[ADDLO:x[0-9]+]], x1, x3 +; CHECK-BE-NEXT: adcs [[ADDHI:x[0-9]+]], x0, x2 %valsub = sub i128 %valadd, %c -; CHECK: subs x0, [[ADDLO]], x4 -; CHECK: sbcs x1, [[ADDHI]], x5 +; CHECK-LE: subs x0, [[ADDLO]], x4 +; CHECK-LE: sbcs x1, [[ADDHI]], x5 +; CHECK-BE: subs x1, [[ADDLO]], x5 +; CHECK-BE: sbcs x0, [[ADDHI]], x4 ret i128 %valsub ; CHECK: ret @@ -19,8 +24,10 @@ define i128 @test_imm(i128 %a) { ; CHECK-LABEL: test_imm: %val = add i128 %a, 12 -; CHECK: adds x0, x0, #12 -; CHECK: adcs x1, x1, {{x[0-9]|xzr}} +; CHECK-LE: adds x0, x0, #12 +; CHECK-LE: adcs x1, x1, {{x[0-9]|xzr}} +; CHECK-BE: adds x1, x1, #12 +; CHECK-BE: adcs x0, x0, {{x[0-9]|xzr}} ret i128 %val ; CHECK: ret @@ -32,8 +39,10 @@ define i128 @test_shifted(i128 %a, i128 %b) { %rhs = shl i128 %b, 45 %val = add i128 %a, %rhs -; CHECK: adds x0, x0, x2, lsl #45 -; CHECK: adcs x1, x1, {{x[0-9]}} +; CHECK-LE: adds x0, x0, x2, lsl #45 +; CHECK-LE: adcs x1, x1, {{x[0-9]}} +; CHECK-BE: adds x1, x1, x3, lsl #45 +; CHECK-BE: adcs x0, x0, {{x[0-9]}} ret i128 %val ; CHECK: ret @@ -46,8 +55,10 @@ define i128 @test_extended(i128 %a, i16 %b) { %rhs = shl i128 %ext, 3 %val = add i128 %a, %rhs -; CHECK: adds x0, x0, w2, sxth #3 -; CHECK: adcs x1, x1, {{x[0-9]}} +; CHECK-LE: adds x0, x0, w2, sxth #3 +; CHECK-LE: adcs x1, x1, {{x[0-9]}} +; CHECK-BE: adds x1, x1, w2, sxth #3 +; CHECK-BE: adcs x0, x0, {{x[0-9]}} ret i128 %val ; CHECK: ret diff --git a/test/CodeGen/AArch64/assertion-rc-mismatch.ll b/test/CodeGen/AArch64/assertion-rc-mismatch.ll new file mode 100644 index 0000000..02b0c0e --- /dev/null +++ b/test/CodeGen/AArch64/assertion-rc-mismatch.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s +; Test case related to . 
+ +; CHECK-LABEL: small +define i64 @small(i64 %encodedBase) { +cmp: + %lnot.i.i = icmp eq i64 %encodedBase, 0 + br i1 %lnot.i.i, label %if, label %else +if: + %tmp1 = call i8* @llvm.returnaddress(i32 0) + br label %end +else: + %tmp3 = call i8* @llvm.returnaddress(i32 0) + %ptr = getelementptr inbounds i8* %tmp3, i64 -16 + %ld = load i8* %ptr, align 4 + %tmp2 = inttoptr i8 %ld to i8* + br label %end +end: + %tmp = phi i8* [ %tmp1, %if ], [ %tmp2, %else ] + %coerce.val.pi56 = ptrtoint i8* %tmp to i64 + ret i64 %coerce.val.pi56 +} + +declare i8* @llvm.returnaddress(i32) diff --git a/test/CodeGen/AArch64/atomic-ops.ll b/test/CodeGen/AArch64/atomic-ops.ll index de84ff4..5fe2936 100644 --- a/test/CodeGen/AArch64/atomic-ops.ll +++ b/test/CodeGen/AArch64/atomic-ops.ll @@ -1,4 +1,5 @@ ; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-REG %s @var8 = global i8 0 @var16 = global i16 0 @@ -17,6 +18,8 @@ define i8 @test_atomic_load_add_i8(i8 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-REG: add w[[NEW:[0-9]+]], w{{[0-9]+}}, w0 +; CHECK-REG-NOT: stlxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -37,6 +40,8 @@ define i16 @test_atomic_load_add_i16(i16 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-REG: add w[[NEW:[0-9]+]], w{{[0-9]+}}, w0 +; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -57,6 +62,8 @@ define i32 @test_atomic_load_add_i32(i32 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-REG: add w[[NEW:[0-9]+]], w{{[0-9]+}}, w0 +; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -77,6 +84,8 @@ define i64 @test_atomic_load_add_i64(i64 %offset) nounwind { ; x0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: add [[NEW:x[0-9]+]], x[[OLD]], x0 +; CHECK-REG: add x[[NEW:[0-9]+]], x{{[0-9]+}}, x0 +; CHECK-REG-NOT: stxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -97,6 +106,8 @@ define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-REG: sub w[[NEW:[0-9]+]], w{{[0-9]+}}, w0 +; CHECK-REG-NOT: stxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -117,6 +128,8 @@ define i16 @test_atomic_load_sub_i16(i16 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-REG: sub w[[NEW:[0-9]+]], w{{[0-9]+}}, w0 +; CHECK-REG-NOT: stlxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -137,6 +150,8 @@ define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-REG: sub w[[NEW:[0-9]+]], w{{[0-9]+}}, w0 +; CHECK-REG-NOT: stxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -157,6 +172,8 @@ define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind { ; x0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: sub [[NEW:x[0-9]+]], x[[OLD]], x0 +; CHECK-REG: sub x[[NEW:[0-9]+]], x{{[0-9]+}}, x0 +; CHECK-REG-NOT: stlxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -177,6 +194,8 @@ define i8 @test_atomic_load_and_i8(i8 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-REG: and w[[NEW:[0-9]+]], w{{[0-9]+}}, w0 +; CHECK-REG-NOT: stlxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -197,6 +216,8 @@ define i16 @test_atomic_load_and_i16(i16 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-REG: and w[[NEW:[0-9]+]], w{{[0-9]+}}, w0 +; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -217,6 +238,8 @@ define i32 @test_atomic_load_and_i32(i32 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-REG: and w[[NEW:[0-9]+]], w{{[0-9]+}}, w0 +; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -237,6 +260,8 @@ define i64 @test_atomic_load_and_i64(i64 %offset) nounwind { ; x0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: and [[NEW:x[0-9]+]], x[[OLD]], x0 +; CHECK-REG: and x[[NEW:[0-9]+]], x{{[0-9]+}}, x0 +; CHECK-REG-NOT: stxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -257,6 +282,8 @@ define i8 @test_atomic_load_or_i8(i8 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-REG: orr w[[NEW:[0-9]+]], w{{[0-9]+}}, w0 +; CHECK-REG-NOT: stlxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -277,6 +304,8 @@ define i16 @test_atomic_load_or_i16(i16 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-REG: orr w[[NEW:[0-9]+]], w{{[0-9]+}}, w0 +; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -297,6 +326,8 @@ define i32 @test_atomic_load_or_i32(i32 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-REG: orr w[[NEW:[0-9]+]], w{{[0-9]+}}, w0 +; CHECK-REG-NOT: stxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -317,6 +348,8 @@ define i64 @test_atomic_load_or_i64(i64 %offset) nounwind { ; x0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: orr [[NEW:x[0-9]+]], x[[OLD]], x0 +; CHECK-REG: orr x[[NEW:[0-9]+]], x{{[0-9]+}}, x0 +; CHECK-REG-NOT: stlxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -337,6 +370,8 @@ define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-REG: eor w[[NEW:[0-9]+]], w{{[0-9]+}}, w0 +; CHECK-REG-NOT: stxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -357,6 +392,8 @@ define i16 @test_atomic_load_xor_i16(i16 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-REG: eor w[[NEW:[0-9]+]], w{{[0-9]+}}, w0 +; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -377,6 +414,8 @@ define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-REG: eor w[[NEW:[0-9]+]], w{{[0-9]+}}, w0 +; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -397,6 +436,8 @@ define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind { ; x0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
; CHECK-NEXT: eor [[NEW:x[0-9]+]], x[[OLD]], x0 +; CHECK-REG: eor x[[NEW:[0-9]+]], x{{[0-9]+}}, x0 +; CHECK-REG-NOT: stxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -416,6 +457,7 @@ define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind { ; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. +; CHECK-REG-NOT: stxrb w0, w0, [x{{[0-9]+}}] ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], w0, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -435,6 +477,7 @@ define i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind { ; CHECK-NEXT: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. +; CHECK-REG-NOT: stlxrh w0, w0, [x{{[0-9]+}}] ; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], w0, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -454,6 +497,7 @@ define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind { ; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. +; CHECK-REG-NOT: stlxr w0, w0, [x{{[0-9]+}}] ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], w0, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -473,6 +517,7 @@ define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind { ; CHECK-NEXT: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]] ; x0 below is a reasonable guess but could change: it certainly comes into the ; function there. +; CHECK-REG-NOT: stxr w0, x0, [x{{[0-9]+}}] ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], x0, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -495,6 +540,8 @@ define i8 @test_atomic_load_min_i8(i8 %offset) nounwind { ; function there. ; CHECK-NEXT: cmp w0, w[[OLD]], sxtb ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt +; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, gt +; CHECK-REG-NOT: stxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -516,6 +563,8 @@ define i16 @test_atomic_load_min_i16(i16 %offset) nounwind { ; function there. ; CHECK-NEXT: cmp w0, w[[OLD]], sxth ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt +; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, gt +; CHECK-REG-NOT: stlxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -537,6 +586,8 @@ define i32 @test_atomic_load_min_i32(i32 %offset) nounwind { ; function there. ; CHECK-NEXT: cmp w0, w[[OLD]] ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt +; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, gt +; CHECK-REG-NOT: stxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -558,6 +609,8 @@ define i64 @test_atomic_load_min_i64(i64 %offset) nounwind { ; function there. 
; CHECK-NEXT: cmp x0, x[[OLD]] ; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, gt +; CHECK-REG: csel x[[NEW:[0-9]+]], x{{[0-9]+}}, x0, gt +; CHECK-REG-NOT: stlxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -579,6 +632,8 @@ define i8 @test_atomic_load_max_i8(i8 %offset) nounwind { ; function there. ; CHECK-NEXT: cmp w0, w[[OLD]], sxtb ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt +; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, lt +; CHECK-REG-NOT: stlxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -600,6 +655,8 @@ define i16 @test_atomic_load_max_i16(i16 %offset) nounwind { ; function there. ; CHECK-NEXT: cmp w0, w[[OLD]], sxth ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt +; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, lt +; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -621,6 +678,8 @@ define i32 @test_atomic_load_max_i32(i32 %offset) nounwind { ; function there. ; CHECK-NEXT: cmp w0, w[[OLD]] ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt +; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, lt +; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -642,6 +701,8 @@ define i64 @test_atomic_load_max_i64(i64 %offset) nounwind { ; function there. ; CHECK-NEXT: cmp x0, x[[OLD]] ; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, lt +; CHECK-REG: csel x[[NEW:[0-9]+]], x{{[0-9]+}}, x0, lt +; CHECK-REG-NOT: stlxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -663,6 +724,8 @@ define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind { ; function there. ; CHECK-NEXT: cmp w0, w[[OLD]], uxtb ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi +; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, hi +; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -684,6 +747,8 @@ define i16 @test_atomic_load_umin_i16(i16 %offset) nounwind { ; function there. ; CHECK-NEXT: cmp w0, w[[OLD]], uxth ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi +; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, hi +; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -705,6 +770,8 @@ define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind { ; function there. ; CHECK-NEXT: cmp w0, w[[OLD]] ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi +; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, hi +; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -726,6 +793,8 @@ define i64 @test_atomic_load_umin_i64(i64 %offset) nounwind { ; function there. 
; CHECK-NEXT: cmp x0, x[[OLD]] ; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, hi +; CHECK-REG: csel x[[NEW:[0-9]+]], x{{[0-9]+}}, x0, hi +; CHECK-REG-NOT: stlxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -747,6 +816,8 @@ define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind { ; function there. ; CHECK-NEXT: cmp w0, w[[OLD]], uxtb ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo +; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, lo +; CHECK-REG-NOT: stlxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -768,6 +839,8 @@ define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind { ; function there. ; CHECK-NEXT: cmp w0, w[[OLD]], uxth ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo +; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, lo +; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -789,6 +862,8 @@ define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind { ; function there. ; CHECK-NEXT: cmp w0, w[[OLD]] ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo +; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, lo +; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -810,6 +885,8 @@ define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind { ; function there. ; CHECK-NEXT: cmp x0, x[[OLD]] ; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, lo +; CHECK-REG: csel x[[NEW:[0-9]+]], x{{[0-9]+}}, x0, lo +; CHECK-REG-NOT: stlxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -820,7 +897,7 @@ define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind { define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i8: - %old = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire + %old = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire acquire ; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8 @@ -832,6 +909,7 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind { ; CHECK-NEXT: cmp w[[OLD]], w0 ; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]] ; As above, w1 is a reasonable guess. +; CHECK-REG-NOT: stxrb w1, w1, [x{{[0-9]+}}] ; CHECK: stxrb [[STATUS:w[0-9]+]], w1, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] ; CHECK-NOT: dmb @@ -842,7 +920,7 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind { define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i16: - %old = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst + %old = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst seq_cst ; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16 @@ -854,6 +932,7 @@ define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind { ; CHECK-NEXT: cmp w[[OLD]], w0 ; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]] ; As above, w1 is a reasonable guess. 
+; CHECK-REG-NOT: stlxrh w1, w1, [x{{[0-9]+}}] ; CHECK: stlxrh [[STATUS:w[0-9]+]], w1, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] ; CHECK-NOT: dmb @@ -864,7 +943,7 @@ define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind { define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i32: - %old = cmpxchg i32* @var32, i32 %wanted, i32 %new release + %old = cmpxchg i32* @var32, i32 %wanted, i32 %new release monotonic ; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32 @@ -876,6 +955,7 @@ define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind { ; CHECK-NEXT: cmp w[[OLD]], w0 ; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]] ; As above, w1 is a reasonable guess. +; CHECK-REG-NOT: stlxr w1, w1, [x{{[0-9]+}}] ; CHECK: stlxr [[STATUS:w[0-9]+]], w1, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] ; CHECK-NOT: dmb @@ -886,7 +966,7 @@ define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind { define i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i64: - %old = cmpxchg i64* @var64, i64 %wanted, i64 %new monotonic + %old = cmpxchg i64* @var64, i64 %wanted, i64 %new monotonic monotonic ; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64 @@ -898,6 +978,7 @@ define i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind { ; CHECK-NEXT: cmp x[[OLD]], x0 ; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]] ; As above, w1 is a reasonable guess. +; CHECK-REG-NOT: stxr w1, x1, [x{{[0-9]+}}] ; CHECK: stxr [[STATUS:w[0-9]+]], x1, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] ; CHECK-NOT: dmb diff --git a/test/CodeGen/AArch64/concatvector-bugs.ll b/test/CodeGen/AArch64/concatvector-bugs.ll new file mode 100644 index 0000000..5889e22 --- /dev/null +++ b/test/CodeGen/AArch64/concatvector-bugs.ll @@ -0,0 +1,68 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon +; Bug: i8 type in FRP8 register but not registering with register class causes segmentation fault. +; Fix: Removed i8 type from FPR8 register class. 
+ +define void @test_concatvector_v8i8() { +entry.split: + br i1 undef, label %if.then, label %if.end + +if.then: ; preds = %entry.split + unreachable + +if.end: ; preds = %entry.split + br i1 undef, label %if.then9, label %if.end18 + +if.then9: ; preds = %if.end + unreachable + +if.end18: ; preds = %if.end + br label %for.body + +for.body: ; preds = %for.inc, %if.end18 + br i1 false, label %if.then30, label %for.inc + +if.then30: ; preds = %for.body + unreachable + +for.inc: ; preds = %for.body + br i1 undef, label %for.end, label %for.body + +for.end: ; preds = %for.inc + br label %for.body77 + +for.body77: ; preds = %for.body77, %for.end + br i1 undef, label %for.end106, label %for.body77 + +for.end106: ; preds = %for.body77 + br i1 undef, label %for.body130.us.us, label %stmt.for.body130.us.us + +stmt.for.body130.us.us: ; preds = %stmt.for.body130.us.us, %for.end106 + %_p_splat.us = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer + store <8 x i8> %_p_splat.us, <8 x i8>* undef, align 1 + br label %stmt.for.body130.us.us + +for.body130.us.us: ; preds = %for.body130.us.us, %for.end106 + br label %for.body130.us.us +} + +declare <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32>, i32) + +define <8 x i16> @test_splat(i32 %l) nounwind { +; CHECK-LABEL: test_splat: +; CHECK: ret + %lhs = insertelement <1 x i32> undef, i32 %l, i32 0 + %shift = tail call <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32> %lhs, i32 11) + %vec = shufflevector <1 x i16> %shift, <1 x i16> undef, <8 x i32> zeroinitializer + ret <8 x i16> %vec +} + + +define <8 x i16> @test_notsplat(<8 x i16> %a, <8 x i16> %b, i32 %l) nounwind { +; CHECK-LABEL: test_notsplat: +; CHECK: ret +entry: + %lhs = insertelement <1 x i32> undef, i32 %l, i32 0 + %shift = tail call <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32> %lhs, i32 11) + %vec = shufflevector <1 x i16> %shift, <1 x i16> undef, <8 x i32> + ret <8 x i16> %vec +} diff --git a/test/CodeGen/AArch64/cpus.ll b/test/CodeGen/AArch64/cpus.ll new file mode 100644 index 0000000..f0b60f0 --- /dev/null +++ b/test/CodeGen/AArch64/cpus.ll @@ -0,0 +1,13 @@ +; This tests that llc accepts all valid AArch64 CPUs + +; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mcpu=generic 2>&1 | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mcpu=cortex-a53 2>&1 | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mcpu=cortex-a57 2>&1 | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mcpu=invalidcpu 2>&1 | FileCheck %s --check-prefix=INVALID + +; CHECK-NOT: {{.*}} is not a recognized processor for this target +; INVALID: {{.*}} is not a recognized processor for this target + +define i32 @f(i64 %z) { + ret i32 0 +} diff --git a/test/CodeGen/AArch64/fcvt-int.ll b/test/CodeGen/AArch64/fcvt-int.ll index b28eb3e..97427a7 100644 --- a/test/CodeGen/AArch64/fcvt-int.ll +++ b/test/CodeGen/AArch64/fcvt-int.ll @@ -69,7 +69,7 @@ define float @test_i32tofloat(i32 %in) { ; CHECK-DAG: scvtf [[SIG:s[0-9]+]], {{w[0-9]+}} %res = fsub float %signed, %unsigned -; CHECL: fsub {{s[0-9]+}}, [[SIG]], [[UNSIG]] +; CHECK: fsub {{s[0-9]+}}, [[SIG]], [[UNSIG]] ret float %res ; CHECK: ret } diff --git a/test/CodeGen/AArch64/fp-dp3.ll b/test/CodeGen/AArch64/fp-dp3.ll index 590557f..2a6790e 100644 --- a/test/CodeGen/AArch64/fp-dp3.ll +++ b/test/CodeGen/AArch64/fp-dp3.ll @@ -26,8 +26,9 @@ define float @test_fmsub(float %a, float %b, float %c) { define float @test_fnmadd(float %a, float %b, float %c) { ; CHECK-LABEL: test_fnmadd: ; 
CHECK-NOFAST-LABEL: test_fnmadd: + %nega = fsub float -0.0, %a %negc = fsub float -0.0, %c - %val = call float @llvm.fma.f32(float %a, float %b, float %negc) + %val = call float @llvm.fma.f32(float %nega, float %b, float %negc) ; CHECK: fnmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} ; CHECK-NOFAST: fnmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} ret float %val @@ -36,9 +37,8 @@ define float @test_fnmadd(float %a, float %b, float %c) { define float @test_fnmsub(float %a, float %b, float %c) { ; CHECK-LABEL: test_fnmsub: ; CHECK-NOFAST-LABEL: test_fnmsub: - %nega = fsub float -0.0, %a %negc = fsub float -0.0, %c - %val = call float @llvm.fma.f32(float %nega, float %b, float %negc) + %val = call float @llvm.fma.f32(float %a, float %b, float %negc) ; CHECK: fnmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} ; CHECK-NOFAST: fnmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} ret float %val @@ -66,8 +66,9 @@ define double @testd_fmsub(double %a, double %b, double %c) { define double @testd_fnmadd(double %a, double %b, double %c) { ; CHECK-LABEL: testd_fnmadd: ; CHECK-NOFAST-LABEL: testd_fnmadd: + %nega = fsub double -0.0, %a %negc = fsub double -0.0, %c - %val = call double @llvm.fma.f64(double %a, double %b, double %negc) + %val = call double @llvm.fma.f64(double %nega, double %b, double %negc) ; CHECK: fnmadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ; CHECK-NOFAST: fnmadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret double %val @@ -76,9 +77,8 @@ define double @testd_fnmadd(double %a, double %b, double %c) { define double @testd_fnmsub(double %a, double %b, double %c) { ; CHECK-LABEL: testd_fnmsub: ; CHECK-NOFAST-LABEL: testd_fnmsub: - %nega = fsub double -0.0, %a %negc = fsub double -0.0, %c - %val = call double @llvm.fma.f64(double %nega, double %b, double %negc) + %val = call double @llvm.fma.f64(double %a, double %b, double %negc) ; CHECK: fnmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ; CHECK-NOFAST: fnmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret double %val @@ -113,12 +113,13 @@ define float @test_fnmadd_unfused(float %a, float %b, float %c) { ; CHECK-NOFAST-LABEL: test_fnmadd_unfused: %nega = fsub float -0.0, %a %prod = fmul float %b, %c - %sum = fadd float %nega, %prod + %diff = fsub float %nega, %prod ; CHECK: fnmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} ; CHECK-NOFAST-NOT: fnmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} ; CHECK-NOFAST: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} ; CHECK-NOFAST: fsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} - ret float %sum +; CHECK-NOFAST: ret + ret float %diff } define float @test_fnmsub_unfused(float %a, float %b, float %c) { @@ -126,12 +127,37 @@ define float @test_fnmsub_unfused(float %a, float %b, float %c) { ; CHECK-NOFAST-LABEL: test_fnmsub_unfused: %nega = fsub float -0.0, %a %prod = fmul float %b, %c - %diff = fsub float %nega, %prod + %sum = fadd float %nega, %prod ; CHECK: fnmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} ; CHECK-NOFAST-NOT: fnmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} -; CHECK-NOFAST-DAG: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} -; CHECK-NOFAST-DAG: fneg {{s[0-9]+}}, {{s[0-9]+}} -; CHECK-NOFAST-DAG: fsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} -; CHECK-NOFAST: ret - ret float %diff +; CHECK-NOFAST: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +; CHECK-NOFAST: fsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + ret float %sum } + +; Another set of tests that check for multiply single use + +define float 
@test_fmadd_unfused_su(float %a, float %b, float %c) { +; CHECK-LABEL: test_fmadd_unfused_su: + %prod = fmul float %b, %c + %sum = fadd float %a, %prod + %res = fadd float %sum, %prod +; CHECK-NOT: fmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +; CHECK: fadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +; CHECK: fadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + ret float %res +} + +define float @test_fmsub_unfused_su(float %a, float %b, float %c) { +; CHECK-LABEL: test_fmsub_unfused_su: + %prod = fmul float %b, %c + %diff = fsub float %a, %prod + %res = fsub float %diff, %prod +; CHECK-NOT: fmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +; CHECK: fsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +; CHECK: fsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + ret float %res +} + diff --git a/test/CodeGen/AArch64/func-argpassing.ll b/test/CodeGen/AArch64/func-argpassing.ll index 430d77f..f307686 100644 --- a/test/CodeGen/AArch64/func-argpassing.ll +++ b/test/CodeGen/AArch64/func-argpassing.ll @@ -1,5 +1,7 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-LE %s ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s %myStruct = type { i64 , i8, i32 } @@ -146,7 +148,8 @@ define i32 @struct_on_stack(i8 %var0, i16 %var1, i32 %var2, i64 %var3, i128 %var %retval = load volatile i32* %stacked ret i32 %retval -; CHECK: ldr w0, [sp, #16] +; CHECK-LE: ldr w0, [sp, #16] +; CHECK-BE: ldr w0, [sp, #20] } define void @stacked_fpu(float %var0, double %var1, float %var2, float %var3, @@ -180,8 +183,10 @@ define void @check_i128_stackalign(i32 %val0, i32 %val1, i32 %val2, i32 %val3, ; CHECK: check_i128_stackalign store i128 %stack2, i128* @var128 ; Nothing local on stack in current codegen, so first stack is 16 away -; CHECK: add x[[REG:[0-9]+]], sp, #16 -; CHECK: ldr {{x[0-9]+}}, [x[[REG]], #8] +; CHECK-LE: add x[[REG:[0-9]+]], sp, #16 +; CHECK-LE: ldr {{x[0-9]+}}, [x[[REG]], #8] +; CHECK-BE: ldr {{x[0-9]+}}, [sp, #24] + ; Important point is that we address sp+24 for second dword ; CHECK: ldr {{x[0-9]+}}, [sp, #16] ret void diff --git a/test/CodeGen/AArch64/func-calls.ll b/test/CodeGen/AArch64/func-calls.ll index ac188bb..f029bf2 100644 --- a/test/CodeGen/AArch64/func-calls.ll +++ b/test/CodeGen/AArch64/func-calls.ll @@ -1,5 +1,7 @@ ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-BE --check-prefix=CHECK-NOFP %s %myStruct = type { i64 , i8, i32 } @@ -126,8 +128,10 @@ define void @check_i128_align() { call void @check_i128_regalign(i32 0, i128 42) ; CHECK-NOT: mov x1 -; CHECK: movz x2, #42 -; CHECK: mov 
x3, xzr +; CHECK-LE: movz x2, #42 +; CHECK-LE: mov x3, xzr +; CHECK-BE: movz x3, #42 +; CHECK-BE: mov x2, xzr ; CHECK: bl check_i128_regalign ret void diff --git a/test/CodeGen/AArch64/i128-shift.ll b/test/CodeGen/AArch64/i128-shift.ll new file mode 100644 index 0000000..d786d44 --- /dev/null +++ b/test/CodeGen/AArch64/i128-shift.ll @@ -0,0 +1,43 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +define i128 @test_i128_lsl(i128 %a, i32 %shift) { +; CHECK-LABEL: test_i128_lsl: + + %sh_prom = zext i32 %shift to i128 + %shl = shl i128 %a, %sh_prom + +; CHECK: movz [[SIXTYFOUR:x[0-9]+]], #64 +; CHECK-NEXT: sub [[REVSHAMT:x[0-9]+]], [[SIXTYFOUR]], [[SHAMT_32:w[0-9]+]], uxtw +; CHECK-NEXT: lsr [[TMP1:x[0-9]+]], [[LO:x[0-9]+]], [[REVSHAMT]] +; CHECK: lsl [[TMP2:x[0-9]+]], [[HI:x[0-9]+]], [[SHAMT:x[0-9]+]] +; CHECK-NEXT: orr [[FALSEVAL:x[0-9]+]], [[TMP1]], [[TMP2]] +; CHECK-NEXT: sub [[EXTRASHAMT:x[0-9]+]], [[SHAMT]], #64 +; CHECK-NEXT: lsl [[TMP3:x[0-9]+]], [[LO]], [[EXTRASHAMT]] +; CHECK-NEXT: cmp [[EXTRASHAMT]], #0 +; CHECK-NEXT: csel [[RESULTHI:x[0-9]+]], [[TMP3]], [[FALSEVAL]], ge +; CHECK-NEXT: lsl [[TMP4:x[0-9]+]], [[LO]], [[SHAMT]] +; CHECK-NEXT: csel [[RESULTLO:x[0-9]+]], xzr, [[TMP4]], ge + + ret i128 %shl +} + +define i128 @test_i128_shr(i128 %a, i32 %shift) { +; CHECK-LABEL: test_i128_shr: + + %sh_prom = zext i32 %shift to i128 + %shr = lshr i128 %a, %sh_prom + +; CHECK: movz [[SIXTYFOUR]], #64 +; CHECK-NEXT: sub [[REVSHAMT:x[0-9]+]], [[SIXTYFOUR]], [[SHAMT_32:w[0-9]+]], uxtw +; CHECK-NEXT: lsl [[TMP2:x[0-9]+]], [[HI:x[0-9]+]], [[REVSHAMT]] +; CHECK: lsr [[TMP1:x[0-9]+]], [[LO:x[0-9]+]], [[SHAMT:x[0-9]+]] +; CHECK-NEXT: orr [[FALSEVAL:x[0-9]+]], [[TMP1]], [[TMP2]] +; CHECK-NEXT: sub [[EXTRASHAMT:x[0-9]+]], [[SHAMT]], #64 +; CHECK-NEXT: lsr [[TRUEVAL:x[0-9]+]], [[HI]], [[EXTRASHAMT]] +; CHECK-NEXT: cmp [[EXTRASHAMT]], #0 +; CHECK-NEXT: csel [[RESULTLO:x[0-9]+]], [[TRUEVAL]], [[FALSEVAL]], ge +; CHECK-NEXT: lsr [[TMP3:x[0-9]+]], [[HI]], [[SHAMT]] +; CHECK-NEXT: csel [[RESULTHI:x[0-9]+]], xzr, [[TMP3]], ge + + ret i128 %shr +} diff --git a/test/CodeGen/AArch64/init-array.ll b/test/CodeGen/AArch64/init-array.ll index 3ff1c1a..076ae27 100644 --- a/test/CodeGen/AArch64/init-array.ll +++ b/test/CodeGen/AArch64/init-array.ll @@ -1,4 +1,5 @@ ; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -use-init-array < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-none-eabi -verify-machineinstrs -use-init-array < %s | FileCheck %s define internal void @_GLOBAL__I_a() section ".text.startup" { ret void diff --git a/test/CodeGen/AArch64/inline-asm-constraints.ll b/test/CodeGen/AArch64/inline-asm-constraints.ll index 18a3b37..365453c 100644 --- a/test/CodeGen/AArch64/inline-asm-constraints.ll +++ b/test/CodeGen/AArch64/inline-asm-constraints.ll @@ -1,4 +1,4 @@ -;RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s +;RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon -no-integrated-as < %s | FileCheck %s define i64 @test_inline_constraint_r(i64 %base, i32 %offset) { ; CHECK-LABEL: test_inline_constraint_r: diff --git a/test/CodeGen/AArch64/inline-asm-modifiers.ll b/test/CodeGen/AArch64/inline-asm-modifiers.ll index b7f4d3c..cb66335 100644 --- a/test/CodeGen/AArch64/inline-asm-modifiers.ll +++ b/test/CodeGen/AArch64/inline-asm-modifiers.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -no-integrated-as < 
%s | FileCheck %s @var_simple = hidden global i32 0 @var_got = global i32 0 diff --git a/test/CodeGen/AArch64/jump-table.ll b/test/CodeGen/AArch64/jump-table.ll index 4bb0942..94717f5 100644 --- a/test/CodeGen/AArch64/jump-table.ll +++ b/test/CodeGen/AArch64/jump-table.ll @@ -1,5 +1,6 @@ ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -code-model=large -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK-LARGE %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic <%s | FileCheck --check-prefix=CHECK-PIC %s define i32 @test_jumptable(i32 %in) { ; CHECK: test_jumptable @@ -22,6 +23,12 @@ define i32 @test_jumptable(i32 %in) { ; CHECK-LARGE: ldr [[DEST:x[0-9]+]], [x[[JTADDR]], {{x[0-9]+}}, lsl #3] ; CHECK-LARGE: br [[DEST]] +; CHECK-PIC: adrp [[JTPAGE:x[0-9]+]], .LJTI0_0 +; CHECK-PIC: add x[[JT:[0-9]+]], [[JTPAGE]], #:lo12:.LJTI0_0 +; CHECK-PIC: ldrsw [[DEST:x[0-9]+]], [x[[JT]], {{x[0-9]+}}, lsl #2] +; CHECK-PIC: add [[TABLE:x[0-9]+]], [[DEST]], x[[JT]] +; CHECK-PIC: br [[TABLE]] + def: ret i32 0 @@ -47,3 +54,12 @@ lbl4: ; CHECK-NEXT: .xword ; CHECK-NEXT: .xword ; CHECK-NEXT: .xword + +; CHECK-PIC-NOT: .data_region +; CHECK-PIC: .LJTI0_0: +; CHECK-PIC-NEXT: .word +; CHECK-PIC-NEXT: .word +; CHECK-PIC-NEXT: .word +; CHECK-PIC-NEXT: .word +; CHECK-PIC-NEXT: .word +; CHECK-PIC-NOT: .end_data_region diff --git a/test/CodeGen/AArch64/mature-mc-support.ll b/test/CodeGen/AArch64/mature-mc-support.ll new file mode 100644 index 0000000..06e3cc7 --- /dev/null +++ b/test/CodeGen/AArch64/mature-mc-support.ll @@ -0,0 +1,12 @@ +; Test that inline assembly is parsed by the MC layer when MC support is mature +; (even when the output is assembly). + +; RUN: not llc -mtriple=aarch64-pc-linux < %s > /dev/null 2> %t1 +; RUN: FileCheck %s < %t1 + +; RUN: not llc -mtriple=aarch64-pc-linux -filetype=obj < %s > /dev/null 2> %t2 +; RUN: FileCheck %s < %t2 + +module asm " .this_directive_is_very_unlikely_to_exist" + +; CHECK: LLVM ERROR: Error parsing inline asm diff --git a/test/CodeGen/AArch64/misched-basic-A53.ll b/test/CodeGen/AArch64/misched-basic-A53.ll new file mode 100644 index 0000000..1555c48 --- /dev/null +++ b/test/CodeGen/AArch64/misched-basic-A53.ll @@ -0,0 +1,112 @@ +; REQUIRES: asserts +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a53 -pre-RA-sched=source -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s +; +; The Cortex-A53 machine model will cause the MADD instruction to be scheduled +; much higher than the ADD instructions in order to hide latency. When not +; specifying a subtarget, the MADD will remain near the end of the block. 
+; +; CHECK: ********** MI Scheduling ********** +; CHECK: main +; CHECK: *** Final schedule for BB#2 *** +; CHECK: SU(13) +; CHECK: MADDwwww +; CHECK: SU(4) +; CHECK: ADDwwi_lsl0_s +; CHECK: ********** INTERVALS ********** +@main.x = private unnamed_addr constant [8 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 4 +@main.y = private unnamed_addr constant [8 x i32] [i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2], align 4 + +; Function Attrs: nounwind +define i32 @main() #0 { +entry: + %retval = alloca i32, align 4 + %x = alloca [8 x i32], align 4 + %y = alloca [8 x i32], align 4 + %i = alloca i32, align 4 + %xx = alloca i32, align 4 + %yy = alloca i32, align 4 + store i32 0, i32* %retval + %0 = bitcast [8 x i32]* %x to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ([8 x i32]* @main.x to i8*), i64 32, i32 4, i1 false) + %1 = bitcast [8 x i32]* %y to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast ([8 x i32]* @main.y to i8*), i64 32, i32 4, i1 false) + store i32 0, i32* %xx, align 4 + store i32 0, i32* %yy, align 4 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %2 = load i32* %i, align 4 + %cmp = icmp slt i32 %2, 8 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %3 = load i32* %i, align 4 + %idxprom = sext i32 %3 to i64 + %arrayidx = getelementptr inbounds [8 x i32]* %x, i32 0, i64 %idxprom + %4 = load i32* %arrayidx, align 4 + %add = add nsw i32 %4, 1 + store i32 %add, i32* %xx, align 4 + %5 = load i32* %xx, align 4 + %add1 = add nsw i32 %5, 12 + store i32 %add1, i32* %xx, align 4 + %6 = load i32* %xx, align 4 + %add2 = add nsw i32 %6, 23 + store i32 %add2, i32* %xx, align 4 + %7 = load i32* %xx, align 4 + %add3 = add nsw i32 %7, 34 + store i32 %add3, i32* %xx, align 4 + %8 = load i32* %i, align 4 + %idxprom4 = sext i32 %8 to i64 + %arrayidx5 = getelementptr inbounds [8 x i32]* %y, i32 0, i64 %idxprom4 + %9 = load i32* %arrayidx5, align 4 + %10 = load i32* %yy, align 4 + %mul = mul nsw i32 %10, %9 + store i32 %mul, i32* %yy, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %11 = load i32* %i, align 4 + %inc = add nsw i32 %11, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond + %12 = load i32* %xx, align 4 + %13 = load i32* %yy, align 4 + %add6 = add nsw i32 %12, %13 + ret i32 %add6 +} + + +; The Cortex-A53 machine model will cause the FDIVvvv_42 to be raised to +; hide latency. Whereas normally there would only be a single FADDvvv_4s +; after it, this test checks to make sure there are more than one. 
+; +; CHECK: ********** MI Scheduling ********** +; CHECK: neon4xfloat:BB#0 +; CHECK: *** Final schedule for BB#0 *** +; CHECK: FDIVvvv_4S +; CHECK: FADDvvv_4S +; CHECK: FADDvvv_4S +; CHECK: ********** INTERVALS ********** +define <4 x float> @neon4xfloat(<4 x float> %A, <4 x float> %B) { + %tmp1 = fadd <4 x float> %A, %B; + %tmp2 = fadd <4 x float> %A, %tmp1; + %tmp3 = fadd <4 x float> %A, %tmp2; + %tmp4 = fadd <4 x float> %A, %tmp3; + %tmp5 = fadd <4 x float> %A, %tmp4; + %tmp6 = fadd <4 x float> %A, %tmp5; + %tmp7 = fadd <4 x float> %A, %tmp6; + %tmp8 = fadd <4 x float> %A, %tmp7; + %tmp9 = fdiv <4 x float> %A, %B; + %tmp10 = fadd <4 x float> %tmp8, %tmp9; + + ret <4 x float> %tmp10 +} + +; Function Attrs: nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1 + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind } diff --git a/test/CodeGen/AArch64/mul-lohi.ll b/test/CodeGen/AArch64/mul-lohi.ll new file mode 100644 index 0000000..f58c598 --- /dev/null +++ b/test/CodeGen/AArch64/mul-lohi.ll @@ -0,0 +1,19 @@ +; RUN: llc -mtriple=aarch64-linux-gnu %s -o - | FileCheck %s +; RUN: llc -mtriple=aarch64_be-linux-gnu %s -o - | FileCheck --check-prefix=CHECK-BE %s + +define i128 @test_128bitmul(i128 %lhs, i128 %rhs) { +; CHECK-LABEL: test_128bitmul: +; CHECK-DAG: umulh [[CARRY:x[0-9]+]], x0, x2 +; CHECK-DAG: madd [[PART1:x[0-9]+]], x0, x3, [[CARRY]] +; CHECK: madd x1, x1, x2, [[PART1]] +; CHECK: mul x0, x0, x2 + +; CHECK-BE-LABEL: test_128bitmul: +; CHECK-BE-DAG: umulh [[CARRY:x[0-9]+]], x1, x3 +; CHECK-BE-DAG: madd [[PART1:x[0-9]+]], x1, x2, [[CARRY]] +; CHECK-BE: madd x0, x0, x3, [[PART1]] +; CHECK-BE: mul x1, x1, x3 + + %prod = mul i128 %lhs, %rhs + ret i128 %prod +} diff --git a/test/CodeGen/AArch64/neon-2velem.ll b/test/CodeGen/AArch64/neon-2velem.ll index 9d61842..acffb14 100644 --- a/test/CodeGen/AArch64/neon-2velem.ll +++ b/test/CodeGen/AArch64/neon-2velem.ll @@ -45,6 +45,7 @@ declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) define <4 x i16> @test_vmla_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK: test_vmla_lane_s16: ; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %mul = mul <4 x i16> %shuffle, %b @@ -55,6 +56,7 @@ entry: define <8 x i16> @test_vmlaq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK: test_vmlaq_lane_s16: ; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> %mul = mul <8 x i16> %shuffle, %b @@ -65,6 +67,7 @@ entry: define <2 x i32> @test_vmla_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK: test_vmla_lane_s32: ; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %mul = mul <2 x i32> %shuffle, %b @@ -75,6 +78,7 @@ entry: define <4 x i32> @test_vmlaq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK: test_vmlaq_lane_s32: ; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> %mul = mul <4 x i32> %shuffle, %b @@ -85,6 +89,7 @@ 
entry: define <4 x i16> @test_vmla_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) { ; CHECK: test_vmla_laneq_s16: ; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> %mul = mul <4 x i16> %shuffle, %b @@ -95,6 +100,7 @@ entry: define <8 x i16> @test_vmlaq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) { ; CHECK: test_vmlaq_laneq_s16: ; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> %mul = mul <8 x i16> %shuffle, %b @@ -105,6 +111,7 @@ entry: define <2 x i32> @test_vmla_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) { ; CHECK: test_vmla_laneq_s32: ; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> %mul = mul <2 x i32> %shuffle, %b @@ -115,6 +122,7 @@ entry: define <4 x i32> @test_vmlaq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) { ; CHECK: test_vmlaq_laneq_s32: ; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> %mul = mul <4 x i32> %shuffle, %b @@ -125,6 +133,7 @@ entry: define <4 x i16> @test_vmls_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK: test_vmls_lane_s16: ; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %mul = mul <4 x i16> %shuffle, %b @@ -135,6 +144,7 @@ entry: define <8 x i16> @test_vmlsq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK: test_vmlsq_lane_s16: ; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> %mul = mul <8 x i16> %shuffle, %b @@ -145,6 +155,7 @@ entry: define <2 x i32> @test_vmls_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK: test_vmls_lane_s32: ; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %mul = mul <2 x i32> %shuffle, %b @@ -155,6 +166,7 @@ entry: define <4 x i32> @test_vmlsq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK: test_vmlsq_lane_s32: ; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> %mul = mul <4 x i32> %shuffle, %b @@ -165,6 +177,7 @@ entry: define <4 x i16> @test_vmls_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) { ; CHECK: test_vmls_laneq_s16: ; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> %mul = mul <4 x i16> %shuffle, %b @@ -175,6 +188,7 @@ entry: define <8 x i16> @test_vmlsq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) { ; CHECK: test_vmlsq_laneq_s16: ; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> %mul = mul <8 x i16> %shuffle, %b @@ -185,6 +199,7 @@ entry: define <2 x i32> @test_vmls_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) { ; CHECK: test_vmls_laneq_s32: ; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> %mul = mul 
<2 x i32> %shuffle, %b @@ -195,6 +210,7 @@ entry: define <4 x i32> @test_vmlsq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) { ; CHECK: test_vmlsq_laneq_s32: ; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> %mul = mul <4 x i32> %shuffle, %b @@ -205,6 +221,7 @@ entry: define <4 x i16> @test_vmul_lane_s16(<4 x i16> %a, <4 x i16> %v) { ; CHECK: test_vmul_lane_s16: ; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %mul = mul <4 x i16> %shuffle, %a @@ -214,6 +231,7 @@ entry: define <8 x i16> @test_vmulq_lane_s16(<8 x i16> %a, <4 x i16> %v) { ; CHECK: test_vmulq_lane_s16: ; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> %mul = mul <8 x i16> %shuffle, %a @@ -223,6 +241,7 @@ entry: define <2 x i32> @test_vmul_lane_s32(<2 x i32> %a, <2 x i32> %v) { ; CHECK: test_vmul_lane_s32: ; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %mul = mul <2 x i32> %shuffle, %a @@ -232,6 +251,7 @@ entry: define <4 x i32> @test_vmulq_lane_s32(<4 x i32> %a, <2 x i32> %v) { ; CHECK: test_vmulq_lane_s32: ; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> %mul = mul <4 x i32> %shuffle, %a @@ -241,6 +261,7 @@ entry: define <4 x i16> @test_vmul_lane_u16(<4 x i16> %a, <4 x i16> %v) { ; CHECK: test_vmul_lane_u16: ; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %mul = mul <4 x i16> %shuffle, %a @@ -250,6 +271,7 @@ entry: define <8 x i16> @test_vmulq_lane_u16(<8 x i16> %a, <4 x i16> %v) { ; CHECK: test_vmulq_lane_u16: ; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> %mul = mul <8 x i16> %shuffle, %a @@ -259,6 +281,7 @@ entry: define <2 x i32> @test_vmul_lane_u32(<2 x i32> %a, <2 x i32> %v) { ; CHECK: test_vmul_lane_u32: ; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %mul = mul <2 x i32> %shuffle, %a @@ -268,6 +291,7 @@ entry: define <4 x i32> @test_vmulq_lane_u32(<4 x i32> %a, <2 x i32> %v) { ; CHECK: test_vmulq_lane_u32: ; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> %mul = mul <4 x i32> %shuffle, %a @@ -277,6 +301,7 @@ entry: define <4 x i16> @test_vmul_laneq_s16(<4 x i16> %a, <8 x i16> %v) { ; CHECK: test_vmul_laneq_s16: ; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> %mul = mul <4 x i16> %shuffle, %a @@ -286,6 +311,7 @@ entry: define <8 x i16> @test_vmulq_laneq_s16(<8 x i16> %a, <8 x i16> %v) { ; CHECK: test_vmulq_laneq_s16: ; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> %mul = mul <8 x i16> %shuffle, %a @@ -295,6 +321,7 @@ entry: define <2 x i32> @test_vmul_laneq_s32(<2 x i32> %a, 
<4 x i32> %v) { ; CHECK: test_vmul_laneq_s32: ; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> %mul = mul <2 x i32> %shuffle, %a @@ -304,6 +331,7 @@ entry: define <4 x i32> @test_vmulq_laneq_s32(<4 x i32> %a, <4 x i32> %v) { ; CHECK: test_vmulq_laneq_s32: ; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> %mul = mul <4 x i32> %shuffle, %a @@ -313,6 +341,7 @@ entry: define <4 x i16> @test_vmul_laneq_u16(<4 x i16> %a, <8 x i16> %v) { ; CHECK: test_vmul_laneq_u16: ; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> %mul = mul <4 x i16> %shuffle, %a @@ -322,6 +351,7 @@ entry: define <8 x i16> @test_vmulq_laneq_u16(<8 x i16> %a, <8 x i16> %v) { ; CHECK: test_vmulq_laneq_u16: ; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> %mul = mul <8 x i16> %shuffle, %a @@ -331,6 +361,7 @@ entry: define <2 x i32> @test_vmul_laneq_u32(<2 x i32> %a, <4 x i32> %v) { ; CHECK: test_vmul_laneq_u32: ; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> %mul = mul <2 x i32> %shuffle, %a @@ -340,6 +371,7 @@ entry: define <4 x i32> @test_vmulq_laneq_u32(<4 x i32> %a, <4 x i32> %v) { ; CHECK: test_vmulq_laneq_u32: ; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> %mul = mul <4 x i32> %shuffle, %a @@ -349,6 +381,7 @@ entry: define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) { ; CHECK: test_vfma_lane_f32: ; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) @@ -360,6 +393,7 @@ declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) define <4 x float> @test_vfmaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) { ; CHECK: test_vfmaq_lane_f32: ; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) @@ -371,6 +405,7 @@ declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) define <2 x float> @test_vfma_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) { ; CHECK: test_vfma_laneq_f32: ; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret entry: %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) @@ -380,6 +415,7 @@ entry: define <4 x float> @test_vfmaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) { ; CHECK: test_vfmaq_laneq_f32: ; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret entry: %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) @@ -389,6 +425,7 @@ 
entry: define <2 x float> @test_vfms_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) { ; CHECK: test_vfms_lane_f32: ; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %sub = fsub <2 x float> , %v %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> @@ -399,6 +436,7 @@ entry: define <4 x float> @test_vfmsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) { ; CHECK: test_vfmsq_lane_f32: ; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %sub = fsub <2 x float> , %v %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> @@ -409,6 +447,7 @@ entry: define <2 x float> @test_vfms_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) { ; CHECK: test_vfms_laneq_f32: ; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret entry: %sub = fsub <4 x float> , %v %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> @@ -419,6 +458,7 @@ entry: define <4 x float> @test_vfmsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) { ; CHECK: test_vfmsq_laneq_f32: ; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret entry: %sub = fsub <4 x float> , %v %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> @@ -429,6 +469,7 @@ entry: define <2 x double> @test_vfmaq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) { ; CHECK: test_vfmaq_lane_f64: ; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +; CHECK-NEXT: ret entry: %lane = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) @@ -440,6 +481,7 @@ declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) define <2 x double> @test_vfmaq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) { ; CHECK: test_vfmaq_laneq_f64: ; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] +; CHECK-NEXT: ret entry: %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) @@ -449,6 +491,7 @@ entry: define <2 x double> @test_vfmsq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) { ; CHECK: test_vfmsq_lane_f64: ; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +; CHECK-NEXT: ret entry: %sub = fsub <1 x double> , %v %lane = shufflevector <1 x double> %sub, <1 x double> undef, <2 x i32> zeroinitializer @@ -459,6 +502,7 @@ entry: define <2 x double> @test_vfmsq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) { ; CHECK: test_vfmsq_laneq_f64: ; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] +; CHECK-NEXT: ret entry: %sub = fsub <2 x double> , %v %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> @@ -466,9 +510,57 @@ entry: ret <2 x double> %0 } +define float @test_vfmas_laneq_f32(float %a, float %b, <4 x float> %v) { +; CHECK-LABEL: test_vfmas_laneq_f32 +; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret +entry: + %extract = extractelement <4 x float> %v, i32 3 + %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a) + ret float %0 +} + +declare float @llvm.fma.f32(float, float, float) + +define double @test_vfmsd_lane_f64(double %a, double %b, <1 x double> %v) { +; CHECK-LABEL: test_vfmsd_lane_f64 +; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] +; CHECK-NEXT: ret 
+entry: + %extract.rhs = extractelement <1 x double> %v, i32 0 + %extract = fsub double -0.000000e+00, %extract.rhs + %0 = tail call double @llvm.fma.f64(double %b, double %extract, double %a) + ret double %0 +} + +declare double @llvm.fma.f64(double, double, double) + +define float @test_vfmss_laneq_f32(float %a, float %b, <4 x float> %v) { +; CHECK: test_vfmss_laneq_f32 +; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret +entry: + %extract.rhs = extractelement <4 x float> %v, i32 3 + %extract = fsub float -0.000000e+00, %extract.rhs + %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a) + ret float %0 +} + +define double @test_vfmsd_laneq_f64(double %a, double %b, <2 x double> %v) { +; CHECK-LABEL: test_vfmsd_laneq_f64 +; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] +; CHECK-NEXT: ret +entry: + %extract.rhs = extractelement <2 x double> %v, i32 1 + %extract = fsub double -0.000000e+00, %extract.rhs + %0 = tail call double @llvm.fma.f64(double %b, double %extract, double %a) + ret double %0 +} + define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK: test_vmlal_lane_s16: ; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -479,6 +571,7 @@ entry: define <2 x i64> @test_vmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK: test_vmlal_lane_s32: ; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -489,6 +582,7 @@ entry: define <4 x i32> @test_vmlal_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { ; CHECK: test_vmlal_laneq_s16: ; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -499,6 +593,7 @@ entry: define <2 x i64> @test_vmlal_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { ; CHECK: test_vmlal_laneq_s32: ; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -509,6 +604,7 @@ entry: define <4 x i32> @test_vmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK: test_vmlal_high_lane_s16: ; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> @@ -520,6 +616,7 @@ entry: define <2 x i64> @test_vmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK: test_vmlal_high_lane_s32: ; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> @@ -531,6 +628,7 @@ entry: define <4 x i32> @test_vmlal_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { ; CHECK: test_vmlal_high_laneq_s16: ; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] 
+; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> @@ -542,6 +640,7 @@ entry: define <2 x i64> @test_vmlal_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { ; CHECK: test_vmlal_high_laneq_s32: ; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> @@ -553,6 +652,7 @@ entry: define <4 x i32> @test_vmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK: test_vmlsl_lane_s16: ; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -563,6 +663,7 @@ entry: define <2 x i64> @test_vmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK: test_vmlsl_lane_s32: ; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -573,6 +674,7 @@ entry: define <4 x i32> @test_vmlsl_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { ; CHECK: test_vmlsl_laneq_s16: ; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -583,6 +685,7 @@ entry: define <2 x i64> @test_vmlsl_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { ; CHECK: test_vmlsl_laneq_s32: ; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -593,6 +696,7 @@ entry: define <4 x i32> @test_vmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK: test_vmlsl_high_lane_s16: ; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> @@ -604,6 +708,7 @@ entry: define <2 x i64> @test_vmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK: test_vmlsl_high_lane_s32: ; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> @@ -615,6 +720,7 @@ entry: define <4 x i32> @test_vmlsl_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { ; CHECK: test_vmlsl_high_laneq_s16: ; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> @@ -626,6 +732,7 @@ entry: define <2 x i64> @test_vmlsl_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { ; CHECK: test_vmlsl_high_laneq_s32: ; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x 
i32> %v, <4 x i32> undef, <2 x i32> @@ -637,6 +744,7 @@ entry: define <4 x i32> @test_vmlal_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK: test_vmlal_lane_u16: ; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -647,6 +755,7 @@ entry: define <2 x i64> @test_vmlal_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK: test_vmlal_lane_u32: ; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -657,6 +766,7 @@ entry: define <4 x i32> @test_vmlal_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { ; CHECK: test_vmlal_laneq_u16: ; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -667,6 +777,7 @@ entry: define <2 x i64> @test_vmlal_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { ; CHECK: test_vmlal_laneq_u32: ; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -677,6 +788,7 @@ entry: define <4 x i32> @test_vmlal_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK: test_vmlal_high_lane_u16: ; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> @@ -688,6 +800,7 @@ entry: define <2 x i64> @test_vmlal_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK: test_vmlal_high_lane_u32: ; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> @@ -699,6 +812,7 @@ entry: define <4 x i32> @test_vmlal_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { ; CHECK: test_vmlal_high_laneq_u16: ; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> @@ -710,6 +824,7 @@ entry: define <2 x i64> @test_vmlal_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { ; CHECK: test_vmlal_high_laneq_u32: ; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> @@ -721,6 +836,7 @@ entry: define <4 x i32> @test_vmlsl_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK: test_vmlsl_lane_u16: ; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -731,6 +847,7 @@ entry: define <2 x i64> @test_vmlsl_lane_u32(<2 x i64> %a, 
<2 x i32> %b, <2 x i32> %v) { ; CHECK: test_vmlsl_lane_u32: ; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -741,6 +858,7 @@ entry: define <4 x i32> @test_vmlsl_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { ; CHECK: test_vmlsl_laneq_u16: ; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -751,6 +869,7 @@ entry: define <2 x i64> @test_vmlsl_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { ; CHECK: test_vmlsl_laneq_u32: ; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -761,6 +880,7 @@ entry: define <4 x i32> @test_vmlsl_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK: test_vmlsl_high_lane_u16: ; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> @@ -772,6 +892,7 @@ entry: define <2 x i64> @test_vmlsl_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK: test_vmlsl_high_lane_u32: ; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> @@ -783,6 +904,7 @@ entry: define <4 x i32> @test_vmlsl_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { ; CHECK: test_vmlsl_high_laneq_u16: ; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> @@ -794,6 +916,7 @@ entry: define <2 x i64> @test_vmlsl_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { ; CHECK: test_vmlsl_high_laneq_u32: ; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> @@ -805,6 +928,7 @@ entry: define <4 x i32> @test_vmull_lane_s16(<4 x i16> %a, <4 x i16> %v) { ; CHECK: test_vmull_lane_s16: ; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %shuffle) @@ -814,6 +938,7 @@ entry: define <2 x i64> @test_vmull_lane_s32(<2 x i32> %a, <2 x i32> %v) { ; CHECK: test_vmull_lane_s32: ; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %shuffle) @@ -823,6 +948,7 @@ entry: define <4 x i32> @test_vmull_lane_u16(<4 x i16> %a, <4 x i16> %v) { ; CHECK: test_vmull_lane_u16: ; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +; CHECK-NEXT: ret entry: 
%shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %shuffle) @@ -832,6 +958,7 @@ entry: define <2 x i64> @test_vmull_lane_u32(<2 x i32> %a, <2 x i32> %v) { ; CHECK: test_vmull_lane_u32: ; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %shuffle) @@ -841,6 +968,7 @@ entry: define <4 x i32> @test_vmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) { ; CHECK: test_vmull_high_lane_s16: ; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> @@ -851,6 +979,7 @@ entry: define <2 x i64> @test_vmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) { ; CHECK: test_vmull_high_lane_s32: ; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> @@ -861,6 +990,7 @@ entry: define <4 x i32> @test_vmull_high_lane_u16(<8 x i16> %a, <4 x i16> %v) { ; CHECK: test_vmull_high_lane_u16: ; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> @@ -871,6 +1001,7 @@ entry: define <2 x i64> @test_vmull_high_lane_u32(<4 x i32> %a, <2 x i32> %v) { ; CHECK: test_vmull_high_lane_u32: ; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> @@ -881,6 +1012,7 @@ entry: define <4 x i32> @test_vmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) { ; CHECK: test_vmull_laneq_s16: ; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %shuffle) @@ -890,6 +1022,7 @@ entry: define <2 x i64> @test_vmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) { ; CHECK: test_vmull_laneq_s32: ; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %shuffle) @@ -899,6 +1032,7 @@ entry: define <4 x i32> @test_vmull_laneq_u16(<4 x i16> %a, <8 x i16> %v) { ; CHECK: test_vmull_laneq_u16: ; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %shuffle) @@ -908,6 +1042,7 @@ entry: define <2 x i64> @test_vmull_laneq_u32(<2 x i32> %a, <4 x i32> %v) { ; CHECK: test_vmull_laneq_u32: ; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %shuffle) @@ -917,6 +1052,7 @@ entry: define <4 x i32> @test_vmull_high_laneq_s16(<8 
x i16> %a, <8 x i16> %v) { ; CHECK: test_vmull_high_laneq_s16: ; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> @@ -927,6 +1063,7 @@ entry: define <2 x i64> @test_vmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) { ; CHECK: test_vmull_high_laneq_s32: ; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> @@ -937,6 +1074,7 @@ entry: define <4 x i32> @test_vmull_high_laneq_u16(<8 x i16> %a, <8 x i16> %v) { ; CHECK: test_vmull_high_laneq_u16: ; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> @@ -947,6 +1085,7 @@ entry: define <2 x i64> @test_vmull_high_laneq_u32(<4 x i32> %a, <4 x i32> %v) { ; CHECK: test_vmull_high_laneq_u32: ; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> @@ -957,6 +1096,7 @@ entry: define <4 x i32> @test_vqdmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK: test_vqdmlal_lane_s16: ; CHECK: qdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -967,6 +1107,7 @@ entry: define <2 x i64> @test_vqdmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK: test_vqdmlal_lane_s32: ; CHECK: qdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -977,6 +1118,7 @@ entry: define <4 x i32> @test_vqdmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK: test_vqdmlal_high_lane_s16: ; CHECK: qdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> @@ -988,6 +1130,7 @@ entry: define <2 x i64> @test_vqdmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK: test_vqdmlal_high_lane_s32: ; CHECK: qdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> @@ -999,6 +1142,7 @@ entry: define <4 x i32> @test_vqdmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK: test_vqdmlsl_lane_s16: ; CHECK: qdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %vqdmlsl2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -1009,6 +1153,7 @@ entry: define <2 x i64> @test_vqdmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK: test_vqdmlsl_lane_s32: ; CHECK: qdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: 
ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %vqdmlsl2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -1019,6 +1164,7 @@ entry: define <4 x i32> @test_vqdmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK: test_vqdmlsl_high_lane_s16: ; CHECK: qdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> @@ -1030,6 +1176,7 @@ entry: define <2 x i64> @test_vqdmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK: test_vqdmlsl_high_lane_s32: ; CHECK: qdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> @@ -1041,6 +1188,7 @@ entry: define <4 x i32> @test_vqdmull_lane_s16(<4 x i16> %a, <4 x i16> %v) { ; CHECK: test_vqdmull_lane_s16: ; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) @@ -1050,6 +1198,7 @@ entry: define <2 x i64> @test_vqdmull_lane_s32(<2 x i32> %a, <2 x i32> %v) { ; CHECK: test_vqdmull_lane_s32: ; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) @@ -1059,6 +1208,7 @@ entry: define <4 x i32> @test_vqdmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) { ; CHECK: test_vqdmull_laneq_s16: ; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) @@ -1068,6 +1218,7 @@ entry: define <2 x i64> @test_vqdmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) { ; CHECK: test_vqdmull_laneq_s32: ; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) @@ -1077,6 +1228,7 @@ entry: define <4 x i32> @test_vqdmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) { ; CHECK: test_vqdmull_high_lane_s16: ; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> @@ -1087,6 +1239,7 @@ entry: define <2 x i64> @test_vqdmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) { ; CHECK: test_vqdmull_high_lane_s32: ; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> @@ -1097,6 +1250,7 @@ entry: define <4 x i32> @test_vqdmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) { ; CHECK: test_vqdmull_high_laneq_s16: ; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> 
%v, <8 x i16> undef, <4 x i32> @@ -1107,6 +1261,7 @@ entry: define <2 x i64> @test_vqdmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) { ; CHECK: test_vqdmull_high_laneq_s32: ; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> @@ -1117,6 +1272,7 @@ entry: define <4 x i16> @test_vqdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) { ; CHECK: test_vqdmulh_lane_s16: ; CHECK: qdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %vqdmulh2.i = tail call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) @@ -1126,6 +1282,7 @@ entry: define <8 x i16> @test_vqdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) { ; CHECK: test_vqdmulhq_lane_s16: ; CHECK: qdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> %vqdmulh2.i = tail call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) @@ -1135,6 +1292,7 @@ entry: define <2 x i32> @test_vqdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) { ; CHECK: test_vqdmulh_lane_s32: ; CHECK: qdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %vqdmulh2.i = tail call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) @@ -1144,6 +1302,7 @@ entry: define <4 x i32> @test_vqdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) { ; CHECK: test_vqdmulhq_lane_s32: ; CHECK: qdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> %vqdmulh2.i = tail call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) @@ -1153,6 +1312,7 @@ entry: define <4 x i16> @test_vqrdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) { ; CHECK: test_vqrdmulh_lane_s16: ; CHECK: qrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %vqrdmulh2.i = tail call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) @@ -1162,6 +1322,7 @@ entry: define <8 x i16> @test_vqrdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) { ; CHECK: test_vqrdmulhq_lane_s16: ; CHECK: qrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> %vqrdmulh2.i = tail call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) @@ -1171,6 +1332,7 @@ entry: define <2 x i32> @test_vqrdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) { ; CHECK: test_vqrdmulh_lane_s32: ; CHECK: qrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %vqrdmulh2.i = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) @@ -1180,6 +1342,7 @@ entry: define <4 x i32> @test_vqrdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) { ; CHECK: test_vqrdmulhq_lane_s32: ; CHECK: qrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> %vqrdmulh2.i = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) @@ -1189,6 +1352,7 @@ entry: define <2 x 
float> @test_vmul_lane_f32(<2 x float> %a, <2 x float> %v) { ; CHECK: test_vmul_lane_f32: ; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> %mul = fmul <2 x float> %shuffle, %a @@ -1198,6 +1362,7 @@ entry: define <1 x double> @test_vmul_lane_f64(<1 x double> %a, <1 x double> %v) { ; CHECK: test_vmul_lane_f64: ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] +; CHECK-NEXT: ret entry: %0 = bitcast <1 x double> %a to <8 x i8> %1 = bitcast <8 x i8> %0 to double @@ -1210,6 +1375,7 @@ entry: define <4 x float> @test_vmulq_lane_f32(<4 x float> %a, <2 x float> %v) { ; CHECK: test_vmulq_lane_f32: ; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> %mul = fmul <4 x float> %shuffle, %a @@ -1219,6 +1385,7 @@ entry: define <2 x double> @test_vmulq_lane_f64(<2 x double> %a, <1 x double> %v) { ; CHECK: test_vmulq_lane_f64: ; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer %mul = fmul <2 x double> %shuffle, %a @@ -1228,6 +1395,7 @@ entry: define <2 x float> @test_vmul_laneq_f32(<2 x float> %a, <4 x float> %v) { ; CHECK: test_vmul_laneq_f32: ; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> %mul = fmul <2 x float> %shuffle, %a @@ -1237,6 +1405,7 @@ entry: define <1 x double> @test_vmul_laneq_f64(<1 x double> %a, <2 x double> %v) { ; CHECK: test_vmul_laneq_f64: ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] +; CHECK-NEXT: ret entry: %0 = bitcast <1 x double> %a to <8 x i8> %1 = bitcast <8 x i8> %0 to double @@ -1249,6 +1418,7 @@ entry: define <4 x float> @test_vmulq_laneq_f32(<4 x float> %a, <4 x float> %v) { ; CHECK: test_vmulq_laneq_f32: ; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> %mul = fmul <4 x float> %shuffle, %a @@ -1258,6 +1428,7 @@ entry: define <2 x double> @test_vmulq_laneq_f64(<2 x double> %a, <2 x double> %v) { ; CHECK: test_vmulq_laneq_f64: ; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> %mul = fmul <2 x double> %shuffle, %a @@ -1267,6 +1438,7 @@ entry: define <2 x float> @test_vmulx_lane_f32(<2 x float> %a, <2 x float> %v) { ; CHECK: test_vmulx_lane_f32: ; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) @@ -1276,6 +1448,7 @@ entry: define <4 x float> @test_vmulxq_lane_f32(<4 x float> %a, <2 x float> %v) { ; CHECK: test_vmulxq_lane_f32: ; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) @@ -1285,6 +1458,7 @@ entry: define <2 x double> @test_vmulxq_lane_f64(<2 x double> %a, <1 x double> %v) { ; CHECK: test_vmulxq_lane_f64: ; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +; 
CHECK-NEXT: ret entry: %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) @@ -1294,6 +1468,7 @@ entry: define <2 x float> @test_vmulx_laneq_f32(<2 x float> %a, <4 x float> %v) { ; CHECK: test_vmulx_laneq_f32: ; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) @@ -1303,6 +1478,7 @@ entry: define <4 x float> @test_vmulxq_laneq_f32(<4 x float> %a, <4 x float> %v) { ; CHECK: test_vmulxq_laneq_f32: ; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) @@ -1312,6 +1488,7 @@ entry: define <2 x double> @test_vmulxq_laneq_f64(<2 x double> %a, <2 x double> %v) { ; CHECK: test_vmulxq_laneq_f64: ; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) @@ -1321,6 +1498,7 @@ entry: define <4 x i16> @test_vmla_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK: test_vmla_lane_s16_0: ; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %mul = mul <4 x i16> %shuffle, %b @@ -1331,6 +1509,7 @@ entry: define <8 x i16> @test_vmlaq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK: test_vmlaq_lane_s16_0: ; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer %mul = mul <8 x i16> %shuffle, %b @@ -1341,6 +1520,7 @@ entry: define <2 x i32> @test_vmla_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK: test_vmla_lane_s32_0: ; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %mul = mul <2 x i32> %shuffle, %b @@ -1351,6 +1531,7 @@ entry: define <4 x i32> @test_vmlaq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK: test_vmlaq_lane_s32_0: ; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer %mul = mul <4 x i32> %shuffle, %b @@ -1361,6 +1542,7 @@ entry: define <4 x i16> @test_vmla_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) { ; CHECK: test_vmla_laneq_s16_0: ; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer %mul = mul <4 x i16> %shuffle, %b @@ -1371,6 +1553,7 @@ entry: define <8 x i16> @test_vmlaq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) { ; CHECK: test_vmlaq_laneq_s16_0: ; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer %mul = mul <8 x i16> %shuffle, %b @@ -1381,6 +1564,7 @@ entry: define <2 x 
i32> @test_vmla_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) { ; CHECK: test_vmla_laneq_s32_0: ; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer %mul = mul <2 x i32> %shuffle, %b @@ -1391,6 +1575,7 @@ entry: define <4 x i32> @test_vmlaq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) { ; CHECK: test_vmlaq_laneq_s32_0: ; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer %mul = mul <4 x i32> %shuffle, %b @@ -1401,6 +1586,7 @@ entry: define <4 x i16> @test_vmls_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK: test_vmls_lane_s16_0: ; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %mul = mul <4 x i16> %shuffle, %b @@ -1411,6 +1597,7 @@ entry: define <8 x i16> @test_vmlsq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK: test_vmlsq_lane_s16_0: ; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer %mul = mul <8 x i16> %shuffle, %b @@ -1421,6 +1608,7 @@ entry: define <2 x i32> @test_vmls_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK: test_vmls_lane_s32_0: ; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %mul = mul <2 x i32> %shuffle, %b @@ -1431,6 +1619,7 @@ entry: define <4 x i32> @test_vmlsq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK: test_vmlsq_lane_s32_0: ; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer %mul = mul <4 x i32> %shuffle, %b @@ -1441,6 +1630,7 @@ entry: define <4 x i16> @test_vmls_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) { ; CHECK: test_vmls_laneq_s16_0: ; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer %mul = mul <4 x i16> %shuffle, %b @@ -1451,6 +1641,7 @@ entry: define <8 x i16> @test_vmlsq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) { ; CHECK: test_vmlsq_laneq_s16_0: ; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer %mul = mul <8 x i16> %shuffle, %b @@ -1461,6 +1652,7 @@ entry: define <2 x i32> @test_vmls_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) { ; CHECK: test_vmls_laneq_s32_0: ; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer %mul = mul <2 x i32> %shuffle, %b @@ -1471,6 +1663,7 @@ entry: define <4 x i32> @test_vmlsq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) { ; CHECK: test_vmlsq_laneq_s32_0: ; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer %mul = mul <4 x i32> %shuffle, %b @@ -1481,6 +1674,7 @@ entry: define <4 x i16> @test_vmul_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { ; 
CHECK: test_vmul_lane_s16_0: ; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %mul = mul <4 x i16> %shuffle, %a @@ -1490,6 +1684,7 @@ entry: define <8 x i16> @test_vmulq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { ; CHECK: test_vmulq_lane_s16_0: ; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer %mul = mul <8 x i16> %shuffle, %a @@ -1499,6 +1694,7 @@ entry: define <2 x i32> @test_vmul_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { ; CHECK: test_vmul_lane_s32_0: ; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %mul = mul <2 x i32> %shuffle, %a @@ -1508,6 +1704,7 @@ entry: define <4 x i32> @test_vmulq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { ; CHECK: test_vmulq_lane_s32_0: ; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer %mul = mul <4 x i32> %shuffle, %a @@ -1517,6 +1714,7 @@ entry: define <4 x i16> @test_vmul_lane_u16_0(<4 x i16> %a, <4 x i16> %v) { ; CHECK: test_vmul_lane_u16_0: ; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %mul = mul <4 x i16> %shuffle, %a @@ -1526,6 +1724,7 @@ entry: define <8 x i16> @test_vmulq_lane_u16_0(<8 x i16> %a, <4 x i16> %v) { ; CHECK: test_vmulq_lane_u16_0: ; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer %mul = mul <8 x i16> %shuffle, %a @@ -1535,6 +1734,7 @@ entry: define <2 x i32> @test_vmul_lane_u32_0(<2 x i32> %a, <2 x i32> %v) { ; CHECK: test_vmul_lane_u32_0: ; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %mul = mul <2 x i32> %shuffle, %a @@ -1544,6 +1744,7 @@ entry: define <4 x i32> @test_vmulq_lane_u32_0(<4 x i32> %a, <2 x i32> %v) { ; CHECK: test_vmulq_lane_u32_0: ; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer %mul = mul <4 x i32> %shuffle, %a @@ -1553,6 +1754,7 @@ entry: define <4 x i16> @test_vmul_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) { ; CHECK: test_vmul_laneq_s16_0: ; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer %mul = mul <4 x i16> %shuffle, %a @@ -1562,6 +1764,7 @@ entry: define <8 x i16> @test_vmulq_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) { ; CHECK: test_vmulq_laneq_s16_0: ; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer %mul = mul <8 x i16> %shuffle, %a @@ -1571,6 +1774,7 @@ entry: define <2 x i32> @test_vmul_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) { ; CHECK: test_vmul_laneq_s32_0: ; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer %mul = 
mul <2 x i32> %shuffle, %a @@ -1580,6 +1784,7 @@ entry: define <4 x i32> @test_vmulq_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) { ; CHECK: test_vmulq_laneq_s32_0: ; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer %mul = mul <4 x i32> %shuffle, %a @@ -1589,6 +1794,7 @@ entry: define <4 x i16> @test_vmul_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) { ; CHECK: test_vmul_laneq_u16_0: ; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer %mul = mul <4 x i16> %shuffle, %a @@ -1598,6 +1804,7 @@ entry: define <8 x i16> @test_vmulq_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) { ; CHECK: test_vmulq_laneq_u16_0: ; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer %mul = mul <8 x i16> %shuffle, %a @@ -1607,6 +1814,7 @@ entry: define <2 x i32> @test_vmul_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) { ; CHECK: test_vmul_laneq_u32_0: ; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer %mul = mul <2 x i32> %shuffle, %a @@ -1616,6 +1824,7 @@ entry: define <4 x i32> @test_vmulq_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) { ; CHECK: test_vmulq_laneq_u32_0: ; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer %mul = mul <4 x i32> %shuffle, %a @@ -1625,6 +1834,7 @@ entry: define <2 x float> @test_vfma_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) { ; CHECK: test_vfma_lane_f32_0: ; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) @@ -1634,6 +1844,7 @@ entry: define <4 x float> @test_vfmaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) { ; CHECK: test_vfmaq_lane_f32_0: ; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) @@ -1643,6 +1854,7 @@ entry: define <2 x float> @test_vfma_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) { ; CHECK: test_vfma_laneq_f32_0: ; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) @@ -1652,6 +1864,7 @@ entry: define <4 x float> @test_vfmaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) { ; CHECK: test_vfmaq_laneq_f32_0: ; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) @@ -1661,6 +1874,7 @@ entry: define <2 x float> @test_vfms_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) { ; CHECK: test_vfms_lane_f32_0: ; CHECK: fmls 
{{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %sub = fsub <2 x float> , %v %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> zeroinitializer @@ -1671,6 +1885,7 @@ entry: define <4 x float> @test_vfmsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) { ; CHECK: test_vfmsq_lane_f32_0: ; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %sub = fsub <2 x float> , %v %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> zeroinitializer @@ -1681,6 +1896,7 @@ entry: define <2 x float> @test_vfms_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) { ; CHECK: test_vfms_laneq_f32_0: ; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %sub = fsub <4 x float> , %v %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> zeroinitializer @@ -1691,6 +1907,7 @@ entry: define <4 x float> @test_vfmsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) { ; CHECK: test_vfmsq_laneq_f32_0: ; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %sub = fsub <4 x float> , %v %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> zeroinitializer @@ -1701,6 +1918,7 @@ entry: define <2 x double> @test_vfmaq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) { ; CHECK: test_vfmaq_laneq_f64_0: ; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +; CHECK-NEXT: ret entry: %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) @@ -1710,6 +1928,7 @@ entry: define <2 x double> @test_vfmsq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) { ; CHECK: test_vfmsq_laneq_f64_0: ; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +; CHECK-NEXT: ret entry: %sub = fsub <2 x double> , %v %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> zeroinitializer @@ -1720,6 +1939,7 @@ entry: define <4 x i32> @test_vmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK: test_vmlal_lane_s16_0: ; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -1730,6 +1950,7 @@ entry: define <2 x i64> @test_vmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK: test_vmlal_lane_s32_0: ; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -1740,6 +1961,7 @@ entry: define <4 x i32> @test_vmlal_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { ; CHECK: test_vmlal_laneq_s16_0: ; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -1750,6 +1972,7 @@ entry: define <2 x i64> @test_vmlal_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { ; CHECK: test_vmlal_laneq_s32_0: ; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x 
i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -1760,6 +1983,7 @@ entry: define <4 x i32> @test_vmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK: test_vmlal_high_lane_s16_0: ; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer @@ -1771,6 +1995,7 @@ entry: define <2 x i64> @test_vmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK: test_vmlal_high_lane_s32_0: ; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer @@ -1782,6 +2007,7 @@ entry: define <4 x i32> @test_vmlal_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { ; CHECK: test_vmlal_high_laneq_s16_0: ; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer @@ -1793,6 +2019,7 @@ entry: define <2 x i64> @test_vmlal_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { ; CHECK: test_vmlal_high_laneq_s32_0: ; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer @@ -1804,6 +2031,7 @@ entry: define <4 x i32> @test_vmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK: test_vmlsl_lane_s16_0: ; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -1814,6 +2042,7 @@ entry: define <2 x i64> @test_vmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK: test_vmlsl_lane_s32_0: ; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -1824,6 +2053,7 @@ entry: define <4 x i32> @test_vmlsl_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { ; CHECK: test_vmlsl_laneq_s16_0: ; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -1834,6 +2064,7 @@ entry: define <2 x i64> @test_vmlsl_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { ; CHECK: test_vmlsl_laneq_s32_0: ; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -1844,6 +2075,7 @@ entry: define <4 x i32> @test_vmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK: test_vmlsl_high_lane_s16_0: ; CHECK: mlsl2 
{{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer @@ -1855,6 +2087,7 @@ entry: define <2 x i64> @test_vmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK: test_vmlsl_high_lane_s32_0: ; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer @@ -1866,6 +2099,7 @@ entry: define <4 x i32> @test_vmlsl_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { ; CHECK: test_vmlsl_high_laneq_s16_0: ; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer @@ -1877,6 +2111,7 @@ entry: define <2 x i64> @test_vmlsl_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { ; CHECK: test_vmlsl_high_laneq_s32_0: ; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer @@ -1888,6 +2123,7 @@ entry: define <4 x i32> @test_vmlal_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK: test_vmlal_lane_u16_0: ; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -1898,6 +2134,7 @@ entry: define <2 x i64> @test_vmlal_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK: test_vmlal_lane_u32_0: ; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -1908,6 +2145,7 @@ entry: define <4 x i32> @test_vmlal_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { ; CHECK: test_vmlal_laneq_u16_0: ; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -1918,6 +2156,7 @@ entry: define <2 x i64> @test_vmlal_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { ; CHECK: test_vmlal_laneq_u32_0: ; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -1928,6 +2167,7 @@ entry: define <4 x i32> @test_vmlal_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK: test_vmlal_high_lane_u16_0: ; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer @@ -1939,6 +2179,7 @@ entry: define <2 x i64> @test_vmlal_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, 
<2 x i32> %v) { ; CHECK: test_vmlal_high_lane_u32_0: ; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer @@ -1950,6 +2191,7 @@ entry: define <4 x i32> @test_vmlal_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { ; CHECK: test_vmlal_high_laneq_u16_0: ; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer @@ -1961,6 +2203,7 @@ entry: define <2 x i64> @test_vmlal_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { ; CHECK: test_vmlal_high_laneq_u32_0: ; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer @@ -1972,6 +2215,7 @@ entry: define <4 x i32> @test_vmlsl_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK: test_vmlsl_lane_u16_0: ; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -1982,6 +2226,7 @@ entry: define <2 x i64> @test_vmlsl_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK: test_vmlsl_lane_u32_0: ; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -1992,6 +2237,7 @@ entry: define <4 x i32> @test_vmlsl_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { ; CHECK: test_vmlsl_laneq_u16_0: ; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -2002,6 +2248,7 @@ entry: define <2 x i64> @test_vmlsl_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { ; CHECK: test_vmlsl_laneq_u32_0: ; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -2012,6 +2259,7 @@ entry: define <4 x i32> @test_vmlsl_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK: test_vmlsl_high_lane_u16_0: ; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer @@ -2023,6 +2271,7 @@ entry: define <2 x i64> @test_vmlsl_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK: test_vmlsl_high_lane_u32_0: ; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer @@ -2034,6 +2283,7 @@ entry: 
define <4 x i32> @test_vmlsl_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { ; CHECK: test_vmlsl_high_laneq_u16_0: ; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer @@ -2045,6 +2295,7 @@ entry: define <2 x i64> @test_vmlsl_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { ; CHECK: test_vmlsl_high_laneq_u32_0: ; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer @@ -2056,6 +2307,7 @@ entry: define <4 x i32> @test_vmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { ; CHECK: test_vmull_lane_s16_0: ; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %shuffle) @@ -2065,6 +2317,7 @@ entry: define <2 x i64> @test_vmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { ; CHECK: test_vmull_lane_s32_0: ; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %shuffle) @@ -2074,6 +2327,7 @@ entry: define <4 x i32> @test_vmull_lane_u16_0(<4 x i16> %a, <4 x i16> %v) { ; CHECK: test_vmull_lane_u16_0: ; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %shuffle) @@ -2083,6 +2337,7 @@ entry: define <2 x i64> @test_vmull_lane_u32_0(<2 x i32> %a, <2 x i32> %v) { ; CHECK: test_vmull_lane_u32_0: ; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %shuffle) @@ -2092,6 +2347,7 @@ entry: define <4 x i32> @test_vmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { ; CHECK: test_vmull_high_lane_s16_0: ; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer @@ -2102,6 +2358,7 @@ entry: define <2 x i64> @test_vmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { ; CHECK: test_vmull_high_lane_s32_0: ; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer @@ -2112,6 +2369,7 @@ entry: define <4 x i32> @test_vmull_high_lane_u16_0(<8 x i16> %a, <4 x i16> %v) { ; CHECK: test_vmull_high_lane_u16_0: ; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer @@ -2122,6 +2380,7 @@ entry: define <2 x i64> 
@test_vmull_high_lane_u32_0(<4 x i32> %a, <2 x i32> %v) { ; CHECK: test_vmull_high_lane_u32_0: ; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer @@ -2132,6 +2391,7 @@ entry: define <4 x i32> @test_vmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) { ; CHECK: test_vmull_laneq_s16_0: ; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %shuffle) @@ -2141,6 +2401,7 @@ entry: define <2 x i64> @test_vmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) { ; CHECK: test_vmull_laneq_s32_0: ; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %shuffle) @@ -2150,6 +2411,7 @@ entry: define <4 x i32> @test_vmull_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) { ; CHECK: test_vmull_laneq_u16_0: ; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %shuffle) @@ -2159,6 +2421,7 @@ entry: define <2 x i64> @test_vmull_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) { ; CHECK: test_vmull_laneq_u32_0: ; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %shuffle) @@ -2168,6 +2431,7 @@ entry: define <4 x i32> @test_vmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) { ; CHECK: test_vmull_high_laneq_s16_0: ; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer @@ -2178,6 +2442,7 @@ entry: define <2 x i64> @test_vmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) { ; CHECK: test_vmull_high_laneq_s32_0: ; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer @@ -2188,6 +2453,7 @@ entry: define <4 x i32> @test_vmull_high_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) { ; CHECK: test_vmull_high_laneq_u16_0: ; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer @@ -2198,6 +2464,7 @@ entry: define <2 x i64> @test_vmull_high_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) { ; CHECK: test_vmull_high_laneq_u32_0: ; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer @@ -2208,6 +2475,7 @@ entry: define <4 x i32> @test_vqdmlal_lane_s16_0(<4 x i32> %a, <4 x i16> 
%b, <4 x i16> %v) { ; CHECK: test_vqdmlal_lane_s16_0: ; CHECK: qdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -2218,6 +2486,7 @@ entry: define <2 x i64> @test_vqdmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK: test_vqdmlal_lane_s32_0: ; CHECK: qdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -2228,6 +2497,7 @@ entry: define <4 x i32> @test_vqdmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK: test_vqdmlal_high_lane_s16_0: ; CHECK: qdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer @@ -2239,6 +2509,7 @@ entry: define <2 x i64> @test_vqdmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK: test_vqdmlal_high_lane_s32_0: ; CHECK: qdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer @@ -2250,6 +2521,7 @@ entry: define <4 x i32> @test_vqdmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK: test_vqdmlsl_lane_s16_0: ; CHECK: qdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %vqdmlsl2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -2260,6 +2532,7 @@ entry: define <2 x i64> @test_vqdmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK: test_vqdmlsl_lane_s32_0: ; CHECK: qdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %vqdmlsl2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -2270,6 +2543,7 @@ entry: define <4 x i32> @test_vqdmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK: test_vqdmlsl_high_lane_s16_0: ; CHECK: qdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer @@ -2281,6 +2555,7 @@ entry: define <2 x i64> @test_vqdmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK: test_vqdmlsl_high_lane_s32_0: ; CHECK: qdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer @@ -2292,6 +2567,7 @@ entry: define <4 x i32> @test_vqdmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { ; CHECK: test_vqdmull_lane_s16_0: ; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %vqdmull2.i = tail call <4 x i32> 
@llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) @@ -2301,6 +2577,7 @@ entry: define <2 x i64> @test_vqdmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { ; CHECK: test_vqdmull_lane_s32_0: ; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) @@ -2310,6 +2587,7 @@ entry: define <4 x i32> @test_vqdmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) { ; CHECK: test_vqdmull_laneq_s16_0: ; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) @@ -2319,6 +2597,7 @@ entry: define <2 x i64> @test_vqdmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) { ; CHECK: test_vqdmull_laneq_s32_0: ; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) @@ -2328,6 +2607,7 @@ entry: define <4 x i32> @test_vqdmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { ; CHECK: test_vqdmull_high_lane_s16_0: ; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer @@ -2338,6 +2618,7 @@ entry: define <2 x i64> @test_vqdmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { ; CHECK: test_vqdmull_high_lane_s32_0: ; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer @@ -2348,6 +2629,7 @@ entry: define <4 x i32> @test_vqdmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) { ; CHECK: test_vqdmull_high_laneq_s16_0: ; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer @@ -2358,6 +2640,7 @@ entry: define <2 x i64> @test_vqdmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) { ; CHECK: test_vqdmull_high_laneq_s32_0: ; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer @@ -2368,6 +2651,7 @@ entry: define <4 x i16> @test_vqdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { ; CHECK: test_vqdmulh_lane_s16_0: ; CHECK: qdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %vqdmulh2.i = tail call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) @@ -2377,6 +2661,7 @@ entry: define <8 x i16> @test_vqdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { ; CHECK: test_vqdmulhq_lane_s16_0: ; CHECK: qdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer 
%vqdmulh2.i = tail call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) @@ -2386,6 +2671,7 @@ entry: define <2 x i32> @test_vqdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { ; CHECK: test_vqdmulh_lane_s32_0: ; CHECK: qdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %vqdmulh2.i = tail call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) @@ -2395,6 +2681,7 @@ entry: define <4 x i32> @test_vqdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { ; CHECK: test_vqdmulhq_lane_s32_0: ; CHECK: qdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer %vqdmulh2.i = tail call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) @@ -2404,6 +2691,7 @@ entry: define <4 x i16> @test_vqrdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { ; CHECK: test_vqrdmulh_lane_s16_0: ; CHECK: qrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %vqrdmulh2.i = tail call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) @@ -2413,6 +2701,7 @@ entry: define <8 x i16> @test_vqrdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { ; CHECK: test_vqrdmulhq_lane_s16_0: ; CHECK: qrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer %vqrdmulh2.i = tail call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) @@ -2422,6 +2711,7 @@ entry: define <2 x i32> @test_vqrdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { ; CHECK: test_vqrdmulh_lane_s32_0: ; CHECK: qrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %vqrdmulh2.i = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) @@ -2431,6 +2721,7 @@ entry: define <4 x i32> @test_vqrdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { ; CHECK: test_vqrdmulhq_lane_s32_0: ; CHECK: qrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer %vqrdmulh2.i = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) @@ -2440,6 +2731,7 @@ entry: define <2 x float> @test_vmul_lane_f32_0(<2 x float> %a, <2 x float> %v) { ; CHECK: test_vmul_lane_f32_0: ; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer %mul = fmul <2 x float> %shuffle, %a @@ -2449,6 +2741,7 @@ entry: define <4 x float> @test_vmulq_lane_f32_0(<4 x float> %a, <2 x float> %v) { ; CHECK: test_vmulq_lane_f32_0: ; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer %mul = fmul <4 x float> %shuffle, %a @@ -2458,6 +2751,7 @@ entry: define <2 x float> @test_vmul_laneq_f32_0(<2 x float> %a, <4 x float> %v) { ; CHECK: test_vmul_laneq_f32_0: ; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> 
zeroinitializer %mul = fmul <2 x float> %shuffle, %a @@ -2467,6 +2761,7 @@ entry: define <1 x double> @test_vmul_laneq_f64_0(<1 x double> %a, <2 x double> %v) { ; CHECK: test_vmul_laneq_f64_0: ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] +; CHECK-NEXT: ret entry: %0 = bitcast <1 x double> %a to <8 x i8> %1 = bitcast <8 x i8> %0 to double @@ -2479,6 +2774,7 @@ entry: define <4 x float> @test_vmulq_laneq_f32_0(<4 x float> %a, <4 x float> %v) { ; CHECK: test_vmulq_laneq_f32_0: ; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer %mul = fmul <4 x float> %shuffle, %a @@ -2488,6 +2784,7 @@ entry: define <2 x double> @test_vmulq_laneq_f64_0(<2 x double> %a, <2 x double> %v) { ; CHECK: test_vmulq_laneq_f64_0: ; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer %mul = fmul <2 x double> %shuffle, %a @@ -2497,6 +2794,7 @@ entry: define <2 x float> @test_vmulx_lane_f32_0(<2 x float> %a, <2 x float> %v) { ; CHECK: test_vmulx_lane_f32_0: ; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) @@ -2506,6 +2804,7 @@ entry: define <4 x float> @test_vmulxq_lane_f32_0(<4 x float> %a, <2 x float> %v) { ; CHECK: test_vmulxq_lane_f32_0: ; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) @@ -2515,6 +2814,7 @@ entry: define <2 x double> @test_vmulxq_lane_f64_0(<2 x double> %a, <1 x double> %v) { ; CHECK: test_vmulxq_lane_f64_0: ; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) @@ -2524,6 +2824,7 @@ entry: define <2 x float> @test_vmulx_laneq_f32_0(<2 x float> %a, <4 x float> %v) { ; CHECK: test_vmulx_laneq_f32_0: ; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) @@ -2533,6 +2834,7 @@ entry: define <4 x float> @test_vmulxq_laneq_f32_0(<4 x float> %a, <4 x float> %v) { ; CHECK: test_vmulxq_laneq_f32_0: ; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) @@ -2542,6 +2844,7 @@ entry: define <2 x double> @test_vmulxq_laneq_f64_0(<2 x double> %a, <2 x double> %v) { ; CHECK: test_vmulxq_laneq_f64_0: ; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> 
%a, <2 x double> %shuffle) diff --git a/test/CodeGen/AArch64/neon-3vdiff.ll b/test/CodeGen/AArch64/neon-3vdiff.ll index 171e2b2..96400eb 100644 --- a/test/CodeGen/AArch64/neon-3vdiff.ll +++ b/test/CodeGen/AArch64/neon-3vdiff.ll @@ -1804,3 +1804,30 @@ entry: ret <8 x i16> %vmull.i.i } +define i128 @test_vmull_p64(i64 %a, i64 %b) #4 { +; CHECK: test_vmull_p64 +; CHECK: pmull {{v[0-9]+}}.1q, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d +entry: + %vmull.i = insertelement <1 x i64> undef, i64 %a, i32 0 + %vmull1.i = insertelement <1 x i64> undef, i64 %b, i32 0 + %vmull2.i = tail call <16 x i8> @llvm.aarch64.neon.vmull.p64(<1 x i64> %vmull.i, <1 x i64> %vmull1.i) #1 + %vmull3.i = bitcast <16 x i8> %vmull2.i to i128 + ret i128 %vmull3.i +} + +define i128 @test_vmull_high_p64(<2 x i64> %a, <2 x i64> %b) #4 { +; CHECK: test_vmull_high_p64 +; CHECK: pmull2 {{v[0-9]+}}.1q, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +entry: + %0 = extractelement <2 x i64> %a, i32 1 + %1 = extractelement <2 x i64> %b, i32 1 + %vmull.i.i = insertelement <1 x i64> undef, i64 %0, i32 0 + %vmull1.i.i = insertelement <1 x i64> undef, i64 %1, i32 0 + %vmull2.i.i = tail call <16 x i8> @llvm.aarch64.neon.vmull.p64(<1 x i64> %vmull.i.i, <1 x i64> %vmull1.i.i) #1 + %vmull3.i.i = bitcast <16 x i8> %vmull2.i.i to i128 + ret i128 %vmull3.i.i +} + +declare <16 x i8> @llvm.aarch64.neon.vmull.p64(<1 x i64>, <1 x i64>) #5 + + diff --git a/test/CodeGen/AArch64/neon-across.ll b/test/CodeGen/AArch64/neon-across.ll index 733db97..6d30c95 100644 --- a/test/CodeGen/AArch64/neon-across.ll +++ b/test/CodeGen/AArch64/neon-across.ll @@ -1,12 +1,12 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -declare <1 x float> @llvm.aarch64.neon.vminnmv.v1f32.v4f32(<4 x float>) +declare float @llvm.aarch64.neon.vminnmv(<4 x float>) -declare <1 x float> @llvm.aarch64.neon.vmaxnmv.v1f32.v4f32(<4 x float>) +declare float @llvm.aarch64.neon.vmaxnmv(<4 x float>) -declare <1 x float> @llvm.aarch64.neon.vminv.v1f32.v4f32(<4 x float>) +declare float @llvm.aarch64.neon.vminv(<4 x float>) -declare <1 x float> @llvm.aarch64.neon.vmaxv.v1f32.v4f32(<4 x float>) +declare float @llvm.aarch64.neon.vmaxv(<4 x float>) declare <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v4i32(<4 x i32>) @@ -442,8 +442,7 @@ define float @test_vmaxvq_f32(<4 x float> %a) { ; CHECK: test_vmaxvq_f32: ; CHECK: fmaxv s{{[0-9]+}}, {{v[0-9]+}}.4s entry: - %vmaxv.i = tail call <1 x float> @llvm.aarch64.neon.vmaxv.v1f32.v4f32(<4 x float> %a) - %0 = extractelement <1 x float> %vmaxv.i, i32 0 + %0 = call float @llvm.aarch64.neon.vmaxv(<4 x float> %a) ret float %0 } @@ -451,8 +450,7 @@ define float @test_vminvq_f32(<4 x float> %a) { ; CHECK: test_vminvq_f32: ; CHECK: fminv s{{[0-9]+}}, {{v[0-9]+}}.4s entry: - %vminv.i = tail call <1 x float> @llvm.aarch64.neon.vminv.v1f32.v4f32(<4 x float> %a) - %0 = extractelement <1 x float> %vminv.i, i32 0 + %0 = call float @llvm.aarch64.neon.vminv(<4 x float> %a) ret float %0 } @@ -460,8 +458,7 @@ define float @test_vmaxnmvq_f32(<4 x float> %a) { ; CHECK: test_vmaxnmvq_f32: ; CHECK: fmaxnmv s{{[0-9]+}}, {{v[0-9]+}}.4s entry: - %vmaxnmv.i = tail call <1 x float> @llvm.aarch64.neon.vmaxnmv.v1f32.v4f32(<4 x float> %a) - %0 = extractelement <1 x float> %vmaxnmv.i, i32 0 + %0 = call float @llvm.aarch64.neon.vmaxnmv(<4 x float> %a) ret float %0 } @@ -469,8 +466,7 @@ define float @test_vminnmvq_f32(<4 x float> %a) { ; CHECK: test_vminnmvq_f32: ; CHECK: fminnmv s{{[0-9]+}}, {{v[0-9]+}}.4s entry: - %vminnmv.i = tail call <1 x float> 
@llvm.aarch64.neon.vminnmv.v1f32.v4f32(<4 x float> %a) - %0 = extractelement <1 x float> %vminnmv.i, i32 0 + %0 = call float @llvm.aarch64.neon.vminnmv(<4 x float> %a) ret float %0 } diff --git a/test/CodeGen/AArch64/neon-add-pairwise.ll b/test/CodeGen/AArch64/neon-add-pairwise.ll index 1abfed3..32d8222 100644 --- a/test/CodeGen/AArch64/neon-add-pairwise.ll +++ b/test/CodeGen/AArch64/neon-add-pairwise.ll @@ -90,3 +90,12 @@ define <2 x double> @test_faddp_v2f64(<2 x double> %lhs, <2 x double> %rhs) { ret <2 x double> %val } +define i32 @test_vaddv.v2i32(<2 x i32> %a) { +; CHECK-LABEL: test_vaddv.v2i32 +; CHECK: addp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + %1 = tail call <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v2i32(<2 x i32> %a) + %2 = extractelement <1 x i32> %1, i32 0 + ret i32 %2 +} + +declare <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v2i32(<2 x i32>) \ No newline at end of file diff --git a/test/CodeGen/AArch64/neon-add-sub.ll b/test/CodeGen/AArch64/neon-add-sub.ll index 078ba14..9015237 100644 --- a/test/CodeGen/AArch64/neon-add-sub.ll +++ b/test/CodeGen/AArch64/neon-add-sub.ll @@ -1,119 +1,119 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s define <8 x i8> @add8xi8(<8 x i8> %A, <8 x i8> %B) { -;CHECK: add {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: add {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = add <8 x i8> %A, %B; ret <8 x i8> %tmp3 } define <16 x i8> @add16xi8(<16 x i8> %A, <16 x i8> %B) { -;CHECK: add {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: add {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = add <16 x i8> %A, %B; ret <16 x i8> %tmp3 } define <4 x i16> @add4xi16(<4 x i16> %A, <4 x i16> %B) { -;CHECK: add {{v[0-31]+}}.4h, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h +;CHECK: add {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h %tmp3 = add <4 x i16> %A, %B; ret <4 x i16> %tmp3 } define <8 x i16> @add8xi16(<8 x i16> %A, <8 x i16> %B) { -;CHECK: add {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +;CHECK: add {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h %tmp3 = add <8 x i16> %A, %B; ret <8 x i16> %tmp3 } define <2 x i32> @add2xi32(<2 x i32> %A, <2 x i32> %B) { -;CHECK: add {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +;CHECK: add {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s %tmp3 = add <2 x i32> %A, %B; ret <2 x i32> %tmp3 } define <4 x i32> @add4x32(<4 x i32> %A, <4 x i32> %B) { -;CHECK: add {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +;CHECK: add {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s %tmp3 = add <4 x i32> %A, %B; ret <4 x i32> %tmp3 } define <2 x i64> @add2xi64(<2 x i64> %A, <2 x i64> %B) { -;CHECK: add {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +;CHECK: add {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d %tmp3 = add <2 x i64> %A, %B; ret <2 x i64> %tmp3 } define <2 x float> @add2xfloat(<2 x float> %A, <2 x float> %B) { -;CHECK: fadd {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +;CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s %tmp3 = fadd <2 x float> %A, %B; ret <2 x float> %tmp3 } define <4 x float> @add4xfloat(<4 x float> %A, <4 x float> %B) { -;CHECK: fadd {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +;CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s %tmp3 = fadd <4 x float> %A, %B; ret <4 x float> %tmp3 } define <2 x double> @add2xdouble(<2 x double> %A, <2 x double> %B) { -;CHECK: add {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +;CHECK: add {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d %tmp3 = fadd <2 x 
double> %A, %B; ret <2 x double> %tmp3 } define <8 x i8> @sub8xi8(<8 x i8> %A, <8 x i8> %B) { -;CHECK: sub {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: sub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = sub <8 x i8> %A, %B; ret <8 x i8> %tmp3 } define <16 x i8> @sub16xi8(<16 x i8> %A, <16 x i8> %B) { -;CHECK: sub {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: sub {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = sub <16 x i8> %A, %B; ret <16 x i8> %tmp3 } define <4 x i16> @sub4xi16(<4 x i16> %A, <4 x i16> %B) { -;CHECK: sub {{v[0-31]+}}.4h, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h +;CHECK: sub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h %tmp3 = sub <4 x i16> %A, %B; ret <4 x i16> %tmp3 } define <8 x i16> @sub8xi16(<8 x i16> %A, <8 x i16> %B) { -;CHECK: sub {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +;CHECK: sub {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h %tmp3 = sub <8 x i16> %A, %B; ret <8 x i16> %tmp3 } define <2 x i32> @sub2xi32(<2 x i32> %A, <2 x i32> %B) { -;CHECK: sub {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +;CHECK: sub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s %tmp3 = sub <2 x i32> %A, %B; ret <2 x i32> %tmp3 } define <4 x i32> @sub4x32(<4 x i32> %A, <4 x i32> %B) { -;CHECK: sub {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +;CHECK: sub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s %tmp3 = sub <4 x i32> %A, %B; ret <4 x i32> %tmp3 } define <2 x i64> @sub2xi64(<2 x i64> %A, <2 x i64> %B) { -;CHECK: sub {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +;CHECK: sub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d %tmp3 = sub <2 x i64> %A, %B; ret <2 x i64> %tmp3 } define <2 x float> @sub2xfloat(<2 x float> %A, <2 x float> %B) { -;CHECK: fsub {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +;CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s %tmp3 = fsub <2 x float> %A, %B; ret <2 x float> %tmp3 } define <4 x float> @sub4xfloat(<4 x float> %A, <4 x float> %B) { -;CHECK: fsub {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +;CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s %tmp3 = fsub <4 x float> %A, %B; ret <4 x float> %tmp3 } define <2 x double> @sub2xdouble(<2 x double> %A, <2 x double> %B) { -;CHECK: sub {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +;CHECK: sub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d %tmp3 = fsub <2 x double> %A, %B; ret <2 x double> %tmp3 } @@ -234,4 +234,46 @@ declare <1 x double> @llvm.aarch64.neon.vmaxnm.v1f64(<1 x double>, <1 x double>) declare <1 x double> @llvm.arm.neon.vmins.v1f64(<1 x double>, <1 x double>) declare <1 x double> @llvm.arm.neon.vmaxs.v1f64(<1 x double>, <1 x double>) declare <1 x double> @llvm.arm.neon.vabds.v1f64(<1 x double>, <1 x double>) -declare <1 x double> @llvm.fma.v1f64(<1 x double>, <1 x double>, <1 x double>) \ No newline at end of file +declare <1 x double> @llvm.fma.v1f64(<1 x double>, <1 x double>, <1 x double>) + +define <1 x i8> @test_add_v1i8(<1 x i8> %a, <1 x i8> %b) { +;CHECK-LABEL: test_add_v1i8: +;CHECK: add {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + %c = add <1 x i8> %a, %b + ret <1 x i8> %c +} + +define <1 x i16> @test_add_v1i16(<1 x i16> %a, <1 x i16> %b) { +;CHECK-LABEL: test_add_v1i16: +;CHECK: add {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h + %c = add <1 x i16> %a, %b + ret <1 x i16> %c +} + +define <1 x i32> @test_add_v1i32(<1 x i32> %a, <1 x i32> %b) { +;CHECK-LABEL: test_add_v1i32: +;CHECK: add {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + %c = add <1 x i32> %a, %b + ret <1 x i32> %c +} + +define <1 
x i8> @test_sub_v1i8(<1 x i8> %a, <1 x i8> %b) { +;CHECK-LABEL: test_sub_v1i8: +;CHECK: sub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + %c = sub <1 x i8> %a, %b + ret <1 x i8> %c +} + +define <1 x i16> @test_sub_v1i16(<1 x i16> %a, <1 x i16> %b) { +;CHECK-LABEL: test_sub_v1i16: +;CHECK: sub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h + %c = sub <1 x i16> %a, %b + ret <1 x i16> %c +} + +define <1 x i32> @test_sub_v1i32(<1 x i32> %a, <1 x i32> %b) { +;CHECK-LABEL: test_sub_v1i32: +;CHECK: sub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + %c = sub <1 x i32> %a, %b + ret <1 x i32> %c +} \ No newline at end of file diff --git a/test/CodeGen/AArch64/neon-bitcast.ll b/test/CodeGen/AArch64/neon-bitcast.ll index f9ec704..61099d4 100644 --- a/test/CodeGen/AArch64/neon-bitcast.ll +++ b/test/CodeGen/AArch64/neon-bitcast.ll @@ -20,8 +20,8 @@ define <2 x i32> @test_v8i8_to_v2i32(<8 x i8> %in) nounwind { ret <2 x i32> %val } -define <2 x float> @test_v8i8_to_v1f32(<8 x i8> %in) nounwind{ -; CHECK: test_v8i8_to_v1f32: +define <2 x float> @test_v8i8_to_v2f32(<8 x i8> %in) nounwind{ +; CHECK: test_v8i8_to_v2f32: ; CHECK-NEXT: // BB#0: ; CHECK-NEXT: ret @@ -67,8 +67,8 @@ define <2 x i32> @test_v4i16_to_v2i32(<4 x i16> %in) nounwind { ret <2 x i32> %val } -define <2 x float> @test_v4i16_to_v1f32(<4 x i16> %in) nounwind{ -; CHECK: test_v4i16_to_v1f32: +define <2 x float> @test_v4i16_to_v2f32(<4 x i16> %in) nounwind{ +; CHECK: test_v4i16_to_v2f32: ; CHECK-NEXT: // BB#0: ; CHECK-NEXT: ret @@ -114,8 +114,8 @@ define <2 x i32> @test_v2i32_to_v2i32(<2 x i32> %in) nounwind { ret <2 x i32> %val } -define <2 x float> @test_v2i32_to_v1f32(<2 x i32> %in) nounwind{ -; CHECK: test_v2i32_to_v1f32: +define <2 x float> @test_v2i32_to_v2f32(<2 x i32> %in) nounwind{ +; CHECK: test_v2i32_to_v2f32: ; CHECK-NEXT: // BB#0: ; CHECK-NEXT: ret diff --git a/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/test/CodeGen/AArch64/neon-bitwise-instructions.ll index 1c43b97..7e5b693 100644 --- a/test/CodeGen/AArch64/neon-bitwise-instructions.ll +++ b/test/CodeGen/AArch64/neon-bitwise-instructions.ll @@ -1,502 +1,502 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s - define <8 x i8> @and8xi8(<8 x i8> %a, <8 x i8> %b) { -;CHECK: and {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: and {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = and <8 x i8> %a, %b; ret <8 x i8> %tmp1 } define <16 x i8> @and16xi8(<16 x i8> %a, <16 x i8> %b) { -;CHECK: and {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: and {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = and <16 x i8> %a, %b; ret <16 x i8> %tmp1 } define <8 x i8> @orr8xi8(<8 x i8> %a, <8 x i8> %b) { -;CHECK: orr {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = or <8 x i8> %a, %b; ret <8 x i8> %tmp1 } define <16 x i8> @orr16xi8(<16 x i8> %a, <16 x i8> %b) { -;CHECK: orr {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = or <16 x i8> %a, %b; ret <16 x i8> %tmp1 } define <8 x i8> @xor8xi8(<8 x i8> %a, <8 x i8> %b) { -;CHECK: eor {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = xor <8 x i8> %a, %b; ret <8 x i8> %tmp1 } define <16 x i8> @xor16xi8(<16 x i8> %a, <16 x i8> %b) { -;CHECK: eor {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, 
{{v[0-9]+}}.16b %tmp1 = xor <16 x i8> %a, %b; ret <16 x i8> %tmp1 } define <8 x i8> @bsl8xi8_const(<8 x i8> %a, <8 x i8> %b) { -;CHECK: bsl {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b - %tmp1 = and <8 x i8> %a, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > - %tmp2 = and <8 x i8> %b, < i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0 > +;CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + %tmp1 = and <8 x i8> %a, < i8 -1, i8 -1, i8 0, i8 0, i8 -1, i8 -1, i8 0, i8 0 > + %tmp2 = and <8 x i8> %b, < i8 0, i8 0, i8 -1, i8 -1, i8 0, i8 0, i8 -1, i8 -1 > %tmp3 = or <8 x i8> %tmp1, %tmp2 ret <8 x i8> %tmp3 } define <16 x i8> @bsl16xi8_const(<16 x i8> %a, <16 x i8> %b) { -;CHECK: bsl {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b - %tmp1 = and <16 x i8> %a, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > - %tmp2 = and <16 x i8> %b, < i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0 > +;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b + %tmp1 = and <16 x i8> %a, < i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0 > + %tmp2 = and <16 x i8> %b, < i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 -1 > %tmp3 = or <16 x i8> %tmp1, %tmp2 ret <16 x i8> %tmp3 } define <8 x i8> @orn8xi8(<8 x i8> %a, <8 x i8> %b) { -;CHECK: orn {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: orn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = xor <8 x i8> %b, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > %tmp2 = or <8 x i8> %a, %tmp1 ret <8 x i8> %tmp2 } define <16 x i8> @orn16xi8(<16 x i8> %a, <16 x i8> %b) { -;CHECK: orn {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: orn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = xor <16 x i8> %b, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > %tmp2 = or <16 x i8> %a, %tmp1 ret <16 x i8> %tmp2 } define <8 x i8> @bic8xi8(<8 x i8> %a, <8 x i8> %b) { -;CHECK: bic {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: bic {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = xor <8 x i8> %b, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > %tmp2 = and <8 x i8> %a, %tmp1 ret <8 x i8> %tmp2 } define <16 x i8> @bic16xi8(<16 x i8> %a, <16 x i8> %b) { -;CHECK: bic {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: bic {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = xor <16 x i8> %b, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > %tmp2 = and <16 x i8> %a, %tmp1 ret <16 x i8> %tmp2 } define <2 x i32> @orrimm2s_lsl0(<2 x i32> %a) { -;CHECK: orr {{v[0-31]+}}.2s, #0xff +;CHECK: orr {{v[0-9]+}}.2s, #0xff %tmp1 = or <2 x i32> %a, < i32 255, i32 255> ret <2 x i32> %tmp1 } define <2 x i32> @orrimm2s_lsl8(<2 x i32> %a) { -;CHECK: orr {{v[0-31]+}}.2s, #0xff, lsl #8 +;CHECK: orr {{v[0-9]+}}.2s, #0xff, lsl #8 %tmp1 = or <2 x i32> %a, < i32 65280, i32 65280> ret <2 x i32> %tmp1 } define <2 x i32> @orrimm2s_lsl16(<2 x i32> %a) { -;CHECK: orr {{v[0-31]+}}.2s, #0xff, lsl #16 +;CHECK: orr {{v[0-9]+}}.2s, #0xff, lsl #16 %tmp1 = or <2 x i32> %a, < i32 16711680, i32 16711680> ret <2 x i32> %tmp1 } define <2 x i32> @orrimm2s_lsl24(<2 x i32> %a) { -;CHECK: orr {{v[0-31]+}}.2s, #0xff, lsl #24 +;CHECK: orr {{v[0-9]+}}.2s, #0xff, lsl #24 %tmp1 
= or <2 x i32> %a, < i32 4278190080, i32 4278190080> ret <2 x i32> %tmp1 } define <4 x i32> @orrimm4s_lsl0(<4 x i32> %a) { -;CHECK: orr {{v[0-31]+}}.4s, #0xff +;CHECK: orr {{v[0-9]+}}.4s, #0xff %tmp1 = or <4 x i32> %a, < i32 255, i32 255, i32 255, i32 255> ret <4 x i32> %tmp1 } define <4 x i32> @orrimm4s_lsl8(<4 x i32> %a) { -;CHECK: orr {{v[0-31]+}}.4s, #0xff, lsl #8 +;CHECK: orr {{v[0-9]+}}.4s, #0xff, lsl #8 %tmp1 = or <4 x i32> %a, < i32 65280, i32 65280, i32 65280, i32 65280> ret <4 x i32> %tmp1 } define <4 x i32> @orrimm4s_lsl16(<4 x i32> %a) { -;CHECK: orr {{v[0-31]+}}.4s, #0xff, lsl #16 +;CHECK: orr {{v[0-9]+}}.4s, #0xff, lsl #16 %tmp1 = or <4 x i32> %a, < i32 16711680, i32 16711680, i32 16711680, i32 16711680> ret <4 x i32> %tmp1 } define <4 x i32> @orrimm4s_lsl24(<4 x i32> %a) { -;CHECK: orr {{v[0-31]+}}.4s, #0xff, lsl #24 +;CHECK: orr {{v[0-9]+}}.4s, #0xff, lsl #24 %tmp1 = or <4 x i32> %a, < i32 4278190080, i32 4278190080, i32 4278190080, i32 4278190080> ret <4 x i32> %tmp1 } define <4 x i16> @orrimm4h_lsl0(<4 x i16> %a) { -;CHECK: orr {{v[0-31]+}}.4h, #0xff +;CHECK: orr {{v[0-9]+}}.4h, #0xff %tmp1 = or <4 x i16> %a, < i16 255, i16 255, i16 255, i16 255 > ret <4 x i16> %tmp1 } define <4 x i16> @orrimm4h_lsl8(<4 x i16> %a) { -;CHECK: orr {{v[0-31]+}}.4h, #0xff, lsl #8 +;CHECK: orr {{v[0-9]+}}.4h, #0xff, lsl #8 %tmp1 = or <4 x i16> %a, < i16 65280, i16 65280, i16 65280, i16 65280 > ret <4 x i16> %tmp1 } define <8 x i16> @orrimm8h_lsl0(<8 x i16> %a) { -;CHECK: orr {{v[0-31]+}}.8h, #0xff +;CHECK: orr {{v[0-9]+}}.8h, #0xff %tmp1 = or <8 x i16> %a, < i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255 > ret <8 x i16> %tmp1 } define <8 x i16> @orrimm8h_lsl8(<8 x i16> %a) { -;CHECK: orr {{v[0-31]+}}.8h, #0xff, lsl #8 +;CHECK: orr {{v[0-9]+}}.8h, #0xff, lsl #8 %tmp1 = or <8 x i16> %a, < i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280 > ret <8 x i16> %tmp1 } define <2 x i32> @bicimm2s_lsl0(<2 x i32> %a) { -;CHECK: bic {{v[0-31]+}}.2s, #0x10 +;CHECK: bic {{v[0-9]+}}.2s, #0x10 %tmp1 = and <2 x i32> %a, < i32 4294967279, i32 4294967279 > ret <2 x i32> %tmp1 } define <2 x i32> @bicimm2s_lsl8(<2 x i32> %a) { -;CHECK: bic {{v[0-31]+}}.2s, #0x10, lsl #8 - %tmp1 = and <2 x i32> %a, < i32 18446744073709547519, i32 18446744073709547519 > +;CHECK: bic {{v[0-9]+}}.2s, #0x10, lsl #8 + %tmp1 = and <2 x i32> %a, < i32 4294963199, i32 4294963199 > ret <2 x i32> %tmp1 } define <2 x i32> @bicimm2s_lsl16(<2 x i32> %a) { -;CHECK: bic {{v[0-31]+}}.2s, #0x10, lsl #16 - %tmp1 = and <2 x i32> %a, < i32 18446744073708503039, i32 18446744073708503039 > +;CHECK: bic {{v[0-9]+}}.2s, #0x10, lsl #16 + %tmp1 = and <2 x i32> %a, < i32 4293918719, i32 4293918719 > ret <2 x i32> %tmp1 } define <2 x i32> @bicimm2s_lsl124(<2 x i32> %a) { -;CHECK: bic {{v[0-31]+}}.2s, #0x10, lsl #24 - %tmp1 = and <2 x i32> %a, < i32 18446744073441116159, i32 18446744073441116159> +;CHECK: bic {{v[0-9]+}}.2s, #0x10, lsl #24 + %tmp1 = and <2 x i32> %a, < i32 4026531839, i32 4026531839> ret <2 x i32> %tmp1 } define <4 x i32> @bicimm4s_lsl0(<4 x i32> %a) { -;CHECK: bic {{v[0-31]+}}.4s, #0x10 +;CHECK: bic {{v[0-9]+}}.4s, #0x10 %tmp1 = and <4 x i32> %a, < i32 4294967279, i32 4294967279, i32 4294967279, i32 4294967279 > ret <4 x i32> %tmp1 } define <4 x i32> @bicimm4s_lsl8(<4 x i32> %a) { -;CHECK: bic {{v[0-31]+}}.4s, #0x10, lsl #8 - %tmp1 = and <4 x i32> %a, < i32 18446744073709547519, i32 18446744073709547519, i32 18446744073709547519, i32 18446744073709547519 > +;CHECK: bic {{v[0-9]+}}.4s, 
#0x10, lsl #8 + %tmp1 = and <4 x i32> %a, < i32 4294963199, i32 4294963199, i32 4294963199, i32 4294963199 > ret <4 x i32> %tmp1 } define <4 x i32> @bicimm4s_lsl16(<4 x i32> %a) { -;CHECK: bic {{v[0-31]+}}.4s, #0x10, lsl #16 - %tmp1 = and <4 x i32> %a, < i32 18446744073708503039, i32 18446744073708503039, i32 18446744073708503039, i32 18446744073708503039 > +;CHECK: bic {{v[0-9]+}}.4s, #0x10, lsl #16 + %tmp1 = and <4 x i32> %a, < i32 4293918719, i32 4293918719, i32 4293918719, i32 4293918719 > ret <4 x i32> %tmp1 } define <4 x i32> @bicimm4s_lsl124(<4 x i32> %a) { -;CHECK: bic {{v[0-31]+}}.4s, #0x10, lsl #24 - %tmp1 = and <4 x i32> %a, < i32 18446744073441116159, i32 18446744073441116159, i32 18446744073441116159, i32 18446744073441116159> +;CHECK: bic {{v[0-9]+}}.4s, #0x10, lsl #24 + %tmp1 = and <4 x i32> %a, < i32 4026531839, i32 4026531839, i32 4026531839, i32 4026531839> ret <4 x i32> %tmp1 } define <4 x i16> @bicimm4h_lsl0_a(<4 x i16> %a) { -;CHECK: bic {{v[0-31]+}}.4h, #0x10 - %tmp1 = and <4 x i16> %a, < i16 18446744073709551599, i16 18446744073709551599, i16 18446744073709551599, i16 18446744073709551599 > +;CHECK: bic {{v[0-9]+}}.4h, #0x10 + %tmp1 = and <4 x i16> %a, < i16 4294967279, i16 4294967279, i16 4294967279, i16 4294967279 > ret <4 x i16> %tmp1 } define <4 x i16> @bicimm4h_lsl0_b(<4 x i16> %a) { -;CHECK: bic {{v[0-31]+}}.4h, #0x0 +;CHECK: bic {{v[0-9]+}}.4h, #0xff %tmp1 = and <4 x i16> %a, < i16 65280, i16 65280, i16 65280, i16 65280 > ret <4 x i16> %tmp1 } define <4 x i16> @bicimm4h_lsl8_a(<4 x i16> %a) { -;CHECK: bic {{v[0-31]+}}.4h, #0x10, lsl #8 - %tmp1 = and <4 x i16> %a, < i16 18446744073709547519, i16 18446744073709547519, i16 18446744073709547519, i16 18446744073709547519> +;CHECK: bic {{v[0-9]+}}.4h, #0x10, lsl #8 + %tmp1 = and <4 x i16> %a, < i16 4294963199, i16 4294963199, i16 4294963199, i16 4294963199> ret <4 x i16> %tmp1 } define <4 x i16> @bicimm4h_lsl8_b(<4 x i16> %a) { -;CHECK: bic {{v[0-31]+}}.4h, #0x0, lsl #8 +;CHECK: bic {{v[0-9]+}}.4h, #0xff, lsl #8 %tmp1 = and <4 x i16> %a, < i16 255, i16 255, i16 255, i16 255> ret <4 x i16> %tmp1 } define <8 x i16> @bicimm8h_lsl0_a(<8 x i16> %a) { -;CHECK: bic {{v[0-31]+}}.8h, #0x10 - %tmp1 = and <8 x i16> %a, < i16 18446744073709551599, i16 18446744073709551599, i16 18446744073709551599, i16 18446744073709551599, - i16 18446744073709551599, i16 18446744073709551599, i16 18446744073709551599, i16 18446744073709551599 > +;CHECK: bic {{v[0-9]+}}.8h, #0x10 + %tmp1 = and <8 x i16> %a, < i16 4294967279, i16 4294967279, i16 4294967279, i16 4294967279, + i16 4294967279, i16 4294967279, i16 4294967279, i16 4294967279 > ret <8 x i16> %tmp1 } define <8 x i16> @bicimm8h_lsl0_b(<8 x i16> %a) { -;CHECK: bic {{v[0-31]+}}.8h, #0x0 +;CHECK: bic {{v[0-9]+}}.8h, #0xff %tmp1 = and <8 x i16> %a, < i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280 > ret <8 x i16> %tmp1 } define <8 x i16> @bicimm8h_lsl8_a(<8 x i16> %a) { -;CHECK: bic {{v[0-31]+}}.8h, #0x10, lsl #8 - %tmp1 = and <8 x i16> %a, < i16 18446744073709547519, i16 18446744073709547519, i16 18446744073709547519, i16 18446744073709547519, - i16 18446744073709547519, i16 18446744073709547519, i16 18446744073709547519, i16 18446744073709547519> +;CHECK: bic {{v[0-9]+}}.8h, #0x10, lsl #8 + %tmp1 = and <8 x i16> %a, < i16 4294963199, i16 4294963199, i16 4294963199, i16 4294963199, + i16 4294963199, i16 4294963199, i16 4294963199, i16 4294963199> ret <8 x i16> %tmp1 } define <8 x i16> @bicimm8h_lsl8_b(<8 x i16> %a) { -;CHECK: bic {{v[0-31]+}}.8h, 
#0x0, lsl #8 +;CHECK: bic {{v[0-9]+}}.8h, #0xff, lsl #8 %tmp1 = and <8 x i16> %a, < i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255> ret <8 x i16> %tmp1 } define <2 x i32> @and2xi32(<2 x i32> %a, <2 x i32> %b) { -;CHECK: and {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: and {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = and <2 x i32> %a, %b; ret <2 x i32> %tmp1 } define <4 x i16> @and4xi16(<4 x i16> %a, <4 x i16> %b) { -;CHECK: and {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: and {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = and <4 x i16> %a, %b; ret <4 x i16> %tmp1 } define <1 x i64> @and1xi64(<1 x i64> %a, <1 x i64> %b) { -;CHECK: and {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: and {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = and <1 x i64> %a, %b; ret <1 x i64> %tmp1 } define <4 x i32> @and4xi32(<4 x i32> %a, <4 x i32> %b) { -;CHECK: and {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: and {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = and <4 x i32> %a, %b; ret <4 x i32> %tmp1 } define <8 x i16> @and8xi16(<8 x i16> %a, <8 x i16> %b) { -;CHECK: and {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: and {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = and <8 x i16> %a, %b; ret <8 x i16> %tmp1 } define <2 x i64> @and2xi64(<2 x i64> %a, <2 x i64> %b) { -;CHECK: and {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: and {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = and <2 x i64> %a, %b; ret <2 x i64> %tmp1 } define <2 x i32> @orr2xi32(<2 x i32> %a, <2 x i32> %b) { -;CHECK: orr {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = or <2 x i32> %a, %b; ret <2 x i32> %tmp1 } define <4 x i16> @orr4xi16(<4 x i16> %a, <4 x i16> %b) { -;CHECK: orr {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = or <4 x i16> %a, %b; ret <4 x i16> %tmp1 } define <1 x i64> @orr1xi64(<1 x i64> %a, <1 x i64> %b) { -;CHECK: orr {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = or <1 x i64> %a, %b; ret <1 x i64> %tmp1 } define <4 x i32> @orr4xi32(<4 x i32> %a, <4 x i32> %b) { -;CHECK: orr {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = or <4 x i32> %a, %b; ret <4 x i32> %tmp1 } define <8 x i16> @orr8xi16(<8 x i16> %a, <8 x i16> %b) { -;CHECK: orr {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = or <8 x i16> %a, %b; ret <8 x i16> %tmp1 } define <2 x i64> @orr2xi64(<2 x i64> %a, <2 x i64> %b) { -;CHECK: orr {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = or <2 x i64> %a, %b; ret <2 x i64> %tmp1 } define <2 x i32> @eor2xi32(<2 x i32> %a, <2 x i32> %b) { -;CHECK: eor {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = xor <2 x i32> %a, %b; ret <2 x i32> %tmp1 } define <4 x i16> @eor4xi16(<4 x i16> %a, <4 x i16> %b) { -;CHECK: eor {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = xor <4 x i16> %a, %b; ret <4 x i16> %tmp1 } define <1 x i64> @eor1xi64(<1 x i64> %a, <1 x i64> %b) { -;CHECK: eor {{v[0-31]+}}.8b, 
{{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = xor <1 x i64> %a, %b; ret <1 x i64> %tmp1 } define <4 x i32> @eor4xi32(<4 x i32> %a, <4 x i32> %b) { -;CHECK: eor {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = xor <4 x i32> %a, %b; ret <4 x i32> %tmp1 } define <8 x i16> @eor8xi16(<8 x i16> %a, <8 x i16> %b) { -;CHECK: eor {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = xor <8 x i16> %a, %b; ret <8 x i16> %tmp1 } define <2 x i64> @eor2xi64(<2 x i64> %a, <2 x i64> %b) { -;CHECK: eor {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = xor <2 x i64> %a, %b; ret <2 x i64> %tmp1 } define <2 x i32> @bic2xi32(<2 x i32> %a, <2 x i32> %b) { -;CHECK: bic {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: bic {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = xor <2 x i32> %b, < i32 -1, i32 -1 > %tmp2 = and <2 x i32> %a, %tmp1 ret <2 x i32> %tmp2 } define <4 x i16> @bic4xi16(<4 x i16> %a, <4 x i16> %b) { -;CHECK: bic {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: bic {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = xor <4 x i16> %b, < i16 -1, i16 -1, i16 -1, i16-1 > %tmp2 = and <4 x i16> %a, %tmp1 ret <4 x i16> %tmp2 } define <1 x i64> @bic1xi64(<1 x i64> %a, <1 x i64> %b) { -;CHECK: bic {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: bic {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = xor <1 x i64> %b, < i64 -1> %tmp2 = and <1 x i64> %a, %tmp1 ret <1 x i64> %tmp2 } define <4 x i32> @bic4xi32(<4 x i32> %a, <4 x i32> %b) { -;CHECK: bic {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: bic {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = xor <4 x i32> %b, < i32 -1, i32 -1, i32 -1, i32 -1> %tmp2 = and <4 x i32> %a, %tmp1 ret <4 x i32> %tmp2 } define <8 x i16> @bic8xi16(<8 x i16> %a, <8 x i16> %b) { -;CHECK: bic {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: bic {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = xor <8 x i16> %b, < i16 -1, i16 -1, i16 -1, i16-1, i16 -1, i16 -1, i16 -1, i16 -1 > %tmp2 = and <8 x i16> %a, %tmp1 ret <8 x i16> %tmp2 } define <2 x i64> @bic2xi64(<2 x i64> %a, <2 x i64> %b) { -;CHECK: bic {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: bic {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = xor <2 x i64> %b, < i64 -1, i64 -1> %tmp2 = and <2 x i64> %a, %tmp1 ret <2 x i64> %tmp2 } define <2 x i32> @orn2xi32(<2 x i32> %a, <2 x i32> %b) { -;CHECK: orn {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: orn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = xor <2 x i32> %b, < i32 -1, i32 -1 > %tmp2 = or <2 x i32> %a, %tmp1 ret <2 x i32> %tmp2 } define <4 x i16> @orn4xi16(<4 x i16> %a, <4 x i16> %b) { -;CHECK: orn {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: orn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = xor <4 x i16> %b, < i16 -1, i16 -1, i16 -1, i16-1 > %tmp2 = or <4 x i16> %a, %tmp1 ret <4 x i16> %tmp2 } define <1 x i64> @orn1xi64(<1 x i64> %a, <1 x i64> %b) { -;CHECK: orn {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: orn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = xor <1 x i64> %b, < i64 -1> %tmp2 = or <1 x i64> %a, %tmp1 ret <1 x i64> %tmp2 } define <4 x i32> @orn4xi32(<4 x i32> %a, <4 x i32> %b) { -;CHECK: orn {{v[0-31]+}}.16b, 
{{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: orn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = xor <4 x i32> %b, < i32 -1, i32 -1, i32 -1, i32 -1> %tmp2 = or <4 x i32> %a, %tmp1 ret <4 x i32> %tmp2 } define <8 x i16> @orn8xi16(<8 x i16> %a, <8 x i16> %b) { -;CHECK: orn {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: orn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = xor <8 x i16> %b, < i16 -1, i16 -1, i16 -1, i16-1, i16 -1, i16 -1, i16 -1, i16 -1 > %tmp2 = or <8 x i16> %a, %tmp1 ret <8 x i16> %tmp2 } define <2 x i64> @orn2xi64(<2 x i64> %a, <2 x i64> %b) { -;CHECK: orn {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: orn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = xor <2 x i64> %b, < i64 -1, i64 -1> %tmp2 = or <2 x i64> %a, %tmp1 ret <2 x i64> %tmp2 } + define <2 x i32> @bsl2xi32_const(<2 x i32> %a, <2 x i32> %b) { -;CHECK: bsl {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b - %tmp1 = and <2 x i32> %a, < i32 -1, i32 -1 > - %tmp2 = and <2 x i32> %b, < i32 0, i32 0 > +;CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + %tmp1 = and <2 x i32> %a, < i32 -1, i32 0 > + %tmp2 = and <2 x i32> %b, < i32 0, i32 -1 > %tmp3 = or <2 x i32> %tmp1, %tmp2 ret <2 x i32> %tmp3 } define <4 x i16> @bsl4xi16_const(<4 x i16> %a, <4 x i16> %b) { -;CHECK: bsl {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b - %tmp1 = and <4 x i16> %a, < i16 -1, i16 -1, i16 -1,i16 -1 > - %tmp2 = and <4 x i16> %b, < i16 0, i16 0,i16 0, i16 0 > +;CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + %tmp1 = and <4 x i16> %a, < i16 -1, i16 0, i16 -1,i16 0 > + %tmp2 = and <4 x i16> %b, < i16 0, i16 -1,i16 0, i16 -1 > %tmp3 = or <4 x i16> %tmp1, %tmp2 ret <4 x i16> %tmp3 } define <1 x i64> @bsl1xi64_const(<1 x i64> %a, <1 x i64> %b) { -;CHECK: bsl {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b - %tmp1 = and <1 x i64> %a, < i64 -1 > - %tmp2 = and <1 x i64> %b, < i64 0 > +;CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + %tmp1 = and <1 x i64> %a, < i64 -16 > + %tmp2 = and <1 x i64> %b, < i64 15 > %tmp3 = or <1 x i64> %tmp1, %tmp2 ret <1 x i64> %tmp3 } define <4 x i32> @bsl4xi32_const(<4 x i32> %a, <4 x i32> %b) { -;CHECK: bsl {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b - %tmp1 = and <4 x i32> %a, < i32 -1, i32 -1, i32 -1, i32 -1 > - %tmp2 = and <4 x i32> %b, < i32 0, i32 0, i32 0, i32 0 > +;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b + %tmp1 = and <4 x i32> %a, < i32 -1, i32 0, i32 -1, i32 0 > + %tmp2 = and <4 x i32> %b, < i32 0, i32 -1, i32 0, i32 -1 > %tmp3 = or <4 x i32> %tmp1, %tmp2 ret <4 x i32> %tmp3 } define <8 x i16> @bsl8xi16_const(<8 x i16> %a, <8 x i16> %b) { -;CHECK: bsl {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b - %tmp1 = and <8 x i16> %a, < i16 -1, i16 -1, i16 -1,i16 -1, i16 -1, i16 -1, i16 -1,i16 -1 > - %tmp2 = and <8 x i16> %b, < i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0 > +;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b + %tmp1 = and <8 x i16> %a, < i16 -1, i16 -1, i16 0,i16 0, i16 -1, i16 -1, i16 0,i16 0 > + %tmp2 = and <8 x i16> %b, < i16 0, i16 0, i16 -1, i16 -1, i16 0, i16 0, i16 -1, i16 -1 > %tmp3 = or <8 x i16> %tmp1, %tmp2 ret <8 x i16> %tmp3 } define <2 x i64> @bsl2xi64_const(<2 x i64> %a, <2 x i64> %b) { -;CHECK: bsl {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b - %tmp1 = and <2 x i64> %a, < i64 -1, i64 -1 > - %tmp2 = and <2 x i64> %b, < i64 0, i64 0 > +;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b + %tmp1 = and <2 x i64> %a, < i64 -1, 
i64 0 > + %tmp2 = and <2 x i64> %b, < i64 0, i64 -1 > %tmp3 = or <2 x i64> %tmp1, %tmp2 ret <2 x i64> %tmp3 } define <8 x i8> @bsl8xi8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) { -;CHECK: bsl {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %1 = and <8 x i8> %v1, %v2 %2 = xor <8 x i8> %v1, %3 = and <8 x i8> %2, %v3 @@ -505,7 +505,7 @@ define <8 x i8> @bsl8xi8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) { } define <4 x i16> @bsl4xi16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) { -;CHECK: bsl {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %1 = and <4 x i16> %v1, %v2 %2 = xor <4 x i16> %v1, %3 = and <4 x i16> %2, %v3 @@ -514,7 +514,7 @@ define <4 x i16> @bsl4xi16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) { } define <2 x i32> @bsl2xi32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) { -;CHECK: bsl {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %1 = and <2 x i32> %v1, %v2 %2 = xor <2 x i32> %v1, %3 = and <2 x i32> %2, %v3 @@ -523,7 +523,7 @@ define <2 x i32> @bsl2xi32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) { } define <1 x i64> @bsl1xi64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) { -;CHECK: bsl {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %1 = and <1 x i64> %v1, %v2 %2 = xor <1 x i64> %v1, %3 = and <1 x i64> %2, %v3 @@ -532,7 +532,7 @@ define <1 x i64> @bsl1xi64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) { } define <16 x i8> @bsl16xi8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) { -;CHECK: bsl {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %1 = and <16 x i8> %v1, %v2 %2 = xor <16 x i8> %v1, %3 = and <16 x i8> %2, %v3 @@ -541,7 +541,7 @@ define <16 x i8> @bsl16xi8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) { } define <8 x i16> @bsl8xi16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) { -;CHECK: bsl {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %1 = and <8 x i16> %v1, %v2 %2 = xor <8 x i16> %v1, %3 = and <8 x i16> %2, %v3 @@ -550,7 +550,7 @@ define <8 x i16> @bsl8xi16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) { } define <4 x i32> @bsl4xi32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) { -;CHECK: bsl {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %1 = and <4 x i32> %v1, %v2 %2 = xor <4 x i32> %v1, %3 = and <4 x i32> %2, %v3 @@ -558,8 +558,65 @@ define <4 x i32> @bsl4xi32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) { ret <4 x i32> %4 } +define <8 x i8> @vselect_v8i8(<8 x i8> %a) { +;CHECK: movi {{d[0-9]+}}, #0xffff +;CHECK-NEXT: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + %b = select <8 x i1> , <8 x i8> %a, <8 x i8> + ret <8 x i8> %b +} + +define <4 x i16> @vselect_v4i16(<4 x i16> %a) { +;CHECK: movi {{d[0-9]+}}, #0xffff +;CHECK-NEXT: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + %b = select <4 x i1> , <4 x i16> %a, <4 x i16> + ret <4 x i16> %b +} + +define <8 x i8> @vselect_cmp_ne(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { +;CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + %cmp = icmp ne <8 x i8> %a, %b + %d = select <8 x i1> %cmp, <8 x i8> %b, <8 x i8> %c + ret <8 x i8> %d +} + +define <8 x i8> 
@vselect_cmp_eq(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { +;CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + %cmp = icmp eq <8 x i8> %a, %b + %d = select <8 x i1> %cmp, <8 x i8> %b, <8 x i8> %c + ret <8 x i8> %d +} + +define <8 x i8> @vselect_cmpz_ne(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { +;CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0 +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + %cmp = icmp ne <8 x i8> %a, zeroinitializer + %d = select <8 x i1> %cmp, <8 x i8> %b, <8 x i8> %c + ret <8 x i8> %d +} + +define <8 x i8> @vselect_cmpz_eq(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { +;CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0 +;CHECK-NEXT: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + %cmp = icmp eq <8 x i8> %a, zeroinitializer + %d = select <8 x i1> %cmp, <8 x i8> %b, <8 x i8> %c + ret <8 x i8> %d +} + +define <8 x i8> @vselect_tst(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { +;CHECK: cmtst {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + %tmp3 = and <8 x i8> %a, %b + %tmp4 = icmp ne <8 x i8> %tmp3, zeroinitializer + %d = select <8 x i1> %tmp4, <8 x i8> %b, <8 x i8> %c + ret <8 x i8> %d +} + define <2 x i64> @bsl2xi64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) { -;CHECK: bsl {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %1 = and <2 x i64> %v1, %v2 %2 = xor <2 x i64> %v1, %3 = and <2 x i64> %2, %v3 @@ -568,27 +625,459 @@ define <2 x i64> @bsl2xi64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) { } define <8 x i8> @orrimm8b_as_orrimm4h_lsl0(<8 x i8> %a) { -;CHECK: orr {{v[0-31]+}}.4h, #0xff +;CHECK: orr {{v[0-9]+}}.4h, #0xff %val = or <8 x i8> %a, ret <8 x i8> %val } define <8 x i8> @orrimm8b_as_orimm4h_lsl8(<8 x i8> %a) { -;CHECK: orr {{v[0-31]+}}.4h, #0xff, lsl #8 +;CHECK: orr {{v[0-9]+}}.4h, #0xff, lsl #8 %val = or <8 x i8> %a, ret <8 x i8> %val } define <16 x i8> @orimm16b_as_orrimm8h_lsl0(<16 x i8> %a) { -;CHECK: orr {{v[0-31]+}}.8h, #0xff +;CHECK: orr {{v[0-9]+}}.8h, #0xff %val = or <16 x i8> %a, ret <16 x i8> %val } define <16 x i8> @orimm16b_as_orrimm8h_lsl8(<16 x i8> %a) { -;CHECK: orr {{v[0-31]+}}.8h, #0xff, lsl #8 +;CHECK: orr {{v[0-9]+}}.8h, #0xff, lsl #8 %val = or <16 x i8> %a, ret <16 x i8> %val } +define <8 x i8> @and8imm2s_lsl0(<8 x i8> %a) { +;CHECK: bic {{v[0-9]+}}.2s, #0xff + %tmp1 = and <8 x i8> %a, < i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255> + ret <8 x i8> %tmp1 +} + +define <8 x i8> @and8imm2s_lsl8(<8 x i8> %a) { +;CHECK: bic {{v[0-9]+}}.2s, #0xff, lsl #8 + %tmp1 = and <8 x i8> %a, < i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255> + ret <8 x i8> %tmp1 +} + +define <8 x i8> @and8imm2s_lsl16(<8 x i8> %a) { +;CHECK: bic {{v[0-9]+}}.2s, #0xff, lsl #16 + %tmp1 = and <8 x i8> %a, < i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255> + ret <8 x i8> %tmp1 +} + +define <8 x i8> @and8imm2s_lsl24(<8 x i8> %a) { +;CHECK: bic {{v[0-9]+}}.2s, #0xfe, lsl #24 + %tmp1 = and <8 x i8> %a, < i8 255, i8 255, i8 255, i8 1, i8 255, i8 255, i8 255, i8 1> + ret <8 x i8> %tmp1 +} + +define <4 x i16> @and16imm2s_lsl0(<4 x i16> %a) { +;CHECK: bic {{v[0-9]+}}.2s, #0xff + %tmp1 = and <4 x i16> %a, < i16 65280, i16 65535, i16 65280, i16 65535> + ret <4 x i16> %tmp1 +} + +define <4 x i16> @and16imm2s_lsl8(<4 x i16> %a) { +;CHECK: bic {{v[0-9]+}}.2s, #0xff, lsl #8 + %tmp1 = and <4 x i16> 
%a, < i16 255, i16 65535, i16 255, i16 65535> + ret <4 x i16> %tmp1 +} + +define <4 x i16> @and16imm2s_lsl16(<4 x i16> %a) { +;CHECK: bic {{v[0-9]+}}.2s, #0xff, lsl #16 + %tmp1 = and <4 x i16> %a, < i16 65535, i16 65280, i16 65535, i16 65280> + ret <4 x i16> %tmp1 +} + +define <4 x i16> @and16imm2s_lsl24(<4 x i16> %a) { +;CHECK: bic {{v[0-9]+}}.2s, #0xfe, lsl #24 + %tmp1 = and <4 x i16> %a, < i16 65535, i16 511, i16 65535, i16 511> + ret <4 x i16> %tmp1 +} + + +define <1 x i64> @and64imm2s_lsl0(<1 x i64> %a) { +;CHECK: bic {{v[0-9]+}}.2s, #0xff + %tmp1 = and <1 x i64> %a, < i64 -1095216660736> + ret <1 x i64> %tmp1 +} + +define <1 x i64> @and64imm2s_lsl8(<1 x i64> %a) { +;CHECK: bic {{v[0-9]+}}.2s, #0xff, lsl #8 + %tmp1 = and <1 x i64> %a, < i64 -280375465148161> + ret <1 x i64> %tmp1 +} + +define <1 x i64> @and64imm2s_lsl16(<1 x i64> %a) { +;CHECK: bic {{v[0-9]+}}.2s, #0xff, lsl #16 + %tmp1 = and <1 x i64> %a, < i64 -71776119077928961> + ret <1 x i64> %tmp1 +} + +define <1 x i64> @and64imm2s_lsl24(<1 x i64> %a) { +;CHECK: bic {{v[0-9]+}}.2s, #0xfe, lsl #24 + %tmp1 = and <1 x i64> %a, < i64 144115183814443007> + ret <1 x i64> %tmp1 +} + +define <16 x i8> @and8imm4s_lsl0(<16 x i8> %a) { +;CHECK: bic {{v[0-9]+}}.4s, #0xff + %tmp1 = and <16 x i8> %a, < i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255> + ret <16 x i8> %tmp1 +} + +define <16 x i8> @and8imm4s_lsl8(<16 x i8> %a) { +;CHECK: bic {{v[0-9]+}}.4s, #0xff, lsl #8 + %tmp1 = and <16 x i8> %a, < i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255> + ret <16 x i8> %tmp1 +} + +define <16 x i8> @and8imm4s_lsl16(<16 x i8> %a) { +;CHECK: bic {{v[0-9]+}}.4s, #0xff, lsl #16 + %tmp1 = and <16 x i8> %a, < i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255> + ret <16 x i8> %tmp1 +} + +define <16 x i8> @and8imm4s_lsl24(<16 x i8> %a) { +;CHECK: bic {{v[0-9]+}}.4s, #0xfe, lsl #24 + %tmp1 = and <16 x i8> %a, < i8 255, i8 255, i8 255, i8 1, i8 255, i8 255, i8 255, i8 1, i8 255, i8 255, i8 255, i8 1, i8 255, i8 255, i8 255, i8 1> + ret <16 x i8> %tmp1 +} + +define <8 x i16> @and16imm4s_lsl0(<8 x i16> %a) { +;CHECK: bic {{v[0-9]+}}.4s, #0xff + %tmp1 = and <8 x i16> %a, < i16 65280, i16 65535, i16 65280, i16 65535, i16 65280, i16 65535, i16 65280, i16 65535> + ret <8 x i16> %tmp1 +} + +define <8 x i16> @and16imm4s_lsl8(<8 x i16> %a) { +;CHECK: bic {{v[0-9]+}}.4s, #0xff, lsl #8 + %tmp1 = and <8 x i16> %a, < i16 255, i16 65535, i16 255, i16 65535, i16 255, i16 65535, i16 255, i16 65535> + ret <8 x i16> %tmp1 +} + +define <8 x i16> @and16imm4s_lsl16(<8 x i16> %a) { +;CHECK: bic {{v[0-9]+}}.4s, #0xff, lsl #16 + %tmp1 = and <8 x i16> %a, < i16 65535, i16 65280, i16 65535, i16 65280, i16 65535, i16 65280, i16 65535, i16 65280> + ret <8 x i16> %tmp1 +} + +define <8 x i16> @and16imm4s_lsl24(<8 x i16> %a) { +;CHECK: bic {{v[0-9]+}}.4s, #0xfe, lsl #24 + %tmp1 = and <8 x i16> %a, < i16 65535, i16 511, i16 65535, i16 511, i16 65535, i16 511, i16 65535, i16 511> + ret <8 x i16> %tmp1 +} + +define <2 x i64> @and64imm4s_lsl0(<2 x i64> %a) { +;CHECK: bic {{v[0-9]+}}.4s, #0xff + %tmp1 = and <2 x i64> %a, < i64 -1095216660736, i64 -1095216660736> + ret <2 x i64> %tmp1 +} + +define <2 x i64> @and64imm4s_lsl8(<2 x i64> %a) { +;CHECK: bic {{v[0-9]+}}.4s, #0xff, lsl #8 + %tmp1 = and <2 x i64> %a, < i64 -280375465148161, i64 -280375465148161> + ret <2 x i64> %tmp1 +} + +define 
<2 x i64> @and64imm4s_lsl16(<2 x i64> %a) { +;CHECK: bic {{v[0-9]+}}.4s, #0xff, lsl #16 + %tmp1 = and <2 x i64> %a, < i64 -71776119077928961, i64 -71776119077928961> + ret <2 x i64> %tmp1 +} + +define <2 x i64> @and64imm4s_lsl24(<2 x i64> %a) { +;CHECK: bic {{v[0-9]+}}.4s, #0xfe, lsl #24 + %tmp1 = and <2 x i64> %a, < i64 144115183814443007, i64 144115183814443007> + ret <2 x i64> %tmp1 +} + +define <8 x i8> @and8imm4h_lsl0(<8 x i8> %a) { +;CHECK: bic {{v[0-9]+}}.4h, #0xff + %tmp1 = and <8 x i8> %a, < i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255> + ret <8 x i8> %tmp1 +} + +define <8 x i8> @and8imm4h_lsl8(<8 x i8> %a) { +;CHECK: bic {{v[0-9]+}}.4h, #0xff, lsl #8 + %tmp1 = and <8 x i8> %a, < i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0> + ret <8 x i8> %tmp1 +} + +define <2 x i32> @and16imm4h_lsl0(<2 x i32> %a) { +;CHECK: bic {{v[0-9]+}}.4h, #0xff + %tmp1 = and <2 x i32> %a, < i32 4278255360, i32 4278255360> + ret <2 x i32> %tmp1 +} + +define <2 x i32> @and16imm4h_lsl8(<2 x i32> %a) { +;CHECK: bic {{v[0-9]+}}.4h, #0xff, lsl #8 + %tmp1 = and <2 x i32> %a, < i32 16711935, i32 16711935> + ret <2 x i32> %tmp1 +} + +define <1 x i64> @and64imm4h_lsl0(<1 x i64> %a) { +;CHECK: bic {{v[0-9]+}}.4h, #0xff + %tmp1 = and <1 x i64> %a, < i64 -71777214294589696> + ret <1 x i64> %tmp1 +} + +define <1 x i64> @and64imm4h_lsl8(<1 x i64> %a) { +;CHECK: bic {{v[0-9]+}}.4h, #0xff, lsl #8 + %tmp1 = and <1 x i64> %a, < i64 71777214294589695> + ret <1 x i64> %tmp1 +} + +define <16 x i8> @and8imm8h_lsl0(<16 x i8> %a) { +;CHECK: bic {{v[0-9]+}}.8h, #0xff + %tmp1 = and <16 x i8> %a, < i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255 > + ret <16 x i8> %tmp1 +} + +define <16 x i8> @and8imm8h_lsl8(<16 x i8> %a) { +;CHECK: bic {{v[0-9]+}}.8h, #0xff, lsl #8 + %tmp1 = and <16 x i8> %a, + ret <16 x i8> %tmp1 +} + +define <4 x i32> @and16imm8h_lsl0(<4 x i32> %a) { +;CHECK: bic {{v[0-9]+}}.8h, #0xff + %tmp1 = and <4 x i32> %a, < i32 4278255360, i32 4278255360, i32 4278255360, i32 4278255360> + ret <4 x i32> %tmp1 +} + +define <4 x i32> @and16imm8h_lsl8(<4 x i32> %a) { +;CHECK: bic {{v[0-9]+}}.8h, #0xff, lsl #8 + %tmp1 = and <4 x i32> %a, < i32 16711935, i32 16711935, i32 16711935, i32 16711935> + ret <4 x i32> %tmp1 +} + +define <2 x i64> @and64imm8h_lsl0(<2 x i64> %a) { +;CHECK: bic {{v[0-9]+}}.8h, #0xff + %tmp1 = and <2 x i64> %a, < i64 -71777214294589696, i64 -71777214294589696> + ret <2 x i64> %tmp1 +} + +define <2 x i64> @and64imm8h_lsl8(<2 x i64> %a) { +;CHECK: bic {{v[0-9]+}}.8h, #0xff, lsl #8 + %tmp1 = and <2 x i64> %a, < i64 71777214294589695, i64 71777214294589695> + ret <2 x i64> %tmp1 +} + +define <8 x i8> @orr8imm2s_lsl0(<8 x i8> %a) { +;CHECK: orr {{v[0-9]+}}.2s, #0xff + %tmp1 = or <8 x i8> %a, < i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0> + ret <8 x i8> %tmp1 +} + +define <8 x i8> @orr8imm2s_lsl8(<8 x i8> %a) { +;CHECK: orr {{v[0-9]+}}.2s, #0xff, lsl #8 + %tmp1 = or <8 x i8> %a, < i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0> + ret <8 x i8> %tmp1 +} + +define <8 x i8> @orr8imm2s_lsl16(<8 x i8> %a) { +;CHECK: orr {{v[0-9]+}}.2s, #0xff, lsl #16 + %tmp1 = or <8 x i8> %a, < i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0> + ret <8 x i8> %tmp1 +} + +define <8 x i8> @orr8imm2s_lsl24(<8 x i8> %a) { +;CHECK: orr {{v[0-9]+}}.2s, #0xff, lsl #24 + %tmp1 = or <8 x i8> %a, < i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255> + ret <8 x i8> %tmp1 +} + +define <4 x i16> @orr16imm2s_lsl0(<4 x i16> %a) { +;CHECK: orr {{v[0-9]+}}.2s, 
#0xff + %tmp1 = or <4 x i16> %a, < i16 255, i16 0, i16 255, i16 0> + ret <4 x i16> %tmp1 +} + +define <4 x i16> @orr16imm2s_lsl8(<4 x i16> %a) { +;CHECK: orr {{v[0-9]+}}.2s, #0xff, lsl #8 + %tmp1 = or <4 x i16> %a, < i16 65280, i16 0, i16 65280, i16 0> + ret <4 x i16> %tmp1 +} + +define <4 x i16> @orr16imm2s_lsl16(<4 x i16> %a) { +;CHECK: orr {{v[0-9]+}}.2s, #0xff, lsl #16 + %tmp1 = or <4 x i16> %a, < i16 0, i16 255, i16 0, i16 255> + ret <4 x i16> %tmp1 +} + +define <4 x i16> @orr16imm2s_lsl24(<4 x i16> %a) { +;CHECK: orr {{v[0-9]+}}.2s, #0xff, lsl #24 + %tmp1 = or <4 x i16> %a, < i16 0, i16 65280, i16 0, i16 65280> + ret <4 x i16> %tmp1 +} + +define <1 x i64> @orr64imm2s_lsl0(<1 x i64> %a) { +;CHECK: orr {{v[0-9]+}}.2s, #0xff + %tmp1 = or <1 x i64> %a, < i64 1095216660735> + ret <1 x i64> %tmp1 +} + +define <1 x i64> @orr64imm2s_lsl8(<1 x i64> %a) { +;CHECK: orr {{v[0-9]+}}.2s, #0xff, lsl #8 + %tmp1 = or <1 x i64> %a, < i64 280375465148160> + ret <1 x i64> %tmp1 +} + +define <1 x i64> @orr64imm2s_lsl16(<1 x i64> %a) { +;CHECK: orr {{v[0-9]+}}.2s, #0xff, lsl #16 + %tmp1 = or <1 x i64> %a, < i64 71776119077928960> + ret <1 x i64> %tmp1 +} + +define <1 x i64> @orr64imm2s_lsl24(<1 x i64> %a) { +;CHECK: orr {{v[0-9]+}}.2s, #0xff, lsl #24 + %tmp1 = or <1 x i64> %a, < i64 -72057589759737856> + ret <1 x i64> %tmp1 +} + +define <16 x i8> @orr8imm4s_lsl0(<16 x i8> %a) { +;CHECK: orr {{v[0-9]+}}.4s, #0xff + %tmp1 = or <16 x i8> %a, < i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0> + ret <16 x i8> %tmp1 +} + +define <16 x i8> @orr8imm4s_lsl8(<16 x i8> %a) { +;CHECK: orr {{v[0-9]+}}.4s, #0xff, lsl #8 + %tmp1 = or <16 x i8> %a, < i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0> + ret <16 x i8> %tmp1 +} + +define <16 x i8> @orr8imm4s_lsl16(<16 x i8> %a) { +;CHECK: orr {{v[0-9]+}}.4s, #0xff, lsl #16 + %tmp1 = or <16 x i8> %a, < i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0> + ret <16 x i8> %tmp1 +} + +define <16 x i8> @orr8imm4s_lsl24(<16 x i8> %a) { +;CHECK: orr {{v[0-9]+}}.4s, #0xff, lsl #24 + %tmp1 = or <16 x i8> %a, < i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255> + ret <16 x i8> %tmp1 +} + +define <8 x i16> @orr16imm4s_lsl0(<8 x i16> %a) { +;CHECK: orr {{v[0-9]+}}.4s, #0xff + %tmp1 = or <8 x i16> %a, < i16 255, i16 0, i16 255, i16 0, i16 255, i16 0, i16 255, i16 0> + ret <8 x i16> %tmp1 +} + +define <8 x i16> @orr16imm4s_lsl8(<8 x i16> %a) { +;CHECK: orr {{v[0-9]+}}.4s, #0xff, lsl #8 + %tmp1 = or <8 x i16> %a, < i16 65280, i16 0, i16 65280, i16 0, i16 65280, i16 0, i16 65280, i16 0> + ret <8 x i16> %tmp1 +} + +define <8 x i16> @orr16imm4s_lsl16(<8 x i16> %a) { +;CHECK: orr {{v[0-9]+}}.4s, #0xff, lsl #16 + %tmp1 = or <8 x i16> %a, < i16 0, i16 255, i16 0, i16 255, i16 0, i16 255, i16 0, i16 255> + ret <8 x i16> %tmp1 +} + +define <8 x i16> @orr16imm4s_lsl24(<8 x i16> %a) { +;CHECK: orr {{v[0-9]+}}.4s, #0xff, lsl #24 + %tmp1 = or <8 x i16> %a, < i16 0, i16 65280, i16 0, i16 65280, i16 0, i16 65280, i16 0, i16 65280> + ret <8 x i16> %tmp1 +} + +define <2 x i64> @orr64imm4s_lsl0(<2 x i64> %a) { +;CHECK: orr {{v[0-9]+}}.4s, #0xff + %tmp1 = or <2 x i64> %a, < i64 1095216660735, i64 1095216660735> + ret <2 x i64> %tmp1 +} + +define <2 x i64> @orr64imm4s_lsl8(<2 x i64> %a) { +;CHECK: orr {{v[0-9]+}}.4s, #0xff, lsl #8 + %tmp1 = or <2 x i64> %a, < i64 280375465148160, i64 
280375465148160> + ret <2 x i64> %tmp1 +} + +define <2 x i64> @orr64imm4s_lsl16(<2 x i64> %a) { +;CHECK: orr {{v[0-9]+}}.4s, #0xff, lsl #16 + %tmp1 = or <2 x i64> %a, < i64 71776119077928960, i64 71776119077928960> + ret <2 x i64> %tmp1 +} + +define <2 x i64> @orr64imm4s_lsl24(<2 x i64> %a) { +;CHECK: orr {{v[0-9]+}}.4s, #0xff, lsl #24 + %tmp1 = or <2 x i64> %a, < i64 -72057589759737856, i64 -72057589759737856> + ret <2 x i64> %tmp1 +} + +define <8 x i8> @orr8imm4h_lsl0(<8 x i8> %a) { +;CHECK: orr {{v[0-9]+}}.4h, #0xff + %tmp1 = or <8 x i8> %a, < i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0> + ret <8 x i8> %tmp1 +} + +define <8 x i8> @orr8imm4h_lsl8(<8 x i8> %a) { +;CHECK: orr {{v[0-9]+}}.4h, #0xff, lsl #8 + %tmp1 = or <8 x i8> %a, < i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255> + ret <8 x i8> %tmp1 +} + +define <2 x i32> @orr16imm4h_lsl0(<2 x i32> %a) { +;CHECK: orr {{v[0-9]+}}.4h, #0xff + %tmp1 = or <2 x i32> %a, < i32 16711935, i32 16711935> + ret <2 x i32> %tmp1 +} + +define <2 x i32> @orr16imm4h_lsl8(<2 x i32> %a) { +;CHECK: orr {{v[0-9]+}}.4h, #0xff, lsl #8 + %tmp1 = or <2 x i32> %a, < i32 4278255360, i32 4278255360> + ret <2 x i32> %tmp1 +} + +define <1 x i64> @orr64imm4h_lsl0(<1 x i64> %a) { +;CHECK: orr {{v[0-9]+}}.4h, #0xff + %tmp1 = or <1 x i64> %a, < i64 71777214294589695> + ret <1 x i64> %tmp1 +} + +define <1 x i64> @orr64imm4h_lsl8(<1 x i64> %a) { +;CHECK: orr {{v[0-9]+}}.4h, #0xff, lsl #8 + %tmp1 = or <1 x i64> %a, < i64 -71777214294589696> + ret <1 x i64> %tmp1 +} + +define <16 x i8> @orr8imm8h_lsl0(<16 x i8> %a) { +;CHECK: orr {{v[0-9]+}}.8h, #0xff + %tmp1 = or <16 x i8> %a, < i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0> + ret <16 x i8> %tmp1 +} + +define <16 x i8> @orr8imm8h_lsl8(<16 x i8> %a) { +;CHECK: orr {{v[0-9]+}}.8h, #0xff, lsl #8 + %tmp1 = or <16 x i8> %a, < i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255> + ret <16 x i8> %tmp1 +} + +define <4 x i32> @orr16imm8h_lsl0(<4 x i32> %a) { +;CHECK: orr {{v[0-9]+}}.8h, #0xff + %tmp1 = or <4 x i32> %a, < i32 16711935, i32 16711935, i32 16711935, i32 16711935> + ret <4 x i32> %tmp1 +} + +define <4 x i32> @orr16imm8h_lsl8(<4 x i32> %a) { +;CHECK: orr {{v[0-9]+}}.8h, #0xff, lsl #8 + %tmp1 = or <4 x i32> %a, < i32 4278255360, i32 4278255360, i32 4278255360, i32 4278255360> + ret <4 x i32> %tmp1 +} + +define <2 x i64> @orr64imm8h_lsl0(<2 x i64> %a) { +;CHECK: orr {{v[0-9]+}}.8h, #0xff + %tmp1 = or <2 x i64> %a, < i64 71777214294589695, i64 71777214294589695> + ret <2 x i64> %tmp1 +} + +define <2 x i64> @orr64imm8h_lsl8(<2 x i64> %a) { +;CHECK: orr {{v[0-9]+}}.8h, #0xff, lsl #8 + %tmp1 = or <2 x i64> %a, < i64 -71777214294589696, i64 -71777214294589696> + ret <2 x i64> %tmp1 +} diff --git a/test/CodeGen/AArch64/neon-bsl.ll b/test/CodeGen/AArch64/neon-bsl.ll index 6bd923d..c55fd01 100644 --- a/test/CodeGen/AArch64/neon-bsl.ll +++ b/test/CodeGen/AArch64/neon-bsl.ll @@ -220,3 +220,16 @@ entry: ret <2 x double> %vbsl3.i } +define <2 x double> @test_bsl_v2f64(<2 x i1> %v1, <2 x double> %v2, <2 x double> %v3) { +; CHECK-LABEL: test_bsl_v2f64: +; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b + %1 = select <2 x i1> %v1, <2 x double> %v2, <2 x double> %v3 + ret <2 x double> %1 +} + +define <4 x float> @test_bsl_v4f32(<4 x i1> %v1, <4 x float> %v2, <4 x float> %v3) { +; CHECK-LABEL: test_bsl_v4f32: +; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b + %1 = select 
<4 x i1> %v1, <4 x float> %v2, <4 x float> %v3 + ret <4 x float> %1 +} diff --git a/test/CodeGen/AArch64/neon-copy.ll b/test/CodeGen/AArch64/neon-copy.ll index e18530e..b4d55df 100644 --- a/test/CodeGen/AArch64/neon-copy.ll +++ b/test/CodeGen/AArch64/neon-copy.ll @@ -2,269 +2,269 @@ define <16 x i8> @ins16bw(<16 x i8> %tmp1, i8 %tmp2) { -;CHECK: ins {{v[0-31]+}}.b[15], {{w[0-31]+}} +;CHECK: ins {{v[0-9]+}}.b[15], {{w[0-9]+}} %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 15 ret <16 x i8> %tmp3 } define <8 x i16> @ins8hw(<8 x i16> %tmp1, i16 %tmp2) { -;CHECK: ins {{v[0-31]+}}.h[6], {{w[0-31]+}} +;CHECK: ins {{v[0-9]+}}.h[6], {{w[0-9]+}} %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 6 ret <8 x i16> %tmp3 } define <4 x i32> @ins4sw(<4 x i32> %tmp1, i32 %tmp2) { -;CHECK: ins {{v[0-31]+}}.s[2], {{w[0-31]+}} +;CHECK: ins {{v[0-9]+}}.s[2], {{w[0-9]+}} %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 2 ret <4 x i32> %tmp3 } define <2 x i64> @ins2dw(<2 x i64> %tmp1, i64 %tmp2) { -;CHECK: ins {{v[0-31]+}}.d[1], {{x[0-31]+}} +;CHECK: ins {{v[0-9]+}}.d[1], {{x[0-9]+}} %tmp3 = insertelement <2 x i64> %tmp1, i64 %tmp2, i32 1 ret <2 x i64> %tmp3 } define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) { -;CHECK: ins {{v[0-31]+}}.b[5], {{w[0-31]+}} +;CHECK: ins {{v[0-9]+}}.b[5], {{w[0-9]+}} %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 5 ret <8 x i8> %tmp3 } define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) { -;CHECK: ins {{v[0-31]+}}.h[3], {{w[0-31]+}} +;CHECK: ins {{v[0-9]+}}.h[3], {{w[0-9]+}} %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 3 ret <4 x i16> %tmp3 } define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) { -;CHECK: ins {{v[0-31]+}}.s[1], {{w[0-31]+}} +;CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}} %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1 ret <2 x i32> %tmp3 } define <16 x i8> @ins16b16(<16 x i8> %tmp1, <16 x i8> %tmp2) { -;CHECK: ins {{v[0-31]+}}.b[15], {{v[0-31]+}}.b[2] +;CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2] %tmp3 = extractelement <16 x i8> %tmp1, i32 2 %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15 ret <16 x i8> %tmp4 } define <8 x i16> @ins8h8(<8 x i16> %tmp1, <8 x i16> %tmp2) { -;CHECK: ins {{v[0-31]+}}.h[7], {{v[0-31]+}}.h[2] +;CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2] %tmp3 = extractelement <8 x i16> %tmp1, i32 2 %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7 ret <8 x i16> %tmp4 } define <4 x i32> @ins4s4(<4 x i32> %tmp1, <4 x i32> %tmp2) { -;CHECK: ins {{v[0-31]+}}.s[1], {{v[0-31]+}}.s[2] +;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2] %tmp3 = extractelement <4 x i32> %tmp1, i32 2 %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1 ret <4 x i32> %tmp4 } define <2 x i64> @ins2d2(<2 x i64> %tmp1, <2 x i64> %tmp2) { -;CHECK: ins {{v[0-31]+}}.d[1], {{v[0-31]+}}.d[0] +;CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] %tmp3 = extractelement <2 x i64> %tmp1, i32 0 %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1 ret <2 x i64> %tmp4 } define <4 x float> @ins4f4(<4 x float> %tmp1, <4 x float> %tmp2) { -;CHECK: ins {{v[0-31]+}}.s[1], {{v[0-31]+}}.s[2] +;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2] %tmp3 = extractelement <4 x float> %tmp1, i32 2 %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1 ret <4 x float> %tmp4 } define <2 x double> @ins2df2(<2 x double> %tmp1, <2 x double> %tmp2) { -;CHECK: ins {{v[0-31]+}}.d[1], {{v[0-31]+}}.d[0] +;CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] %tmp3 = extractelement <2 x double> %tmp1, i32 0 %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1 ret <2 x 
double> %tmp4 } define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) { -;CHECK: ins {{v[0-31]+}}.b[15], {{v[0-31]+}}.b[2] +;CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2] %tmp3 = extractelement <8 x i8> %tmp1, i32 2 %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15 ret <16 x i8> %tmp4 } define <8 x i16> @ins4h8(<4 x i16> %tmp1, <8 x i16> %tmp2) { -;CHECK: ins {{v[0-31]+}}.h[7], {{v[0-31]+}}.h[2] +;CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2] %tmp3 = extractelement <4 x i16> %tmp1, i32 2 %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7 ret <8 x i16> %tmp4 } define <4 x i32> @ins2s4(<2 x i32> %tmp1, <4 x i32> %tmp2) { -;CHECK: ins {{v[0-31]+}}.s[1], {{v[0-31]+}}.s[1] +;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1] %tmp3 = extractelement <2 x i32> %tmp1, i32 1 %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1 ret <4 x i32> %tmp4 } define <2 x i64> @ins1d2(<1 x i64> %tmp1, <2 x i64> %tmp2) { -;CHECK: ins {{v[0-31]+}}.d[1], {{v[0-31]+}}.d[0] +;CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] %tmp3 = extractelement <1 x i64> %tmp1, i32 0 %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1 ret <2 x i64> %tmp4 } define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) { -;CHECK: ins {{v[0-31]+}}.s[1], {{v[0-31]+}}.s[1] +;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1] %tmp3 = extractelement <2 x float> %tmp1, i32 1 %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1 ret <4 x float> %tmp4 } define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) { -;CHECK: ins {{v[0-31]+}}.d[1], {{v[0-31]+}}.d[0] +;CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] %tmp3 = extractelement <1 x double> %tmp1, i32 0 %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1 ret <2 x double> %tmp4 } define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) { -;CHECK: ins {{v[0-31]+}}.b[7], {{v[0-31]+}}.b[2] +;CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[2] %tmp3 = extractelement <16 x i8> %tmp1, i32 2 %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 7 ret <8 x i8> %tmp4 } define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) { -;CHECK: ins {{v[0-31]+}}.h[3], {{v[0-31]+}}.h[2] +;CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2] %tmp3 = extractelement <8 x i16> %tmp1, i32 2 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3 ret <4 x i16> %tmp4 } define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) { -;CHECK: ins {{v[0-31]+}}.s[1], {{v[0-31]+}}.s[2] +;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2] %tmp3 = extractelement <4 x i32> %tmp1, i32 2 %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1 ret <2 x i32> %tmp4 } define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) { -;CHECK: ins {{v[0-31]+}}.d[0], {{v[0-31]+}}.d[0] +;CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0] %tmp3 = extractelement <2 x i64> %tmp1, i32 0 %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0 ret <1 x i64> %tmp4 } define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) { -;CHECK: ins {{v[0-31]+}}.s[1], {{v[0-31]+}}.s[2] +;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2] %tmp3 = extractelement <4 x float> %tmp1, i32 2 %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1 ret <2 x float> %tmp4 } define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) { -;CHECK: ins {{v[0-31]+}}.d[0], {{v[0-31]+}}.d[0] +;CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0] %tmp3 = extractelement <2 x double> %tmp1, i32 0 %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0 ret <1 x double> %tmp4 } define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) { -;CHECK: ins 
{{v[0-31]+}}.b[4], {{v[0-31]+}}.b[2] +;CHECK: ins {{v[0-9]+}}.b[4], {{v[0-9]+}}.b[2] %tmp3 = extractelement <8 x i8> %tmp1, i32 2 %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4 ret <8 x i8> %tmp4 } define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) { -;CHECK: ins {{v[0-31]+}}.h[3], {{v[0-31]+}}.h[2] +;CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2] %tmp3 = extractelement <4 x i16> %tmp1, i32 2 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3 ret <4 x i16> %tmp4 } define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) { -;CHECK: ins {{v[0-31]+}}.s[1], {{v[0-31]+}}.s[0] +;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] %tmp3 = extractelement <2 x i32> %tmp1, i32 0 %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1 ret <2 x i32> %tmp4 } define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) { -;CHECK: ins {{v[0-31]+}}.d[0], {{v[0-31]+}}.d[0] +;CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0] %tmp3 = extractelement <1 x i64> %tmp1, i32 0 %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0 ret <1 x i64> %tmp4 } define <2 x float> @ins2f2(<2 x float> %tmp1, <2 x float> %tmp2) { -;CHECK: ins {{v[0-31]+}}.s[1], {{v[0-31]+}}.s[0] +;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] %tmp3 = extractelement <2 x float> %tmp1, i32 0 %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1 ret <2 x float> %tmp4 } define <1 x double> @ins1df1(<1 x double> %tmp1, <1 x double> %tmp2) { -;CHECK: ins {{v[0-31]+}}.d[0], {{v[0-31]+}}.d[0] +;CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0] %tmp3 = extractelement <1 x double> %tmp1, i32 0 %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0 ret <1 x double> %tmp4 } define i32 @umovw16b(<16 x i8> %tmp1) { -;CHECK: umov {{w[0-31]+}}, {{v[0-31]+}}.b[8] +;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[8] %tmp3 = extractelement <16 x i8> %tmp1, i32 8 %tmp4 = zext i8 %tmp3 to i32 ret i32 %tmp4 } define i32 @umovw8h(<8 x i16> %tmp1) { -;CHECK: umov {{w[0-31]+}}, {{v[0-31]+}}.h[2] +;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2] %tmp3 = extractelement <8 x i16> %tmp1, i32 2 %tmp4 = zext i16 %tmp3 to i32 ret i32 %tmp4 } define i32 @umovw4s(<4 x i32> %tmp1) { -;CHECK: umov {{w[0-31]+}}, {{v[0-31]+}}.s[2] +;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.s[2] %tmp3 = extractelement <4 x i32> %tmp1, i32 2 ret i32 %tmp3 } define i64 @umovx2d(<2 x i64> %tmp1) { -;CHECK: umov {{x[0-31]+}}, {{v[0-31]+}}.d[0] +;CHECK: umov {{x[0-9]+}}, {{v[0-9]+}}.d[0] %tmp3 = extractelement <2 x i64> %tmp1, i32 0 ret i64 %tmp3 } define i32 @umovw8b(<8 x i8> %tmp1) { -;CHECK: umov {{w[0-31]+}}, {{v[0-31]+}}.b[7] +;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[7] %tmp3 = extractelement <8 x i8> %tmp1, i32 7 %tmp4 = zext i8 %tmp3 to i32 ret i32 %tmp4 } define i32 @umovw4h(<4 x i16> %tmp1) { -;CHECK: umov {{w[0-31]+}}, {{v[0-31]+}}.h[2] +;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2] %tmp3 = extractelement <4 x i16> %tmp1, i32 2 %tmp4 = zext i16 %tmp3 to i32 ret i32 %tmp4 } define i32 @umovw2s(<2 x i32> %tmp1) { -;CHECK: umov {{w[0-31]+}}, {{v[0-31]+}}.s[1] +;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.s[1] %tmp3 = extractelement <2 x i32> %tmp1, i32 1 ret i32 %tmp3 } define i64 @umovx1d(<1 x i64> %tmp1) { -;CHECK: fmov {{x[0-31]+}}, {{d[0-31]+}} +;CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}} %tmp3 = extractelement <1 x i64> %tmp1, i32 0 ret i64 %tmp3 } define i32 @smovw16b(<16 x i8> %tmp1) { -;CHECK: smov {{w[0-31]+}}, {{v[0-31]+}}.b[8] +;CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[8] %tmp3 = extractelement <16 x i8> %tmp1, i32 8 %tmp4 = sext i8 %tmp3 to i32 %tmp5 = add i32 5, %tmp4 @@ -272,7 +272,7 @@ define i32 
@smovw16b(<16 x i8> %tmp1) { } define i32 @smovw8h(<8 x i16> %tmp1) { -;CHECK: smov {{w[0-31]+}}, {{v[0-31]+}}.h[2] +;CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2] %tmp3 = extractelement <8 x i16> %tmp1, i32 2 %tmp4 = sext i16 %tmp3 to i32 %tmp5 = add i32 5, %tmp4 @@ -280,28 +280,28 @@ define i32 @smovw8h(<8 x i16> %tmp1) { } define i32 @smovx16b(<16 x i8> %tmp1) { -;CHECK: smov {{x[0-31]+}}, {{v[0-31]+}}.b[8] +;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[8] %tmp3 = extractelement <16 x i8> %tmp1, i32 8 %tmp4 = sext i8 %tmp3 to i32 ret i32 %tmp4 } define i32 @smovx8h(<8 x i16> %tmp1) { -;CHECK: smov {{x[0-31]+}}, {{v[0-31]+}}.h[2] +;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2] %tmp3 = extractelement <8 x i16> %tmp1, i32 2 %tmp4 = sext i16 %tmp3 to i32 ret i32 %tmp4 } define i64 @smovx4s(<4 x i32> %tmp1) { -;CHECK: smov {{x[0-31]+}}, {{v[0-31]+}}.s[2] +;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[2] %tmp3 = extractelement <4 x i32> %tmp1, i32 2 %tmp4 = sext i32 %tmp3 to i64 ret i64 %tmp4 } define i32 @smovw8b(<8 x i8> %tmp1) { -;CHECK: smov {{w[0-31]+}}, {{v[0-31]+}}.b[4] +;CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[4] %tmp3 = extractelement <8 x i8> %tmp1, i32 4 %tmp4 = sext i8 %tmp3 to i32 %tmp5 = add i32 5, %tmp4 @@ -309,7 +309,7 @@ define i32 @smovw8b(<8 x i8> %tmp1) { } define i32 @smovw4h(<4 x i16> %tmp1) { -;CHECK: smov {{w[0-31]+}}, {{v[0-31]+}}.h[2] +;CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2] %tmp3 = extractelement <4 x i16> %tmp1, i32 2 %tmp4 = sext i16 %tmp3 to i32 %tmp5 = add i32 5, %tmp4 @@ -317,21 +317,21 @@ define i32 @smovw4h(<4 x i16> %tmp1) { } define i32 @smovx8b(<8 x i8> %tmp1) { -;CHECK: smov {{x[0-31]+}}, {{v[0-31]+}}.b[6] +;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[6] %tmp3 = extractelement <8 x i8> %tmp1, i32 6 %tmp4 = sext i8 %tmp3 to i32 ret i32 %tmp4 } define i32 @smovx4h(<4 x i16> %tmp1) { -;CHECK: smov {{x[0-31]+}}, {{v[0-31]+}}.h[2] +;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2] %tmp3 = extractelement <4 x i16> %tmp1, i32 2 %tmp4 = sext i16 %tmp3 to i32 ret i32 %tmp4 } define i64 @smovx2s(<2 x i32> %tmp1) { -;CHECK: smov {{x[0-31]+}}, {{v[0-31]+}}.s[1] +;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[1] %tmp3 = extractelement <2 x i32> %tmp1, i32 1 %tmp4 = sext i32 %tmp3 to i64 ret i64 %tmp4 @@ -612,4 +612,791 @@ define <1 x double> @test_bitcasti64tov1f64(i64 %in) { %res = bitcast i64 %in to <1 x double> ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} ret <1 x double> %res -} \ No newline at end of file +} + +define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 { +; CHECK-LABEL: test_bitcastv8i8tov1f64: +; CHECK: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK-NEXT: fcvtzs {{d[0-9]+}}, {{d[0-9]+}} + %sub.i = sub <8 x i8> zeroinitializer, %a + %1 = bitcast <8 x i8> %sub.i to <1 x double> + %vcvt.i = fptosi <1 x double> %1 to <1 x i64> + ret <1 x i64> %vcvt.i +} + +define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 { +; CHECK-LABEL: test_bitcastv4i16tov1f64: +; CHECK: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +; CHECK-NEXT: fcvtzs {{d[0-9]+}}, {{d[0-9]+}} + %sub.i = sub <4 x i16> zeroinitializer, %a + %1 = bitcast <4 x i16> %sub.i to <1 x double> + %vcvt.i = fptosi <1 x double> %1 to <1 x i64> + ret <1 x i64> %vcvt.i +} + +define <1 x i64> @test_bitcastv2i32tov1f64(<2 x i32> %a) #0 { +; CHECK-LABEL: test_bitcastv2i32tov1f64: +; CHECK: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK-NEXT: fcvtzs {{d[0-9]+}}, {{d[0-9]+}} + %sub.i = sub <2 x i32> zeroinitializer, %a + %1 = bitcast <2 x i32> %sub.i to <1 x double> + %vcvt.i = fptosi <1 x double> %1 to <1 x i64> + ret <1 x i64> %vcvt.i +} + +define <1 x i64> 
@test_bitcastv1i64tov1f64(<1 x i64> %a) #0 { +; CHECK-LABEL: test_bitcastv1i64tov1f64: +; CHECK: neg {{d[0-9]+}}, {{d[0-9]+}} +; CHECK-NEXT: fcvtzs {{d[0-9]+}}, {{d[0-9]+}} + %sub.i = sub <1 x i64> zeroinitializer, %a + %1 = bitcast <1 x i64> %sub.i to <1 x double> + %vcvt.i = fptosi <1 x double> %1 to <1 x i64> + ret <1 x i64> %vcvt.i +} + +define <1 x i64> @test_bitcastv2f32tov1f64(<2 x float> %a) #0 { +; CHECK-LABEL: test_bitcastv2f32tov1f64: +; CHECK: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK-NEXT: fcvtzs {{d[0-9]+}}, {{d[0-9]+}} + %sub.i = fsub <2 x float> , %a + %1 = bitcast <2 x float> %sub.i to <1 x double> + %vcvt.i = fptosi <1 x double> %1 to <1 x i64> + ret <1 x i64> %vcvt.i +} + +define <8 x i8> @test_bitcastv1f64tov8i8(<1 x i64> %a) #0 { +; CHECK-LABEL: test_bitcastv1f64tov8i8: +; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}} +; CHECK-NEXT: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + %vcvt.i = sitofp <1 x i64> %a to <1 x double> + %1 = bitcast <1 x double> %vcvt.i to <8 x i8> + %sub.i = sub <8 x i8> zeroinitializer, %1 + ret <8 x i8> %sub.i +} + +define <4 x i16> @test_bitcastv1f64tov4i16(<1 x i64> %a) #0 { +; CHECK-LABEL: test_bitcastv1f64tov4i16: +; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}} +; CHECK-NEXT: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h + %vcvt.i = sitofp <1 x i64> %a to <1 x double> + %1 = bitcast <1 x double> %vcvt.i to <4 x i16> + %sub.i = sub <4 x i16> zeroinitializer, %1 + ret <4 x i16> %sub.i +} + +define <2 x i32> @test_bitcastv1f64tov2i32(<1 x i64> %a) #0 { +; CHECK-LABEL: test_bitcastv1f64tov2i32: +; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}} +; CHECK-NEXT: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + %vcvt.i = sitofp <1 x i64> %a to <1 x double> + %1 = bitcast <1 x double> %vcvt.i to <2 x i32> + %sub.i = sub <2 x i32> zeroinitializer, %1 + ret <2 x i32> %sub.i +} + +define <1 x i64> @test_bitcastv1f64tov1i64(<1 x i64> %a) #0 { +; CHECK-LABEL: test_bitcastv1f64tov1i64: +; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}} +; CHECK-NEXT: neg {{d[0-9]+}}, {{d[0-9]+}} + %vcvt.i = sitofp <1 x i64> %a to <1 x double> + %1 = bitcast <1 x double> %vcvt.i to <1 x i64> + %sub.i = sub <1 x i64> zeroinitializer, %1 + ret <1 x i64> %sub.i +} + +define <2 x float> @test_bitcastv1f64tov2f32(<1 x i64> %a) #0 { +; CHECK-LABEL: test_bitcastv1f64tov2f32: +; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}} +; CHECK-NEXT: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + %vcvt.i = sitofp <1 x i64> %a to <1 x double> + %1 = bitcast <1 x double> %vcvt.i to <2 x float> + %sub.i = fsub <2 x float> , %1 + ret <2 x float> %sub.i +} + +; Test insert element into an undef vector +define <8 x i8> @scalar_to_vector.v8i8(i8 %a) { +; CHECK-LABEL: scalar_to_vector.v8i8: +; CHECK: ins {{v[0-9]+}}.b[0], {{w[0-9]+}} + %b = insertelement <8 x i8> undef, i8 %a, i32 0 + ret <8 x i8> %b +} + +define <16 x i8> @scalar_to_vector.v16i8(i8 %a) { +; CHECK-LABEL: scalar_to_vector.v16i8: +; CHECK: ins {{v[0-9]+}}.b[0], {{w[0-9]+}} + %b = insertelement <16 x i8> undef, i8 %a, i32 0 + ret <16 x i8> %b +} + +define <4 x i16> @scalar_to_vector.v4i16(i16 %a) { +; CHECK-LABEL: scalar_to_vector.v4i16: +; CHECK: ins {{v[0-9]+}}.h[0], {{w[0-9]+}} + %b = insertelement <4 x i16> undef, i16 %a, i32 0 + ret <4 x i16> %b +} + +define <8 x i16> @scalar_to_vector.v8i16(i16 %a) { +; CHECK-LABEL: scalar_to_vector.v8i16: +; CHECK: ins {{v[0-9]+}}.h[0], {{w[0-9]+}} + %b = insertelement <8 x i16> undef, i16 %a, i32 0 + ret <8 x i16> %b +} + +define <2 x i32> @scalar_to_vector.v2i32(i32 %a) { +; CHECK-LABEL: scalar_to_vector.v2i32: +; CHECK: ins {{v[0-9]+}}.s[0], {{w[0-9]+}} + %b = 
insertelement <2 x i32> undef, i32 %a, i32 0 + ret <2 x i32> %b +} + +define <4 x i32> @scalar_to_vector.v4i32(i32 %a) { +; CHECK-LABEL: scalar_to_vector.v4i32: +; CHECK: ins {{v[0-9]+}}.s[0], {{w[0-9]+}} + %b = insertelement <4 x i32> undef, i32 %a, i32 0 + ret <4 x i32> %b +} + +define <2 x i64> @scalar_to_vector.v2i64(i64 %a) { +; CHECK-LABEL: scalar_to_vector.v2i64: +; CHECK: ins {{v[0-9]+}}.d[0], {{x[0-9]+}} + %b = insertelement <2 x i64> undef, i64 %a, i32 0 + ret <2 x i64> %b +} + +define <8 x i8> @testDUP.v1i8(<1 x i8> %a) { +; CHECK-LABEL: testDUP.v1i8: +; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}} + %b = extractelement <1 x i8> %a, i32 0 + %c = insertelement <8 x i8> undef, i8 %b, i32 0 + %d = insertelement <8 x i8> %c, i8 %b, i32 1 + %e = insertelement <8 x i8> %d, i8 %b, i32 2 + %f = insertelement <8 x i8> %e, i8 %b, i32 3 + %g = insertelement <8 x i8> %f, i8 %b, i32 4 + %h = insertelement <8 x i8> %g, i8 %b, i32 5 + %i = insertelement <8 x i8> %h, i8 %b, i32 6 + %j = insertelement <8 x i8> %i, i8 %b, i32 7 + ret <8 x i8> %j +} + +define <8 x i16> @testDUP.v1i16(<1 x i16> %a) { +; CHECK-LABEL: testDUP.v1i16: +; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}} + %b = extractelement <1 x i16> %a, i32 0 + %c = insertelement <8 x i16> undef, i16 %b, i32 0 + %d = insertelement <8 x i16> %c, i16 %b, i32 1 + %e = insertelement <8 x i16> %d, i16 %b, i32 2 + %f = insertelement <8 x i16> %e, i16 %b, i32 3 + %g = insertelement <8 x i16> %f, i16 %b, i32 4 + %h = insertelement <8 x i16> %g, i16 %b, i32 5 + %i = insertelement <8 x i16> %h, i16 %b, i32 6 + %j = insertelement <8 x i16> %i, i16 %b, i32 7 + ret <8 x i16> %j +} + +define <4 x i32> @testDUP.v1i32(<1 x i32> %a) { +; CHECK-LABEL: testDUP.v1i32: +; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}} + %b = extractelement <1 x i32> %a, i32 0 + %c = insertelement <4 x i32> undef, i32 %b, i32 0 + %d = insertelement <4 x i32> %c, i32 %b, i32 1 + %e = insertelement <4 x i32> %d, i32 %b, i32 2 + %f = insertelement <4 x i32> %e, i32 %b, i32 3 + ret <4 x i32> %f +} + +define <8 x i8> @getl(<16 x i8> %x) #0 { +; CHECK-LABEL: getl: +; CHECK: ret + %vecext = extractelement <16 x i8> %x, i32 0 + %vecinit = insertelement <8 x i8> undef, i8 %vecext, i32 0 + %vecext1 = extractelement <16 x i8> %x, i32 1 + %vecinit2 = insertelement <8 x i8> %vecinit, i8 %vecext1, i32 1 + %vecext3 = extractelement <16 x i8> %x, i32 2 + %vecinit4 = insertelement <8 x i8> %vecinit2, i8 %vecext3, i32 2 + %vecext5 = extractelement <16 x i8> %x, i32 3 + %vecinit6 = insertelement <8 x i8> %vecinit4, i8 %vecext5, i32 3 + %vecext7 = extractelement <16 x i8> %x, i32 4 + %vecinit8 = insertelement <8 x i8> %vecinit6, i8 %vecext7, i32 4 + %vecext9 = extractelement <16 x i8> %x, i32 5 + %vecinit10 = insertelement <8 x i8> %vecinit8, i8 %vecext9, i32 5 + %vecext11 = extractelement <16 x i8> %x, i32 6 + %vecinit12 = insertelement <8 x i8> %vecinit10, i8 %vecext11, i32 6 + %vecext13 = extractelement <16 x i8> %x, i32 7 + %vecinit14 = insertelement <8 x i8> %vecinit12, i8 %vecext13, i32 7 + ret <8 x i8> %vecinit14 +} + +define <4 x i16> @test_dup_v2i32_v4i16(<2 x i32> %a) { +; CHECK-LABEL: test_dup_v2i32_v4i16: +; CHECK: dup v0.4h, v0.h[2] +entry: + %x = extractelement <2 x i32> %a, i32 1 + %vget_lane = trunc i32 %x to i16 + %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0 + %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1 + %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2 + %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3 
+ ret <4 x i16> %vecinit3.i +} + +define <8 x i16> @test_dup_v4i32_v8i16(<4 x i32> %a) { +; CHECK-LABEL: test_dup_v4i32_v8i16: +; CHECK: dup v0.8h, v0.h[6] +entry: + %x = extractelement <4 x i32> %a, i32 3 + %vget_lane = trunc i32 %x to i16 + %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0 + %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1 + %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2 + %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3 + %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4 + %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5 + %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6 + %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7 + ret <8 x i16> %vecinit7.i +} + +define <4 x i16> @test_dup_v1i64_v4i16(<1 x i64> %a) { +; CHECK-LABEL: test_dup_v1i64_v4i16: +; CHECK: dup v0.4h, v0.h[0] +entry: + %x = extractelement <1 x i64> %a, i32 0 + %vget_lane = trunc i64 %x to i16 + %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0 + %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1 + %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2 + %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3 + ret <4 x i16> %vecinit3.i +} + +define <2 x i32> @test_dup_v1i64_v2i32(<1 x i64> %a) { +; CHECK-LABEL: test_dup_v1i64_v2i32: +; CHECK: dup v0.2s, v0.s[0] +entry: + %x = extractelement <1 x i64> %a, i32 0 + %vget_lane = trunc i64 %x to i32 + %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0 + %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1 + ret <2 x i32> %vecinit1.i +} + +define <8 x i16> @test_dup_v2i64_v8i16(<2 x i64> %a) { +; CHECK-LABEL: test_dup_v2i64_v8i16: +; CHECK: dup v0.8h, v0.h[4] +entry: + %x = extractelement <2 x i64> %a, i32 1 + %vget_lane = trunc i64 %x to i16 + %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0 + %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1 + %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2 + %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3 + %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4 + %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5 + %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6 + %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7 + ret <8 x i16> %vecinit7.i +} + +define <4 x i32> @test_dup_v2i64_v4i32(<2 x i64> %a) { +; CHECK-LABEL: test_dup_v2i64_v4i32: +; CHECK: dup v0.4s, v0.s[2] +entry: + %x = extractelement <2 x i64> %a, i32 1 + %vget_lane = trunc i64 %x to i32 + %vecinit.i = insertelement <4 x i32> undef, i32 %vget_lane, i32 0 + %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %vget_lane, i32 1 + %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %vget_lane, i32 2 + %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %vget_lane, i32 3 + ret <4 x i32> %vecinit3.i +} + +define <4 x i16> @test_dup_v4i32_v4i16(<4 x i32> %a) { +; CHECK-LABEL: test_dup_v4i32_v4i16: +; CHECK: dup v0.4h, v0.h[2] +entry: + %x = extractelement <4 x i32> %a, i32 1 + %vget_lane = trunc i32 %x to i16 + %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0 + %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1 + %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 
%vget_lane, i32 2 + %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3 + ret <4 x i16> %vecinit3.i +} + +define <4 x i16> @test_dup_v2i64_v4i16(<2 x i64> %a) { +; CHECK-LABEL: test_dup_v2i64_v4i16: +; CHECK: dup v0.4h, v0.h[0] +entry: + %x = extractelement <2 x i64> %a, i32 0 + %vget_lane = trunc i64 %x to i16 + %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0 + %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1 + %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2 + %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3 + ret <4 x i16> %vecinit3.i +} + +define <2 x i32> @test_dup_v2i64_v2i32(<2 x i64> %a) { +; CHECK-LABEL: test_dup_v2i64_v2i32: +; CHECK: dup v0.2s, v0.s[0] +entry: + %x = extractelement <2 x i64> %a, i32 0 + %vget_lane = trunc i64 %x to i32 + %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0 + %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1 + ret <2 x i32> %vecinit1.i +} + + +define <2 x float> @test_scalar_to_vector_f32_to_v2f32(<2 x float> %a) { +; CHECK-LABEL: test_scalar_to_vector_f32_to_v2f32: +; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s +; CHECK-NEXT: ret +entry: + %0 = call float @llvm.aarch64.neon.vpmax.f32.v2f32(<2 x float> %a) + %1 = insertelement <1 x float> undef, float %0, i32 0 + %2 = extractelement <1 x float> %1, i32 0 + %vecinit1.i = insertelement <2 x float> undef, float %2, i32 0 + ret <2 x float> %vecinit1.i +} + +define <4 x float> @test_scalar_to_vector_f32_to_v4f32(<2 x float> %a) { +; CHECK-LABEL: test_scalar_to_vector_f32_to_v4f32: +; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s +; CHECK-NEXT: ret +entry: + %0 = call float @llvm.aarch64.neon.vpmax.f32.v2f32(<2 x float> %a) + %1 = insertelement <1 x float> undef, float %0, i32 0 + %2 = extractelement <1 x float> %1, i32 0 + %vecinit1.i = insertelement <4 x float> undef, float %2, i32 0 + ret <4 x float> %vecinit1.i +} + +declare float @llvm.aarch64.neon.vpmax.f32.v2f32(<2 x float>) + +define <2 x i32> @test_concat_undef_v1i32(<1 x i32> %a) { +; CHECK-LABEL: test_concat_undef_v1i32: +; CHECK: ins v{{[0-9]+}}.s[1], v{{[0-9]+}}.s[0] +entry: + %0 = extractelement <1 x i32> %a, i32 0 + %vecinit1.i = insertelement <2 x i32> undef, i32 %0, i32 1 + ret <2 x i32> %vecinit1.i +} + +declare <1 x i32> @llvm.arm.neon.vqabs.v1i32(<1 x i32>) #4 + +define <2 x i32> @test_concat_v1i32_undef(<1 x i32> %a) { +; CHECK-LABEL: test_concat_v1i32_undef: +; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}} +; CHECK-NEXT: ret +entry: + %b = tail call <1 x i32> @llvm.arm.neon.vqabs.v1i32(<1 x i32> %a) + %0 = extractelement <1 x i32> %b, i32 0 + %vecinit.i432 = insertelement <2 x i32> undef, i32 %0, i32 0 + ret <2 x i32> %vecinit.i432 +} + +define <2 x i32> @test_concat_same_v1i32_v1i32(<1 x i32> %a) { +; CHECK-LABEL: test_concat_same_v1i32_v1i32: +; CHECK: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0] +entry: + %0 = extractelement <1 x i32> %a, i32 0 + %vecinit.i = insertelement <2 x i32> undef, i32 %0, i32 0 + %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %0, i32 1 + ret <2 x i32> %vecinit1.i +} + +define <2 x i32> @test_concat_diff_v1i32_v1i32(<1 x i32> %a, <1 x i32> %b) { +; CHECK-LABEL: test_concat_diff_v1i32_v1i32: +; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}} +; CHECK-NEXT: sqabs s{{[0-9]+}}, s{{[0-9]+}} +; CHECK-NEXT: ins v0.s[1], v1.s[0] +entry: + %c = tail call <1 x i32> @llvm.arm.neon.vqabs.v1i32(<1 x i32> %a) + %d = extractelement <1 x i32> %c, i32 0 + %e = tail call <1 x i32> 
@llvm.arm.neon.vqabs.v1i32(<1 x i32> %b) + %f = extractelement <1 x i32> %e, i32 0 + %h = shufflevector <1 x i32> %c, <1 x i32> %e, <2 x i32> + ret <2 x i32> %h +} + +define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 { +; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> + ret <16 x i8> %vecinit30 +} + +define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 { +; CHECK-LABEL: test_concat_v16i8_v8i8_v16i8: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %vecext = extractelement <8 x i8> %x, i32 0 + %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 + %vecext1 = extractelement <8 x i8> %x, i32 1 + %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 + %vecext3 = extractelement <8 x i8> %x, i32 2 + %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 + %vecext5 = extractelement <8 x i8> %x, i32 3 + %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 + %vecext7 = extractelement <8 x i8> %x, i32 4 + %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 + %vecext9 = extractelement <8 x i8> %x, i32 5 + %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 + %vecext11 = extractelement <8 x i8> %x, i32 6 + %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 + %vecext13 = extractelement <8 x i8> %x, i32 7 + %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 + %vecinit30 = shufflevector <16 x i8> %vecinit14, <16 x i8> %y, <16 x i32> + ret <16 x i8> %vecinit30 +} + +define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 { +; CHECK-LABEL: test_concat_v16i8_v16i8_v8i8: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %vecext = extractelement <16 x i8> %x, i32 0 + %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 + %vecext1 = extractelement <16 x i8> %x, i32 1 + %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 + %vecext3 = extractelement <16 x i8> %x, i32 2 + %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 + %vecext5 = extractelement <16 x i8> %x, i32 3 + %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 + %vecext7 = extractelement <16 x i8> %x, i32 4 + %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 + %vecext9 = extractelement <16 x i8> %x, i32 5 + %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 + %vecext11 = extractelement <16 x i8> %x, i32 6 + %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 + %vecext13 = extractelement <16 x i8> %x, i32 7 + %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 + %vecext15 = extractelement <8 x i8> %y, i32 0 + %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8 + %vecext17 = extractelement <8 x i8> %y, i32 1 + %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9 + %vecext19 = extractelement <8 x i8> %y, i32 2 + %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10 + %vecext21 = extractelement <8 x i8> %y, i32 3 + %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11 + %vecext23 = extractelement <8 x i8> %y, i32 4 + %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12 + %vecext25 = extractelement <8 x i8> %y, i32 5 + %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13 + %vecext27 = extractelement <8 x i8> %y, i32 6 + %vecinit28 
= insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14 + %vecext29 = extractelement <8 x i8> %y, i32 7 + %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15 + ret <16 x i8> %vecinit30 +} + +define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 { +; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %vecext = extractelement <8 x i8> %x, i32 0 + %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 + %vecext1 = extractelement <8 x i8> %x, i32 1 + %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 + %vecext3 = extractelement <8 x i8> %x, i32 2 + %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 + %vecext5 = extractelement <8 x i8> %x, i32 3 + %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 + %vecext7 = extractelement <8 x i8> %x, i32 4 + %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 + %vecext9 = extractelement <8 x i8> %x, i32 5 + %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 + %vecext11 = extractelement <8 x i8> %x, i32 6 + %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 + %vecext13 = extractelement <8 x i8> %x, i32 7 + %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 + %vecext15 = extractelement <8 x i8> %y, i32 0 + %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8 + %vecext17 = extractelement <8 x i8> %y, i32 1 + %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9 + %vecext19 = extractelement <8 x i8> %y, i32 2 + %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10 + %vecext21 = extractelement <8 x i8> %y, i32 3 + %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11 + %vecext23 = extractelement <8 x i8> %y, i32 4 + %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12 + %vecext25 = extractelement <8 x i8> %y, i32 5 + %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13 + %vecext27 = extractelement <8 x i8> %y, i32 6 + %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14 + %vecext29 = extractelement <8 x i8> %y, i32 7 + %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15 + ret <16 x i8> %vecinit30 +} + +define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 { +; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> + ret <8 x i16> %vecinit14 +} + +define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 { +; CHECK-LABEL: test_concat_v8i16_v4i16_v8i16: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %vecext = extractelement <4 x i16> %x, i32 0 + %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0 + %vecext1 = extractelement <4 x i16> %x, i32 1 + %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1 + %vecext3 = extractelement <4 x i16> %x, i32 2 + %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2 + %vecext5 = extractelement <4 x i16> %x, i32 3 + %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3 + %vecinit14 = shufflevector <8 x i16> %vecinit6, <8 x i16> %y, <8 x i32> + ret <8 x i16> %vecinit14 +} + +define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 { +; CHECK-LABEL: test_concat_v8i16_v8i16_v4i16: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %vecext = extractelement <8 
x i16> %x, i32 0 + %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0 + %vecext1 = extractelement <8 x i16> %x, i32 1 + %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1 + %vecext3 = extractelement <8 x i16> %x, i32 2 + %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2 + %vecext5 = extractelement <8 x i16> %x, i32 3 + %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3 + %vecext7 = extractelement <4 x i16> %y, i32 0 + %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4 + %vecext9 = extractelement <4 x i16> %y, i32 1 + %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5 + %vecext11 = extractelement <4 x i16> %y, i32 2 + %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6 + %vecext13 = extractelement <4 x i16> %y, i32 3 + %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7 + ret <8 x i16> %vecinit14 +} + +define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 { +; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %vecext = extractelement <4 x i16> %x, i32 0 + %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0 + %vecext1 = extractelement <4 x i16> %x, i32 1 + %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1 + %vecext3 = extractelement <4 x i16> %x, i32 2 + %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2 + %vecext5 = extractelement <4 x i16> %x, i32 3 + %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3 + %vecext7 = extractelement <4 x i16> %y, i32 0 + %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4 + %vecext9 = extractelement <4 x i16> %y, i32 1 + %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5 + %vecext11 = extractelement <4 x i16> %y, i32 2 + %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6 + %vecext13 = extractelement <4 x i16> %y, i32 3 + %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7 + ret <8 x i16> %vecinit14 +} + +define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 { +; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> + ret <4 x i32> %vecinit6 +} + +define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 { +; CHECK-LABEL: test_concat_v4i32_v2i32_v4i32: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %vecext = extractelement <2 x i32> %x, i32 0 + %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0 + %vecext1 = extractelement <2 x i32> %x, i32 1 + %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1 + %vecinit6 = shufflevector <4 x i32> %vecinit2, <4 x i32> %y, <4 x i32> + ret <4 x i32> %vecinit6 +} + +define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 { +; CHECK-LABEL: test_concat_v4i32_v4i32_v2i32: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %vecext = extractelement <4 x i32> %x, i32 0 + %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0 + %vecext1 = extractelement <4 x i32> %x, i32 1 + %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1 + %vecext3 = extractelement <2 x i32> %y, i32 0 + %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2 + %vecext5 = extractelement <2 x i32> %y, i32 1 + %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3 + ret <4 x i32> 
%vecinit6 +} + +define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 { +; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %vecext = extractelement <2 x i32> %x, i32 0 + %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0 + %vecext1 = extractelement <2 x i32> %x, i32 1 + %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1 + %vecext3 = extractelement <2 x i32> %y, i32 0 + %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2 + %vecext5 = extractelement <2 x i32> %y, i32 1 + %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3 + ret <4 x i32> %vecinit6 +} + +define <2 x i64> @test_concat_v2i64_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) #0 { +; CHECK-LABEL: test_concat_v2i64_v2i64_v2i64: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %vecinit2 = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> + ret <2 x i64> %vecinit2 +} + +define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 { +; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %vecext = extractelement <1 x i64> %x, i32 0 + %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0 + %vecinit2 = shufflevector <2 x i64> %vecinit, <2 x i64> %y, <2 x i32> + ret <2 x i64> %vecinit2 +} + +define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 { +; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %vecext = extractelement <2 x i64> %x, i32 0 + %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0 + %vecext1 = extractelement <1 x i64> %y, i32 0 + %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1 + ret <2 x i64> %vecinit2 +} + +define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 { +; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %vecext = extractelement <1 x i64> %x, i32 0 + %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0 + %vecext1 = extractelement <1 x i64> %y, i32 0 + %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1 + ret <2 x i64> %vecinit2 +} + +declare <1 x i8> @llvm.aarch64.neon.vsqadd.v1i8(<1 x i8>, <1 x i8>) + +; This case tests the copy of two FPR8 registers, which is implemented by fmov +; of two FPR32 registers. 
+define <1 x i8> @test_copy_FPR8_FPR8(<1 x i8> %a, <1 x i8> %b) { +; CHECK-LABEL: test_copy_FPR8_FPR8: +; CHECK: usqadd b1, b0 +; CHECK-NEXT: fmov s0, s1 +entry: + %vsqadd2.i = call <1 x i8> @llvm.aarch64.neon.vsqadd.v1i8(<1 x i8> %b, <1 x i8> %a) + ret <1 x i8> %vsqadd2.i +} + +declare <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16>, <1 x i16>) + +define <1 x i16> @test_copy_FPR16_FPR16(<1 x i16> %a, <1 x i16> %b) { +; CHECK-LABEL: test_copy_FPR16_FPR16: +; CHECK: usqadd h1, h0 +; CHECK-NEXT: fmov s0, s1 +entry: + %vsqadd2.i = call <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16> %b, <1 x i16> %a) + ret <1 x i16> %vsqadd2.i +} + +define <4 x i16> @concat_vector_v4i16_const() { +; CHECK-LABEL: concat_vector_v4i16_const: +; CHECK: dup {{v[0-9]+}}.4h, wzr + %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <4 x i32> zeroinitializer + ret <4 x i16> %r +} + +define <4 x i16> @concat_vector_v4i16_const_one() { +; CHECK-LABEL: concat_vector_v4i16_const_one: +; CHECK: movz {{w[0-9]+}}, #1 +; CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}} + %r = shufflevector <1 x i16> , <1 x i16> undef, <4 x i32> zeroinitializer + ret <4 x i16> %r +} + +define <4 x i32> @concat_vector_v4i32_const() { +; CHECK-LABEL: concat_vector_v4i32_const: +; CHECK: dup {{v[0-9]+}}.4s, wzr + %r = shufflevector <1 x i32> zeroinitializer, <1 x i32> undef, <4 x i32> zeroinitializer + ret <4 x i32> %r +} + +define <8 x i8> @concat_vector_v8i8_const() { +; CHECK-LABEL: concat_vector_v8i8_const: +; CHECK: dup {{v[0-9]+}}.8b, wzr + %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer + ret <8 x i8> %r +} + +define <8 x i16> @concat_vector_v8i16_const() { +; CHECK-LABEL: concat_vector_v8i16_const: +; CHECK: dup {{v[0-9]+}}.8h, wzr + %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <8 x i32> zeroinitializer + ret <8 x i16> %r +} + +define <8 x i16> @concat_vector_v8i16_const_one() { +; CHECK-LABEL: concat_vector_v8i16_const_one: +; CHECK: movz {{w[0-9]+}}, #1 +; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}} + %r = shufflevector <1 x i16> , <1 x i16> undef, <8 x i32> zeroinitializer + ret <8 x i16> %r +} + +define <16 x i8> @concat_vector_v16i8_const() { +; CHECK-LABEL: concat_vector_v16i8_const: +; CHECK: dup {{v[0-9]+}}.16b, wzr + %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <16 x i32> zeroinitializer + ret <16 x i8> %r +} + +define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) { +; CHECK-LABEL: concat_vector_v4i16: +; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] + %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer + ret <4 x i16> %r +} + +define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) { +; CHECK-LABEL: concat_vector_v4i32: +; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] + %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer + ret <4 x i32> %r +} + +define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) { +; CHECK-LABEL: concat_vector_v8i8: +; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[0] + %r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer + ret <8 x i8> %r +} + +define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) { +; CHECK-LABEL: concat_vector_v8i16: +; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] + %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer + ret <8 x i16> %r +} + +define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) { +; CHECK-LABEL: concat_vector_v16i8: +; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[0] + %r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> 
zeroinitializer + ret <16 x i8> %r +} diff --git a/test/CodeGen/AArch64/neon-copyPhysReg-tuple.ll b/test/CodeGen/AArch64/neon-copyPhysReg-tuple.ll new file mode 100644 index 0000000..4dffcd1 --- /dev/null +++ b/test/CodeGen/AArch64/neon-copyPhysReg-tuple.ll @@ -0,0 +1,47 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +define <4 x i32> @copyTuple.QPair(i8* %a, i8* %b) { +; CHECK-LABEL: copyTuple.QPair: +; CHECK: orr v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +; CHECK: orr v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +; CHECK: ld2 {{{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x{{[0-9]+|sp}}] +entry: + %vld = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8* %a, <4 x i32> , <4 x i32> , i32 0, i32 4) + %extract = extractvalue { <4 x i32>, <4 x i32> } %vld, 0 + %vld1 = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8* %b, <4 x i32> %extract, <4 x i32> , i32 1, i32 4) + %vld1.fca.0.extract = extractvalue { <4 x i32>, <4 x i32> } %vld1, 0 + ret <4 x i32> %vld1.fca.0.extract +} + +define <4 x i32> @copyTuple.QTriple(i8* %a, i8* %b, <4 x i32> %c) { +; CHECK-LABEL: copyTuple.QTriple: +; CHECK: orr v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +; CHECK: orr v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +; CHECK: orr v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +; CHECK: ld3 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x{{[0-9]+|sp}}] +entry: + %vld = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32(i8* %a, <4 x i32> , <4 x i32> %c, <4 x i32> %c, i32 0, i32 4) + %extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld, 0 + %vld1 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32(i8* %b, <4 x i32> %extract, <4 x i32> , <4 x i32> %c, i32 1, i32 4) + %vld1.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld1, 0 + ret <4 x i32> %vld1.fca.0.extract +} + +define <4 x i32> @copyTuple.QQuad(i8* %a, i8* %b, <4 x i32> %c) { +; CHECK-LABEL: copyTuple.QQuad: +; CHECK: orr v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +; CHECK: orr v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +; CHECK: orr v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +; CHECK: orr v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +; CHECK: ld4 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x{{[0-9]+|sp}}] +entry: + %vld = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32(i8* %a, <4 x i32> , <4 x i32> %c, <4 x i32> %c, <4 x i32> %c, i32 0, i32 4) + %extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld, 0 + %vld1 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32(i8* %b, <4 x i32> %extract, <4 x i32> , <4 x i32> %c, <4 x i32> %c, i32 1, i32 4) + %vld1.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld1, 0 + ret <4 x i32> %vld1.fca.0.extract +} + +declare { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) +declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) +declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) \ No newline at end of file diff --git a/test/CodeGen/AArch64/neon-crypto.ll b/test/CodeGen/AArch64/neon-crypto.ll index 0283e0e..c0014fa 100644 --- 
a/test/CodeGen/AArch64/neon-crypto.ll +++ b/test/CodeGen/AArch64/neon-crypto.ll @@ -1,40 +1,40 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -mattr=+crypto | FileCheck %s ; RUN: not llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon 2>&1 | FileCheck --check-prefix=CHECK-NO-CRYPTO %s -declare <4 x i32> @llvm.arm.neon.sha256su1.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #1 +declare <4 x i32> @llvm.arm.neon.sha256su1(<4 x i32>, <4 x i32>, <4 x i32>) #1 -declare <4 x i32> @llvm.arm.neon.sha256h2.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #1 +declare <4 x i32> @llvm.arm.neon.sha256h2(<4 x i32>, <4 x i32>, <4 x i32>) #1 -declare <4 x i32> @llvm.arm.neon.sha256h.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #1 +declare <4 x i32> @llvm.arm.neon.sha256h(<4 x i32>, <4 x i32>, <4 x i32>) #1 -declare <4 x i32> @llvm.arm.neon.sha1su0.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #1 +declare <4 x i32> @llvm.arm.neon.sha1su0(<4 x i32>, <4 x i32>, <4 x i32>) #1 -declare <4 x i32> @llvm.aarch64.neon.sha1m(<4 x i32>, <1 x i32>, <4 x i32>) #1 +declare <4 x i32> @llvm.arm.neon.sha1m(<4 x i32>, i32, <4 x i32>) #1 -declare <4 x i32> @llvm.aarch64.neon.sha1p(<4 x i32>, <1 x i32>, <4 x i32>) #1 +declare <4 x i32> @llvm.arm.neon.sha1p(<4 x i32>, i32, <4 x i32>) #1 -declare <4 x i32> @llvm.aarch64.neon.sha1c(<4 x i32>, <1 x i32>, <4 x i32>) #1 +declare <4 x i32> @llvm.arm.neon.sha1c(<4 x i32>, i32, <4 x i32>) #1 -declare <4 x i32> @llvm.arm.neon.sha256su0.v4i32(<4 x i32>, <4 x i32>) #1 +declare <4 x i32> @llvm.arm.neon.sha256su0(<4 x i32>, <4 x i32>) #1 -declare <4 x i32> @llvm.arm.neon.sha1su1.v4i32(<4 x i32>, <4 x i32>) #1 +declare <4 x i32> @llvm.arm.neon.sha1su1(<4 x i32>, <4 x i32>) #1 -declare <1 x i32> @llvm.arm.neon.sha1h.v1i32(<1 x i32>) #1 +declare i32 @llvm.arm.neon.sha1h(i32) #1 -declare <16 x i8> @llvm.arm.neon.aesimc.v16i8(<16 x i8>) #1 +declare <16 x i8> @llvm.arm.neon.aesimc(<16 x i8>) #1 -declare <16 x i8> @llvm.arm.neon.aesmc.v16i8(<16 x i8>) #1 +declare <16 x i8> @llvm.arm.neon.aesmc(<16 x i8>) #1 -declare <16 x i8> @llvm.arm.neon.aesd.v16i8(<16 x i8>, <16 x i8>) #1 +declare <16 x i8> @llvm.arm.neon.aesd(<16 x i8>, <16 x i8>) #1 -declare <16 x i8> @llvm.arm.neon.aese.v16i8(<16 x i8>, <16 x i8>) #1 +declare <16 x i8> @llvm.arm.neon.aese(<16 x i8>, <16 x i8>) #1 define <16 x i8> @test_vaeseq_u8(<16 x i8> %data, <16 x i8> %key) { ; CHECK: test_vaeseq_u8: ; CHECK: aese {{v[0-9]+}}.16b, {{v[0-9]+}}.16b ; CHECK-NO-CRYPTO: Cannot select: intrinsic %llvm.arm.neon.aese entry: - %aese.i = tail call <16 x i8> @llvm.arm.neon.aese.v16i8(<16 x i8> %data, <16 x i8> %key) + %aese.i = tail call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %data, <16 x i8> %key) ret <16 x i8> %aese.i } @@ -42,7 +42,7 @@ define <16 x i8> @test_vaesdq_u8(<16 x i8> %data, <16 x i8> %key) { ; CHECK: test_vaesdq_u8: ; CHECK: aesd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: - %aesd.i = tail call <16 x i8> @llvm.arm.neon.aesd.v16i8(<16 x i8> %data, <16 x i8> %key) + %aesd.i = tail call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %data, <16 x i8> %key) ret <16 x i8> %aesd.i } @@ -50,7 +50,7 @@ define <16 x i8> @test_vaesmcq_u8(<16 x i8> %data) { ; CHECK: test_vaesmcq_u8: ; CHECK: aesmc {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: - %aesmc.i = tail call <16 x i8> @llvm.arm.neon.aesmc.v16i8(<16 x i8> %data) + %aesmc.i = tail call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %data) ret <16 x i8> %aesmc.i } @@ -58,7 +58,7 @@ define <16 x i8> @test_vaesimcq_u8(<16 x i8> %data) { ; CHECK: test_vaesimcq_u8: ; CHECK: aesimc 
{{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: - %aesimc.i = tail call <16 x i8> @llvm.arm.neon.aesimc.v16i8(<16 x i8> %data) + %aesimc.i = tail call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %data) ret <16 x i8> %aesimc.i } @@ -66,17 +66,15 @@ define i32 @test_vsha1h_u32(i32 %hash_e) { ; CHECK: test_vsha1h_u32: ; CHECK: sha1h {{s[0-9]+}}, {{s[0-9]+}} entry: - %sha1h.i = insertelement <1 x i32> undef, i32 %hash_e, i32 0 - %sha1h1.i = tail call <1 x i32> @llvm.arm.neon.sha1h.v1i32(<1 x i32> %sha1h.i) - %0 = extractelement <1 x i32> %sha1h1.i, i32 0 - ret i32 %0 + %sha1h1.i = tail call i32 @llvm.arm.neon.sha1h(i32 %hash_e) + ret i32 %sha1h1.i } define <4 x i32> @test_vsha1su1q_u32(<4 x i32> %tw0_3, <4 x i32> %w12_15) { ; CHECK: test_vsha1su1q_u32: ; CHECK: sha1su1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: - %sha1su12.i = tail call <4 x i32> @llvm.arm.neon.sha1su1.v4i32(<4 x i32> %tw0_3, <4 x i32> %w12_15) + %sha1su12.i = tail call <4 x i32> @llvm.arm.neon.sha1su1(<4 x i32> %tw0_3, <4 x i32> %w12_15) ret <4 x i32> %sha1su12.i } @@ -84,7 +82,7 @@ define <4 x i32> @test_vsha256su0q_u32(<4 x i32> %w0_3, <4 x i32> %w4_7) { ; CHECK: test_vsha256su0q_u32: ; CHECK: sha256su0 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: - %sha256su02.i = tail call <4 x i32> @llvm.arm.neon.sha256su0.v4i32(<4 x i32> %w0_3, <4 x i32> %w4_7) + %sha256su02.i = tail call <4 x i32> @llvm.arm.neon.sha256su0(<4 x i32> %w0_3, <4 x i32> %w4_7) ret <4 x i32> %sha256su02.i } @@ -92,8 +90,7 @@ define <4 x i32> @test_vsha1cq_u32(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> ; CHECK: test_vsha1cq_u32: ; CHECK: sha1c {{q[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.4s entry: - %sha1c.i = insertelement <1 x i32> undef, i32 %hash_e, i32 0 - %sha1c1.i = tail call <4 x i32> @llvm.aarch64.neon.sha1c(<4 x i32> %hash_abcd, <1 x i32> %sha1c.i, <4 x i32> %wk) + %sha1c1.i = tail call <4 x i32> @llvm.arm.neon.sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) ret <4 x i32> %sha1c1.i } @@ -101,8 +98,7 @@ define <4 x i32> @test_vsha1pq_u32(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> ; CHECK: test_vsha1pq_u32: ; CHECK: sha1p {{q[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.4s entry: - %sha1p.i = insertelement <1 x i32> undef, i32 %hash_e, i32 0 - %sha1p1.i = tail call <4 x i32> @llvm.aarch64.neon.sha1p(<4 x i32> %hash_abcd, <1 x i32> %sha1p.i, <4 x i32> %wk) + %sha1p1.i = tail call <4 x i32> @llvm.arm.neon.sha1p(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) ret <4 x i32> %sha1p1.i } @@ -110,8 +106,7 @@ define <4 x i32> @test_vsha1mq_u32(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> ; CHECK: test_vsha1mq_u32: ; CHECK: sha1m {{q[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.4s entry: - %sha1m.i = insertelement <1 x i32> undef, i32 %hash_e, i32 0 - %sha1m1.i = tail call <4 x i32> @llvm.aarch64.neon.sha1m(<4 x i32> %hash_abcd, <1 x i32> %sha1m.i, <4 x i32> %wk) + %sha1m1.i = tail call <4 x i32> @llvm.arm.neon.sha1m(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) ret <4 x i32> %sha1m1.i } @@ -119,7 +114,7 @@ define <4 x i32> @test_vsha1su0q_u32(<4 x i32> %w0_3, <4 x i32> %w4_7, <4 x i32> ; CHECK: test_vsha1su0q_u32: ; CHECK: sha1su0 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: - %sha1su03.i = tail call <4 x i32> @llvm.arm.neon.sha1su0.v4i32(<4 x i32> %w0_3, <4 x i32> %w4_7, <4 x i32> %w8_11) + %sha1su03.i = tail call <4 x i32> @llvm.arm.neon.sha1su0(<4 x i32> %w0_3, <4 x i32> %w4_7, <4 x i32> %w8_11) ret <4 x i32> %sha1su03.i } @@ -127,7 +122,7 @@ define <4 x i32> @test_vsha256hq_u32(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, ; CHECK: test_vsha256hq_u32: ; CHECK: sha256h {{q[0-9]+}}, 
{{q[0-9]+}}, {{v[0-9]+}}.4s entry: - %sha256h3.i = tail call <4 x i32> @llvm.arm.neon.sha256h.v4i32(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk) + %sha256h3.i = tail call <4 x i32> @llvm.arm.neon.sha256h(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk) ret <4 x i32> %sha256h3.i } @@ -135,7 +130,7 @@ define <4 x i32> @test_vsha256h2q_u32(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd ; CHECK: test_vsha256h2q_u32: ; CHECK: sha256h2 {{q[0-9]+}}, {{q[0-9]+}}, {{v[0-9]+}}.4s entry: - %sha256h23.i = tail call <4 x i32> @llvm.arm.neon.sha256h2.v4i32(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk) + %sha256h23.i = tail call <4 x i32> @llvm.arm.neon.sha256h2(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk) ret <4 x i32> %sha256h23.i } @@ -143,7 +138,7 @@ define <4 x i32> @test_vsha256su1q_u32(<4 x i32> %tw0_3, <4 x i32> %w8_11, <4 x ; CHECK: test_vsha256su1q_u32: ; CHECK: sha256su1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: - %sha256su13.i = tail call <4 x i32> @llvm.arm.neon.sha256su1.v4i32(<4 x i32> %tw0_3, <4 x i32> %w8_11, <4 x i32> %w12_15) + %sha256su13.i = tail call <4 x i32> @llvm.arm.neon.sha256su1(<4 x i32> %tw0_3, <4 x i32> %w8_11, <4 x i32> %w12_15) ret <4 x i32> %sha256su13.i } diff --git a/test/CodeGen/AArch64/neon-extract.ll b/test/CodeGen/AArch64/neon-extract.ll index 5c52cd3..cddc226 100644 --- a/test/CodeGen/AArch64/neon-extract.ll +++ b/test/CodeGen/AArch64/neon-extract.ll @@ -188,3 +188,35 @@ entry: %vext = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %vext } + +define <8 x i8> @test_undef_vext_s8(<8 x i8> %a) { +; CHECK: test_undef_vext_s8: +; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x2 +entry: + %vext = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> + ret <8 x i8> %vext +} + +define <16 x i8> @test_undef_vextq_s8(<16 x i8> %a) { +; CHECK: test_undef_vextq_s8: +; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x6 +entry: + %vext = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> + ret <16 x i8> %vext +} + +define <4 x i16> @test_undef_vext_s16(<4 x i16> %a) { +; CHECK: test_undef_vext_s16: +; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x4 +entry: + %vext = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> + ret <4 x i16> %vext +} + +define <8 x i16> @test_undef_vextq_s16(<8 x i16> %a) { +; CHECK: test_undef_vextq_s16: +; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x6 +entry: + %vext = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> + ret <8 x i16> %vext +} diff --git a/test/CodeGen/AArch64/neon-facge-facgt.ll b/test/CodeGen/AArch64/neon-facge-facgt.ll index 146256e..28e8212 100644 --- a/test/CodeGen/AArch64/neon-facge-facgt.ll +++ b/test/CodeGen/AArch64/neon-facge-facgt.ll @@ -1,20 +1,20 @@ ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -declare <2 x i32> @llvm.arm.neon.vacged(<2 x float>, <2 x float>) -declare <4 x i32> @llvm.arm.neon.vacgeq(<4 x float>, <4 x float>) -declare <2 x i64> @llvm.aarch64.neon.vacgeq(<2 x double>, <2 x double>) +declare <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float>, <2 x float>) +declare <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float>, <4 x float>) +declare <2 x i64> @llvm.arm.neon.vacge.v2i64.v2f64(<2 x double>, <2 x double>) define <2 x i32> @facge_from_intr_v2i32(<2 x float> %A, <2 x float> %B, <2 x float> %C) { ; Using registers other than v0, v1 and v2 are possible, but would be odd. 
; CHECK: facge_from_intr_v2i32: - %val = call <2 x i32> @llvm.arm.neon.vacged(<2 x float> %A, <2 x float> %B) + %val = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> %A, <2 x float> %B) ; CHECK: facge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s ret <2 x i32> %val } define <4 x i32> @facge_from_intr_v4i32( <4 x float> %A, <4 x float> %B) { ; Using registers other than v0, v1 and v2 are possible, but would be odd. ; CHECK: facge_from_intr_v4i32: - %val = call <4 x i32> @llvm.arm.neon.vacgeq(<4 x float> %A, <4 x float> %B) + %val = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> %A, <4 x float> %B) ; CHECK: facge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s ret <4 x i32> %val } @@ -22,26 +22,26 @@ define <4 x i32> @facge_from_intr_v4i32( <4 x float> %A, <4 x float> %B) { define <2 x i64> @facge_from_intr_v2i64(<2 x double> %A, <2 x double> %B) { ; Using registers other than v0, v1 and v2 are possible, but would be odd. ; CHECK: facge_from_intr_v2i64: - %val = call <2 x i64> @llvm.aarch64.neon.vacgeq(<2 x double> %A, <2 x double> %B) + %val = call <2 x i64> @llvm.arm.neon.vacge.v2i64.v2f64(<2 x double> %A, <2 x double> %B) ; CHECK: facge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d ret <2 x i64> %val } -declare <2 x i32> @llvm.arm.neon.vacgtd(<2 x float>, <2 x float>) -declare <4 x i32> @llvm.arm.neon.vacgtq(<4 x float>, <4 x float>) -declare <2 x i64> @llvm.aarch64.neon.vacgtq(<2 x double>, <2 x double>) +declare <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float>, <2 x float>) +declare <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float>, <4 x float>) +declare <2 x i64> @llvm.arm.neon.vacgt.v2i64.v2f64(<2 x double>, <2 x double>) define <2 x i32> @facgt_from_intr_v2i32(<2 x float> %A, <2 x float> %B, <2 x float> %C) { ; Using registers other than v0, v1 and v2 are possible, but would be odd. ; CHECK: facgt_from_intr_v2i32: - %val = call <2 x i32> @llvm.arm.neon.vacgtd(<2 x float> %A, <2 x float> %B) + %val = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> %A, <2 x float> %B) ; CHECK: facgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s ret <2 x i32> %val } define <4 x i32> @facgt_from_intr_v4i32( <4 x float> %A, <4 x float> %B) { ; Using registers other than v0, v1 and v2 are possible, but would be odd. ; CHECK: facgt_from_intr_v4i32: - %val = call <4 x i32> @llvm.arm.neon.vacgtq(<4 x float> %A, <4 x float> %B) + %val = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> %A, <4 x float> %B) ; CHECK: facgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s ret <4 x i32> %val } @@ -49,7 +49,7 @@ define <4 x i32> @facgt_from_intr_v4i32( <4 x float> %A, <4 x float> %B) { define <2 x i64> @facgt_from_intr_v2i64(<2 x double> %A, <2 x double> %B) { ; Using registers other than v0, v1 and v2 are possible, but would be odd. 
; CHECK: facgt_from_intr_v2i64: - %val = call <2 x i64> @llvm.aarch64.neon.vacgtq(<2 x double> %A, <2 x double> %B) + %val = call <2 x i64> @llvm.arm.neon.vacgt.v2i64.v2f64(<2 x double> %A, <2 x double> %B) ; CHECK: facgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d ret <2 x i64> %val } diff --git a/test/CodeGen/AArch64/neon-fma.ll b/test/CodeGen/AArch64/neon-fma.ll index dcf4e28..af70302 100644 --- a/test/CodeGen/AArch64/neon-fma.ll +++ b/test/CodeGen/AArch64/neon-fma.ll @@ -1,21 +1,21 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s define <2 x float> @fmla2xfloat(<2 x float> %A, <2 x float> %B, <2 x float> %C) { -;CHECK: fmla {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +;CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s %tmp1 = fmul <2 x float> %A, %B; %tmp2 = fadd <2 x float> %C, %tmp1; ret <2 x float> %tmp2 } define <4 x float> @fmla4xfloat(<4 x float> %A, <4 x float> %B, <4 x float> %C) { -;CHECK: fmla {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +;CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s %tmp1 = fmul <4 x float> %A, %B; %tmp2 = fadd <4 x float> %C, %tmp1; ret <4 x float> %tmp2 } define <2 x double> @fmla2xdouble(<2 x double> %A, <2 x double> %B, <2 x double> %C) { -;CHECK: fmla {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +;CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d %tmp1 = fmul <2 x double> %A, %B; %tmp2 = fadd <2 x double> %C, %tmp1; ret <2 x double> %tmp2 @@ -23,21 +23,21 @@ define <2 x double> @fmla2xdouble(<2 x double> %A, <2 x double> %B, <2 x double> define <2 x float> @fmls2xfloat(<2 x float> %A, <2 x float> %B, <2 x float> %C) { -;CHECK: fmls {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +;CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s %tmp1 = fmul <2 x float> %A, %B; %tmp2 = fsub <2 x float> %C, %tmp1; ret <2 x float> %tmp2 } define <4 x float> @fmls4xfloat(<4 x float> %A, <4 x float> %B, <4 x float> %C) { -;CHECK: fmls {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +;CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s %tmp1 = fmul <4 x float> %A, %B; %tmp2 = fsub <4 x float> %C, %tmp1; ret <4 x float> %tmp2 } define <2 x double> @fmls2xdouble(<2 x double> %A, <2 x double> %B, <2 x double> %C) { -;CHECK: fmls {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +;CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d %tmp1 = fmul <2 x double> %A, %B; %tmp2 = fsub <2 x double> %C, %tmp1; ret <2 x double> %tmp2 @@ -51,39 +51,39 @@ declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) define <2 x float> @fmla2xfloat_fused(<2 x float> %A, <2 x float> %B, <2 x float> %C) { -;CHECK: fmla {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +;CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s %val = call <2 x float> @llvm.fma.v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C) ret <2 x float> %val } define <4 x float> @fmla4xfloat_fused(<4 x float> %A, <4 x float> %B, <4 x float> %C) { -;CHECK: fmla {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +;CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s %val = call <4 x float> @llvm.fma.v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C) ret <4 x float> %val } define <2 x double> @fmla2xdouble_fused(<2 x double> %A, <2 x double> %B, <2 x double> %C) { -;CHECK: fmla {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +;CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, 
{{v[0-9]+}}.2d %val = call <2 x double> @llvm.fma.v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C) ret <2 x double> %val } define <2 x float> @fmls2xfloat_fused(<2 x float> %A, <2 x float> %B, <2 x float> %C) { -;CHECK: fmls {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +;CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s %negA = fsub <2 x float> <float -0.0, float -0.0>, %A %val = call <2 x float> @llvm.fma.v2f32(<2 x float> %negA, <2 x float> %B, <2 x float> %C) ret <2 x float> %val } define <4 x float> @fmls4xfloat_fused(<4 x float> %A, <4 x float> %B, <4 x float> %C) { -;CHECK: fmls {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +;CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s %negA = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %A %val = call <4 x float> @llvm.fma.v4f32(<4 x float> %negA, <4 x float> %B, <4 x float> %C) ret <4 x float> %val } define <2 x double> @fmls2xdouble_fused(<2 x double> %A, <2 x double> %B, <2 x double> %C) { -;CHECK: fmls {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +;CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d %negA = fsub <2 x double> <double -0.0, double -0.0>, %A %val = call <2 x double> @llvm.fma.v2f64(<2 x double> %negA, <2 x double> %B, <2 x double> %C) ret <2 x double> %val @@ -94,19 +94,39 @@ declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>) define <2 x float> @fmuladd2xfloat(<2 x float> %A, <2 x float> %B, <2 x float> %C) { -;CHECK: fmla {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +;CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s %val = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C) ret <2 x float> %val } define <4 x float> @fmuladd4xfloat_fused(<4 x float> %A, <4 x float> %B, <4 x float> %C) { -;CHECK: fmla {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +;CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s %val = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C) ret <4 x float> %val } define <2 x double> @fmuladd2xdouble_fused(<2 x double> %A, <2 x double> %B, <2 x double> %C) { -;CHECK: fmla {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +;CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d %val = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C) ret <2 x double> %val } + + +; Another set of tests that check for multiply single use + +define <2 x float> @fmla2xfloati_su(<2 x float> %A, <2 x float> %B, <2 x float> %C) { +;CHECK-NOT: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + %tmp1 = fmul <2 x float> %A, %B; + %tmp2 = fadd <2 x float> %C, %tmp1; + %tmp3 = fadd <2 x float> %tmp2, %tmp1; + ret <2 x float> %tmp3 +} + +define <2 x double> @fmls2xdouble_su(<2 x double> %A, <2 x double> %B, <2 x double> %C) { +;CHECK-NOT: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d + %tmp1 = fmul <2 x double> %A, %B; + %tmp2 = fsub <2 x double> %C, %tmp1; + %tmp3 = fsub <2 x double> %tmp2, %tmp1; + ret <2 x double> %tmp3 +} + diff --git a/test/CodeGen/AArch64/neon-fpround_f128.ll b/test/CodeGen/AArch64/neon-fpround_f128.ll new file mode 100644 index 0000000..a93f3f2 --- /dev/null +++ b/test/CodeGen/AArch64/neon-fpround_f128.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s + +define <1 x double> @test_fpround_v1f128(<1 x fp128>* %a) { +; CHECK-LABEL: test_fpround_v1f128: +; CHECK: bl __trunctfdf2 + %b = load <1 x fp128>* %a + %c =
fptrunc <1 x fp128> %b to <1 x double> + ret <1 x double> %c +} + +define <2 x double> @test_fpround_v2f128(<2 x fp128>* %a) { +; CHECK-LABEL: test_fpround_v2f128: +; CHECK: bl __trunctfdf2 +; CHECK: bl __trunctfdf2 + %b = load <2 x fp128>* %a + %c = fptrunc <2 x fp128> %b to <2 x double> + ret <2 x double> %c +} diff --git a/test/CodeGen/AArch64/neon-load-store-v1i32.ll b/test/CodeGen/AArch64/neon-load-store-v1i32.ll new file mode 100644 index 0000000..92f704d --- /dev/null +++ b/test/CodeGen/AArch64/neon-load-store-v1i32.ll @@ -0,0 +1,29 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +; Test load/store of v1i8, v1i16, v1i32 types can be selected correctly +define void @load.store.v1i8(<1 x i8>* %ptr, <1 x i8>* %ptr2) { +; CHECK-LABEL: load.store.v1i8: +; CHECK: ldr b{{[0-9]+}}, [x{{[0-9]+|sp}}] +; CHECK: str b{{[0-9]+}}, [x{{[0-9]+|sp}}] + %a = load <1 x i8>* %ptr + store <1 x i8> %a, <1 x i8>* %ptr2 + ret void +} + +define void @load.store.v1i16(<1 x i16>* %ptr, <1 x i16>* %ptr2) { +; CHECK-LABEL: load.store.v1i16: +; CHECK: ldr h{{[0-9]+}}, [x{{[0-9]+|sp}}] +; CHECK: str h{{[0-9]+}}, [x{{[0-9]+|sp}}] + %a = load <1 x i16>* %ptr + store <1 x i16> %a, <1 x i16>* %ptr2 + ret void +} + +define void @load.store.v1i32(<1 x i32>* %ptr, <1 x i32>* %ptr2) { +; CHECK-LABEL: load.store.v1i32: +; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+|sp}}] +; CHECK: str s{{[0-9]+}}, [x{{[0-9]+|sp}}] + %a = load <1 x i32>* %ptr + store <1 x i32> %a, <1 x i32>* %ptr2 + ret void +} diff --git a/test/CodeGen/AArch64/neon-max-min-pairwise.ll b/test/CodeGen/AArch64/neon-max-min-pairwise.ll index d757aca..3e18077 100644 --- a/test/CodeGen/AArch64/neon-max-min-pairwise.ll +++ b/test/CodeGen/AArch64/neon-max-min-pairwise.ll @@ -308,3 +308,39 @@ define <2 x double> @test_fminnmp_v2f64(<2 x double> %lhs, <2 x double> %rhs) { ret <2 x double> %val } +define i32 @test_vminv_s32(<2 x i32> %a) { +; CHECK-LABEL: test_vminv_s32 +; CHECK: sminp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + %1 = tail call <1 x i32> @llvm.aarch64.neon.sminv.v1i32.v2i32(<2 x i32> %a) + %2 = extractelement <1 x i32> %1, i32 0 + ret i32 %2 +} + +define i32 @test_vminv_u32(<2 x i32> %a) { +; CHECK-LABEL: test_vminv_u32 +; CHECK: uminp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + %1 = tail call <1 x i32> @llvm.aarch64.neon.uminv.v1i32.v2i32(<2 x i32> %a) + %2 = extractelement <1 x i32> %1, i32 0 + ret i32 %2 +} + +define i32 @test_vmaxv_s32(<2 x i32> %a) { +; CHECK-LABEL: test_vmaxv_s32 +; CHECK: smaxp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + %1 = tail call <1 x i32> @llvm.aarch64.neon.smaxv.v1i32.v2i32(<2 x i32> %a) + %2 = extractelement <1 x i32> %1, i32 0 + ret i32 %2 +} + +define i32 @test_vmaxv_u32(<2 x i32> %a) { +; CHECK-LABEL: test_vmaxv_u32 +; CHECK: umaxp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + %1 = tail call <1 x i32> @llvm.aarch64.neon.umaxv.v1i32.v2i32(<2 x i32> %a) + %2 = extractelement <1 x i32> %1, i32 0 + ret i32 %2 +} + +declare <1 x i32> @llvm.aarch64.neon.uminv.v1i32.v2i32(<2 x i32>) +declare <1 x i32> @llvm.aarch64.neon.sminv.v1i32.v2i32(<2 x i32>) +declare <1 x i32> @llvm.aarch64.neon.umaxv.v1i32.v2i32(<2 x i32>) +declare <1 x i32> @llvm.aarch64.neon.smaxv.v1i32.v2i32(<2 x i32>) \ No newline at end of file diff --git a/test/CodeGen/AArch64/neon-misc.ll b/test/CodeGen/AArch64/neon-misc.ll index 9660bf2..7ec36c2 100644 --- a/test/CodeGen/AArch64/neon-misc.ll +++ b/test/CodeGen/AArch64/neon-misc.ll @@ -894,13 +894,13 @@ define <4 x float> 
@test_vcvt_high_f32_f64(<2 x float> %a, <2 x double> %b) #0 { define <2 x float> @test_vcvtx_f32_f64(<2 x double> %a) #0 { ; CHECK: fcvtxn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d - %vcvtx_f32_f641.i = tail call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %a) #4 + %vcvtx_f32_f641.i = call <2 x float> @llvm.aarch64.neon.vcvtxn.v2f32.v2f64(<2 x double> %a) #4 ret <2 x float> %vcvtx_f32_f641.i } define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %a, <2 x double> %b) #0 { ; CHECK: fcvtxn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d - %vcvtx_f32_f641.i.i = tail call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %b) #4 + %vcvtx_f32_f641.i.i = tail call <2 x float> @llvm.aarch64.neon.vcvtxn.v2f32.v2f64(<2 x double> %b) #4 %shuffle.i = shufflevector <2 x float> %a, <2 x float> %vcvtx_f32_f641.i.i, <4 x i32> ret <4 x float> %shuffle.i } @@ -1080,147 +1080,255 @@ define <2 x i64> @test_vcvtq_u64_f64(<2 x double> %a) #0 { ret <2 x i64> %vcvt.i } -define <2 x i32> @test_vcvtn_s32_f32(<2 x float> %a) #0 { +define <2 x i64> @test_vcvt_s64_f32(<2 x float> %a) #0 { +; CHECK: fcvtl v{{[0-9]+}}.2d, v{{[0-9]+}}.2s +; CHECK: fcvtzs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %vcvt.i = fptosi <2 x float> %a to <2 x i64> + ret <2 x i64> %vcvt.i +} + +define <2 x i64> @test_vcvt_u64_f32(<2 x float> %a) #0 { +; CHECK: fcvtl v{{[0-9]+}}.2d, v{{[0-9]+}}.2s +; CHECK: fcvtzu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %vcvt.i = fptoui <2 x float> %a to <2 x i64> + ret <2 x i64> %vcvt.i +} + +define <4 x i16> @test_vcvt_s16_f32(<4 x float> %a) #0 { +; CHECK: fcvtzs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +; CHECK: xtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s + %vcvt.i = fptosi <4 x float> %a to <4 x i16> + ret <4 x i16> %vcvt.i +} + +define <4 x i16> @test_vcvt_u16_f32(<4 x float> %a) #0 { +; CHECK: fcvtzu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +; CHECK: xtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s + %vcvt.i = fptoui <4 x float> %a to <4 x i16> + ret <4 x i16> %vcvt.i +} + +define <2 x i32> @test_vcvt_s32_f64(<2 x double> %a) #0 { +; CHECK: fcvtzs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +; CHECK: xtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d + %vcvt.i = fptosi <2 x double> %a to <2 x i32> + ret <2 x i32> %vcvt.i +} + +define <2 x i32> @test_vcvt_u32_f64(<2 x double> %a) #0 { +; CHECK: fcvtzu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +; CHECK: xtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d + %vcvt.i = fptoui <2 x double> %a to <2 x i32> + ret <2 x i32> %vcvt.i +} + +define <1 x i8> @test_vcvt_s8_f64(<1 x double> %a) #0 { +; CHECK: fcvtzs w{{[0-9]+}}, d{{[0-9]+}} +; CHECK: ins v{{[0-9]+}}.b[0], w{{[0-9]+}} + %vcvt.i = fptosi <1 x double> %a to <1 x i8> + ret <1 x i8> %vcvt.i +} + +define <1 x i8> @test_vcvt_u8_f64(<1 x double> %a) #0 { +; CHECK: fcvtzs w{{[0-9]+}}, d{{[0-9]+}} +; CHECK: ins v{{[0-9]+}}.b[0], w{{[0-9]+}} + %vcvt.i = fptoui <1 x double> %a to <1 x i8> + ret <1 x i8> %vcvt.i +} + +define <1 x i16> @test_vcvt_s16_f64(<1 x double> %a) #0 { +; CHECK: fcvtzs w{{[0-9]+}}, d{{[0-9]+}} +; CHECK: ins v{{[0-9]+}}.h[0], w{{[0-9]+}} + %vcvt.i = fptosi <1 x double> %a to <1 x i16> + ret <1 x i16> %vcvt.i +} + +define <1 x i16> @test_vcvt_u16_f64(<1 x double> %a) #0 { +; CHECK: fcvtzs w{{[0-9]+}}, d{{[0-9]+}} +; CHECK: ins v{{[0-9]+}}.h[0], w{{[0-9]+}} + %vcvt.i = fptoui <1 x double> %a to <1 x i16> + ret <1 x i16> %vcvt.i +} + +define <1 x i32> @test_vcvt_s32_f64_v1(<1 x double> %a) #0 { +; CHECK: fcvtzs w{{[0-9]+}}, d{{[0-9]+}} +; CHECK: fmov s{{[0-9]+}}, w{{[0-9]+}} + %vcvt.i = fptosi <1 x double> %a to <1 x i32> + ret <1 x i32> %vcvt.i +} + +define <1 x i32> @test_vcvt_u32_f64_v1(<1 x double> %a) 
#0 { +; CHECK: fcvtzu w{{[0-9]+}}, d{{[0-9]+}} +; CHECK: fmov s{{[0-9]+}}, w{{[0-9]+}} + %vcvt.i = fptoui <1 x double> %a to <1 x i32> + ret <1 x i32> %vcvt.i +} + +define <2 x i32> @test_vcvtn_s32_f32(<2 x float> %a) { +; CHECK-LABEL: test_vcvtn_s32_f32 ; CHECK: fcvtns v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtns_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtns.v2i32.v2f32(<2 x float> %a) #4 + %vcvtns_f321.i = call <2 x i32> @llvm.arm.neon.vcvtns.v2i32.v2f32(<2 x float> %a) ret <2 x i32> %vcvtns_f321.i } -define <4 x i32> @test_vcvtnq_s32_f32(<4 x float> %a) #0 { +define <4 x i32> @test_vcvtnq_s32_f32(<4 x float> %a) { +; CHECK-LABEL: test_vcvtnq_s32_f32 ; CHECK: fcvtns v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtns_f321.i = tail call <4 x i32> @llvm.aarch64.neon.fcvtns.v4i32.v4f32(<4 x float> %a) #4 + %vcvtns_f321.i = call <4 x i32> @llvm.arm.neon.vcvtns.v4i32.v4f32(<4 x float> %a) ret <4 x i32> %vcvtns_f321.i } -define <2 x i64> @test_vcvtnq_s64_f64(<2 x double> %a) #0 { +define <2 x i64> @test_vcvtnq_s64_f64(<2 x double> %a) { +; CHECK-LABEL: test_vcvtnq_s64_f64 ; CHECK: fcvtns v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtns_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtns.v2i64.v2f64(<2 x double> %a) #4 + %vcvtns_f641.i = call <2 x i64> @llvm.arm.neon.vcvtns.v2i64.v2f64(<2 x double> %a) ret <2 x i64> %vcvtns_f641.i } -define <2 x i32> @test_vcvtn_u32_f32(<2 x float> %a) #0 { +define <2 x i32> @test_vcvtn_u32_f32(<2 x float> %a) { +; CHECK-LABEL: test_vcvtn_u32_f32 ; CHECK: fcvtnu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtnu_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtnu.v2i32.v2f32(<2 x float> %a) #4 + %vcvtnu_f321.i = call <2 x i32> @llvm.arm.neon.vcvtnu.v2i32.v2f32(<2 x float> %a) ret <2 x i32> %vcvtnu_f321.i } -define <4 x i32> @test_vcvtnq_u32_f32(<4 x float> %a) #0 { +define <4 x i32> @test_vcvtnq_u32_f32(<4 x float> %a) { +; CHECK-LABEL: test_vcvtnq_u32_f32 ; CHECK: fcvtnu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtnu_f321.i = tail call <4 x i32> @llvm.aarch64.neon.fcvtnu.v4i32.v4f32(<4 x float> %a) #4 + %vcvtnu_f321.i = call <4 x i32> @llvm.arm.neon.vcvtnu.v4i32.v4f32(<4 x float> %a) ret <4 x i32> %vcvtnu_f321.i } -define <2 x i64> @test_vcvtnq_u64_f64(<2 x double> %a) #0 { +define <2 x i64> @test_vcvtnq_u64_f64(<2 x double> %a) { +; CHECK-LABEL: test_vcvtnq_u64_f64 ; CHECK: fcvtnu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtnu_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtnu.v2i64.v2f64(<2 x double> %a) #4 + %vcvtnu_f641.i = call <2 x i64> @llvm.arm.neon.vcvtnu.v2i64.v2f64(<2 x double> %a) ret <2 x i64> %vcvtnu_f641.i } -define <2 x i32> @test_vcvtp_s32_f32(<2 x float> %a) #0 { +define <2 x i32> @test_vcvtp_s32_f32(<2 x float> %a) { +; CHECK-LABEL: test_vcvtp_s32_f32 ; CHECK: fcvtps v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtps_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtps.v2i32.v2f32(<2 x float> %a) #4 + %vcvtps_f321.i = call <2 x i32> @llvm.arm.neon.vcvtps.v2i32.v2f32(<2 x float> %a) ret <2 x i32> %vcvtps_f321.i } -define <4 x i32> @test_vcvtpq_s32_f32(<4 x float> %a) #0 { +define <4 x i32> @test_vcvtpq_s32_f32(<4 x float> %a) { +; CHECK-LABEL: test_vcvtpq_s32_f32 ; CHECK: fcvtps v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtps_f321.i = tail call <4 x i32> @llvm.aarch64.neon.fcvtps.v4i32.v4f32(<4 x float> %a) #4 + %vcvtps_f321.i = call <4 x i32> @llvm.arm.neon.vcvtps.v4i32.v4f32(<4 x float> %a) ret <4 x i32> %vcvtps_f321.i } -define <2 x i64> @test_vcvtpq_s64_f64(<2 x double> %a) #0 { +define <2 x i64> @test_vcvtpq_s64_f64(<2 x double> %a) { +; CHECK-LABEL: test_vcvtpq_s64_f64 ; 
CHECK: fcvtps v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtps_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtps.v2i64.v2f64(<2 x double> %a) #4 + %vcvtps_f641.i = call <2 x i64> @llvm.arm.neon.vcvtps.v2i64.v2f64(<2 x double> %a) ret <2 x i64> %vcvtps_f641.i } -define <2 x i32> @test_vcvtp_u32_f32(<2 x float> %a) #0 { +define <2 x i32> @test_vcvtp_u32_f32(<2 x float> %a) { +; CHECK-LABEL: test_vcvtp_u32_f32 ; CHECK: fcvtpu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtpu_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtpu.v2i32.v2f32(<2 x float> %a) #4 + %vcvtpu_f321.i = call <2 x i32> @llvm.arm.neon.vcvtpu.v2i32.v2f32(<2 x float> %a) ret <2 x i32> %vcvtpu_f321.i } -define <4 x i32> @test_vcvtpq_u32_f32(<4 x float> %a) #0 { +define <4 x i32> @test_vcvtpq_u32_f32(<4 x float> %a) { +; CHECK-LABEL: test_vcvtpq_u32_f32 ; CHECK: fcvtpu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtpu_f321.i = tail call <4 x i32> @llvm.aarch64.neon.fcvtpu.v4i32.v4f32(<4 x float> %a) #4 + %vcvtpu_f321.i = call <4 x i32> @llvm.arm.neon.vcvtpu.v4i32.v4f32(<4 x float> %a) ret <4 x i32> %vcvtpu_f321.i } -define <2 x i64> @test_vcvtpq_u64_f64(<2 x double> %a) #0 { +define <2 x i64> @test_vcvtpq_u64_f64(<2 x double> %a) { +; CHECK-LABEL: test_vcvtpq_u64_f64 ; CHECK: fcvtpu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtpu_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtpu.v2i64.v2f64(<2 x double> %a) #4 + %vcvtpu_f641.i = call <2 x i64> @llvm.arm.neon.vcvtpu.v2i64.v2f64(<2 x double> %a) ret <2 x i64> %vcvtpu_f641.i } -define <2 x i32> @test_vcvtm_s32_f32(<2 x float> %a) #0 { +define <2 x i32> @test_vcvtm_s32_f32(<2 x float> %a) { +; CHECK-LABEL: test_vcvtm_s32_f32 ; CHECK: fcvtms v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtms_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtms.v2i32.v2f32(<2 x float> %a) #4 + %vcvtms_f321.i = call <2 x i32> @llvm.arm.neon.vcvtms.v2i32.v2f32(<2 x float> %a) ret <2 x i32> %vcvtms_f321.i } -define <4 x i32> @test_vcvtmq_s32_f32(<4 x float> %a) #0 { +define <4 x i32> @test_vcvtmq_s32_f32(<4 x float> %a) { +; CHECK-LABEL: test_vcvtmq_s32_f32 ; CHECK: fcvtms v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtms_f321.i = tail call <4 x i32> @llvm.aarch64.neon.fcvtms.v4i32.v4f32(<4 x float> %a) #4 + %vcvtms_f321.i = call <4 x i32> @llvm.arm.neon.vcvtms.v4i32.v4f32(<4 x float> %a) ret <4 x i32> %vcvtms_f321.i } -define <2 x i64> @test_vcvtmq_s64_f64(<2 x double> %a) #0 { +define <2 x i64> @test_vcvtmq_s64_f64(<2 x double> %a) { +; CHECK-LABEL: test_vcvtmq_s64_f64 ; CHECK: fcvtms v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtms_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtms.v2i64.v2f64(<2 x double> %a) #4 + %vcvtms_f641.i = call <2 x i64> @llvm.arm.neon.vcvtms.v2i64.v2f64(<2 x double> %a) ret <2 x i64> %vcvtms_f641.i } -define <2 x i32> @test_vcvtm_u32_f32(<2 x float> %a) #0 { +define <2 x i32> @test_vcvtm_u32_f32(<2 x float> %a) { +; CHECK-LABEL: test_vcvtm_u32_f32 ; CHECK: fcvtmu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtmu_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtmu.v2i32.v2f32(<2 x float> %a) #4 + %vcvtmu_f321.i = call <2 x i32> @llvm.arm.neon.vcvtmu.v2i32.v2f32(<2 x float> %a) ret <2 x i32> %vcvtmu_f321.i } -define <4 x i32> @test_vcvtmq_u32_f32(<4 x float> %a) #0 { +define <4 x i32> @test_vcvtmq_u32_f32(<4 x float> %a) { +; CHECK-LABEL: test_vcvtmq_u32_f32 ; CHECK: fcvtmu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtmu_f321.i = tail call <4 x i32> @llvm.aarch64.neon.fcvtmu.v4i32.v4f32(<4 x float> %a) #4 + %vcvtmu_f321.i = call <4 x i32> @llvm.arm.neon.vcvtmu.v4i32.v4f32(<4 x float> %a) ret <4 x i32> %vcvtmu_f321.i } -define 
<2 x i64> @test_vcvtmq_u64_f64(<2 x double> %a) #0 { +define <2 x i64> @test_vcvtmq_u64_f64(<2 x double> %a) { +; CHECK-LABEL: test_vcvtmq_u64_f64 ; CHECK: fcvtmu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtmu_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtmu.v2i64.v2f64(<2 x double> %a) #4 + %vcvtmu_f641.i = call <2 x i64> @llvm.arm.neon.vcvtmu.v2i64.v2f64(<2 x double> %a) ret <2 x i64> %vcvtmu_f641.i } -define <2 x i32> @test_vcvta_s32_f32(<2 x float> %a) #0 { +define <2 x i32> @test_vcvta_s32_f32(<2 x float> %a) { +; CHECK-LABEL: test_vcvta_s32_f32 ; CHECK: fcvtas v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtas_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtas.v2i32.v2f32(<2 x float> %a) #4 + %vcvtas_f321.i = call <2 x i32> @llvm.arm.neon.vcvtas.v2i32.v2f32(<2 x float> %a) ret <2 x i32> %vcvtas_f321.i } -define <4 x i32> @test_vcvtaq_s32_f32(<4 x float> %a) #0 { +define <4 x i32> @test_vcvtaq_s32_f32(<4 x float> %a) { +; CHECK-LABEL: test_vcvtaq_s32_f32 ; CHECK: fcvtas v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtas_f321.i = tail call <4 x i32> @llvm.aarch64.neon.fcvtas.v4i32.v4f32(<4 x float> %a) #4 + %vcvtas_f321.i = call <4 x i32> @llvm.arm.neon.vcvtas.v4i32.v4f32(<4 x float> %a) ret <4 x i32> %vcvtas_f321.i } -define <2 x i64> @test_vcvtaq_s64_f64(<2 x double> %a) #0 { +define <2 x i64> @test_vcvtaq_s64_f64(<2 x double> %a) { +; CHECK-LABEL: test_vcvtaq_s64_f64 ; CHECK: fcvtas v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtas_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtas.v2i64.v2f64(<2 x double> %a) #4 + %vcvtas_f641.i = call <2 x i64> @llvm.arm.neon.vcvtas.v2i64.v2f64(<2 x double> %a) ret <2 x i64> %vcvtas_f641.i } -define <2 x i32> @test_vcvta_u32_f32(<2 x float> %a) #0 { +define <2 x i32> @test_vcvta_u32_f32(<2 x float> %a) { +; CHECK-LABEL: test_vcvta_u32_f32 ; CHECK: fcvtau v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtau_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtau.v2i32.v2f32(<2 x float> %a) #4 + %vcvtau_f321.i = call <2 x i32> @llvm.arm.neon.vcvtau.v2i32.v2f32(<2 x float> %a) ret <2 x i32> %vcvtau_f321.i } -define <4 x i32> @test_vcvtaq_u32_f32(<4 x float> %a) #0 { +define <4 x i32> @test_vcvtaq_u32_f32(<4 x float> %a) { +; CHECK-LABEL: test_vcvtaq_u32_f32 ; CHECK: fcvtau v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtau_f321.i = tail call <4 x i32> @llvm.aarch64.neon.fcvtau.v4i32.v4f32(<4 x float> %a) #4 + %vcvtau_f321.i = call <4 x i32> @llvm.arm.neon.vcvtau.v4i32.v4f32(<4 x float> %a) ret <4 x i32> %vcvtau_f321.i } -define <2 x i64> @test_vcvtaq_u64_f64(<2 x double> %a) #0 { +define <2 x i64> @test_vcvtaq_u64_f64(<2 x double> %a) { +; CHECK-LABEL: test_vcvtaq_u64_f64 ; CHECK: fcvtau v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtau_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtau.v2i64.v2f64(<2 x double> %a) #4 + %vcvtau_f641.i = call <2 x i64> @llvm.arm.neon.vcvtau.v2i64.v2f64(<2 x double> %a) ret <2 x i64> %vcvtau_f641.i } @@ -1326,6 +1434,94 @@ define <2 x double> @test_vcvtq_f64_u64(<2 x i64> %a) #0 { ret <2 x double> %vcvt.i } +define <2 x float> @test_vcvt_f32_s64(<2 x i64> %a) #0 { +; CHECK: scvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +; CHECK: fcvtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d + %vcvt.i = sitofp <2 x i64> %a to <2 x float> + ret <2 x float> %vcvt.i +} + +define <2 x float> @test_vcvt_f32_u64(<2 x i64> %a) #0 { +; CHECK: ucvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +; CHECK: fcvtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d + %vcvt.i = uitofp <2 x i64> %a to <2 x float> + ret <2 x float> %vcvt.i +} + +define <4 x float> @test_vcvt_f32_s16(<4 x i16> %a) #0 { +; CHECK: sshll v{{[0-9]+}}.4s, v{{[0-9]+}}.4h, #0 
+; CHECK: scvtf v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %vcvt.i = sitofp <4 x i16> %a to <4 x float> + ret <4 x float> %vcvt.i +} + +define <4 x float> @test_vcvt_f32_u16(<4 x i16> %a) #0 { +; CHECK: ushll v{{[0-9]+}}.4s, v{{[0-9]+}}.4h, #0 +; CHECK: ucvtf v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %vcvt.i = uitofp <4 x i16> %a to <4 x float> + ret <4 x float> %vcvt.i +} + +define <2 x double> @test_vcvt_f64_s32(<2 x i32> %a) #0 { +; CHECK: sshll v{{[0-9]+}}.2d, v{{[0-9]+}}.2s, #0 +; CHECK: scvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %vcvt.i = sitofp <2 x i32> %a to <2 x double> + ret <2 x double> %vcvt.i +} + +define <2 x double> @test_vcvt_f64_u32(<2 x i32> %a) #0 { +; CHECK: ushll v{{[0-9]+}}.2d, v{{[0-9]+}}.2s, #0 +; CHECK: ucvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %vcvt.i = uitofp <2 x i32> %a to <2 x double> + ret <2 x double> %vcvt.i +} + +define <1 x double> @test_vcvt_f64_s8(<1 x i8> %a) #0 { +; CHECK: umov w{{[0-9]+}}, v{{[0-9]+}}.b[0] +; CHECK: sxtb w{{[0-9]+}}, w{{[0-9]+}} +; CHECK: scvtf d{{[0-9]+}}, w{{[0-9]+}} + %vcvt.i = sitofp <1 x i8> %a to <1 x double> + ret <1 x double> %vcvt.i +} + +define <1 x double> @test_vcvt_f64_u8(<1 x i8> %a) #0 { +; CHECK: umov w{{[0-9]+}}, v{{[0-9]+}}.b[0] +; CHECK: and w{{[0-9]+}}, w{{[0-9]+}}, #0xff +; CHECK: ucvtf d{{[0-9]+}}, w{{[0-9]+}} + %vcvt.i = uitofp <1 x i8> %a to <1 x double> + ret <1 x double> %vcvt.i +} + +define <1 x double> @test_vcvt_f64_s16(<1 x i16> %a) #0 { +; CHECK: umov w{{[0-9]+}}, v{{[0-9]+}}.h[0] +; CHECK: sxth w{{[0-9]+}}, w{{[0-9]+}} +; CHECK: scvtf d{{[0-9]+}}, w{{[0-9]+}} + %vcvt.i = sitofp <1 x i16> %a to <1 x double> + ret <1 x double> %vcvt.i +} + +define <1 x double> @test_vcvt_f64_u16(<1 x i16> %a) #0 { +; CHECK: umov w{{[0-9]+}}, v{{[0-9]+}}.h[0] +; CHECK: and w{{[0-9]+}}, w{{[0-9]+}}, #0xffff +; CHECK: ucvtf d{{[0-9]+}}, w{{[0-9]+}} + %vcvt.i = uitofp <1 x i16> %a to <1 x double> + ret <1 x double> %vcvt.i +} + +define <1 x double> @test_vcvt_f64_s32_v1(<1 x i32> %a) #0 { +; CHECK: fmov w{{[0-9]+}}, s{{[0-9]+}} +; CHECK: scvtf d{{[0-9]+}}, w{{[0-9]+}} + %vcvt.i = sitofp <1 x i32> %a to <1 x double> + ret <1 x double> %vcvt.i +} + +define <1 x double> @test_vcvt_f64_u32_v1(<1 x i32> %a) #0 { +; CHECK: fmov w{{[0-9]+}}, s{{[0-9]+}} +; CHECK: ucvtf d{{[0-9]+}}, w{{[0-9]+}} + %vcvt.i = uitofp <1 x i32> %a to <1 x double> + ret <1 x double> %vcvt.i +} + declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #2 declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #2 @@ -1348,53 +1544,53 @@ declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) #2 declare <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float>) #2 -declare <2 x i64> @llvm.aarch64.neon.fcvtau.v2i64.v2f64(<2 x double>) #2 +declare <2 x i64> @llvm.arm.neon.vcvtau.v2i64.v2f64(<2 x double>) -declare <4 x i32> @llvm.aarch64.neon.fcvtau.v4i32.v4f32(<4 x float>) #2 +declare <4 x i32> @llvm.arm.neon.vcvtau.v4i32.v4f32(<4 x float>) -declare <2 x i32> @llvm.aarch64.neon.fcvtau.v2i32.v2f32(<2 x float>) #2 +declare <2 x i32> @llvm.arm.neon.vcvtau.v2i32.v2f32(<2 x float>) -declare <2 x i64> @llvm.aarch64.neon.fcvtas.v2i64.v2f64(<2 x double>) #2 +declare <2 x i64> @llvm.arm.neon.vcvtas.v2i64.v2f64(<2 x double>) -declare <4 x i32> @llvm.aarch64.neon.fcvtas.v4i32.v4f32(<4 x float>) #2 +declare <4 x i32> @llvm.arm.neon.vcvtas.v4i32.v4f32(<4 x float>) -declare <2 x i32> @llvm.aarch64.neon.fcvtas.v2i32.v2f32(<2 x float>) #2 +declare <2 x i32> @llvm.arm.neon.vcvtas.v2i32.v2f32(<2 x float>) -declare <2 x i64> @llvm.aarch64.neon.fcvtmu.v2i64.v2f64(<2 x double>) #2 +declare <2 x i64> 
@llvm.arm.neon.vcvtmu.v2i64.v2f64(<2 x double>) -declare <4 x i32> @llvm.aarch64.neon.fcvtmu.v4i32.v4f32(<4 x float>) #2 +declare <4 x i32> @llvm.arm.neon.vcvtmu.v4i32.v4f32(<4 x float>) -declare <2 x i32> @llvm.aarch64.neon.fcvtmu.v2i32.v2f32(<2 x float>) #2 +declare <2 x i32> @llvm.arm.neon.vcvtmu.v2i32.v2f32(<2 x float>) -declare <2 x i64> @llvm.aarch64.neon.fcvtms.v2i64.v2f64(<2 x double>) #2 +declare <2 x i64> @llvm.arm.neon.vcvtms.v2i64.v2f64(<2 x double>) -declare <4 x i32> @llvm.aarch64.neon.fcvtms.v4i32.v4f32(<4 x float>) #2 +declare <4 x i32> @llvm.arm.neon.vcvtms.v4i32.v4f32(<4 x float>) -declare <2 x i32> @llvm.aarch64.neon.fcvtms.v2i32.v2f32(<2 x float>) #2 +declare <2 x i32> @llvm.arm.neon.vcvtms.v2i32.v2f32(<2 x float>) -declare <2 x i64> @llvm.aarch64.neon.fcvtpu.v2i64.v2f64(<2 x double>) #2 +declare <2 x i64> @llvm.arm.neon.vcvtpu.v2i64.v2f64(<2 x double>) -declare <4 x i32> @llvm.aarch64.neon.fcvtpu.v4i32.v4f32(<4 x float>) #2 +declare <4 x i32> @llvm.arm.neon.vcvtpu.v4i32.v4f32(<4 x float>) -declare <2 x i32> @llvm.aarch64.neon.fcvtpu.v2i32.v2f32(<2 x float>) #2 +declare <2 x i32> @llvm.arm.neon.vcvtpu.v2i32.v2f32(<2 x float>) -declare <2 x i64> @llvm.aarch64.neon.fcvtps.v2i64.v2f64(<2 x double>) #2 +declare <2 x i64> @llvm.arm.neon.vcvtps.v2i64.v2f64(<2 x double>) -declare <4 x i32> @llvm.aarch64.neon.fcvtps.v4i32.v4f32(<4 x float>) #2 +declare <4 x i32> @llvm.arm.neon.vcvtps.v4i32.v4f32(<4 x float>) -declare <2 x i32> @llvm.aarch64.neon.fcvtps.v2i32.v2f32(<2 x float>) #2 +declare <2 x i32> @llvm.arm.neon.vcvtps.v2i32.v2f32(<2 x float>) -declare <2 x i64> @llvm.aarch64.neon.fcvtnu.v2i64.v2f64(<2 x double>) #2 +declare <2 x i64> @llvm.arm.neon.vcvtnu.v2i64.v2f64(<2 x double>) -declare <4 x i32> @llvm.aarch64.neon.fcvtnu.v4i32.v4f32(<4 x float>) #2 +declare <4 x i32> @llvm.arm.neon.vcvtnu.v4i32.v4f32(<4 x float>) -declare <2 x i32> @llvm.aarch64.neon.fcvtnu.v2i32.v2f32(<2 x float>) #2 +declare <2 x i32> @llvm.arm.neon.vcvtnu.v2i32.v2f32(<2 x float>) -declare <2 x i64> @llvm.aarch64.neon.fcvtns.v2i64.v2f64(<2 x double>) #2 +declare <2 x i64> @llvm.arm.neon.vcvtns.v2i64.v2f64(<2 x double>) -declare <4 x i32> @llvm.aarch64.neon.fcvtns.v4i32.v4f32(<4 x float>) #2 +declare <4 x i32> @llvm.arm.neon.vcvtns.v4i32.v4f32(<4 x float>) -declare <2 x i32> @llvm.aarch64.neon.fcvtns.v2i32.v2f32(<2 x float>) #2 +declare <2 x i32> @llvm.arm.neon.vcvtns.v2i32.v2f32(<2 x float>) declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) #3 @@ -1438,7 +1634,7 @@ declare <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float>) #2 declare <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float>) #2 -declare <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double>) #2 +declare <2 x float> @llvm.aarch64.neon.vcvtxn.v2f32.v2f64(<2 x double>) #2 declare <2 x float> @llvm.aarch64.neon.fcvtn.v2f32.v2f64(<2 x double>) #2 @@ -1624,56 +1820,56 @@ define <1 x i64> @test_vcvt_u64_f64(<1 x double> %a) { define <1 x i64> @test_vcvtn_s64_f64(<1 x double> %a) { ; CHECK-LABEL: test_vcvtn_s64_f64 ; CHECK: fcvtns d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double> %a) + %1 = call <1 x i64> @llvm.arm.neon.vcvtns.v1i64.v1f64(<1 x double> %a) ret <1 x i64> %1 } define <1 x i64> @test_vcvtn_u64_f64(<1 x double> %a) { ; CHECK-LABEL: test_vcvtn_u64_f64 ; CHECK: fcvtnu d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double> %a) + %1 = call <1 x i64> @llvm.arm.neon.vcvtnu.v1i64.v1f64(<1 x double> %a) ret <1 x i64> %1 } 
define <1 x i64> @test_vcvtp_s64_f64(<1 x double> %a) { ; CHECK-LABEL: test_vcvtp_s64_f64 ; CHECK: fcvtps d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double> %a) + %1 = call <1 x i64> @llvm.arm.neon.vcvtps.v1i64.v1f64(<1 x double> %a) ret <1 x i64> %1 } define <1 x i64> @test_vcvtp_u64_f64(<1 x double> %a) { ; CHECK-LABEL: test_vcvtp_u64_f64 ; CHECK: fcvtpu d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double> %a) + %1 = call <1 x i64> @llvm.arm.neon.vcvtpu.v1i64.v1f64(<1 x double> %a) ret <1 x i64> %1 } define <1 x i64> @test_vcvtm_s64_f64(<1 x double> %a) { ; CHECK-LABEL: test_vcvtm_s64_f64 ; CHECK: fcvtms d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double> %a) + %1 = call <1 x i64> @llvm.arm.neon.vcvtms.v1i64.v1f64(<1 x double> %a) ret <1 x i64> %1 } define <1 x i64> @test_vcvtm_u64_f64(<1 x double> %a) { ; CHECK-LABEL: test_vcvtm_u64_f64 ; CHECK: fcvtmu d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double> %a) + %1 = call <1 x i64> @llvm.arm.neon.vcvtmu.v1i64.v1f64(<1 x double> %a) ret <1 x i64> %1 } define <1 x i64> @test_vcvta_s64_f64(<1 x double> %a) { ; CHECK-LABEL: test_vcvta_s64_f64 ; CHECK: fcvtas d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double> %a) + %1 = call <1 x i64> @llvm.arm.neon.vcvtas.v1i64.v1f64(<1 x double> %a) ret <1 x i64> %1 } define <1 x i64> @test_vcvta_u64_f64(<1 x double> %a) { ; CHECK-LABEL: test_vcvta_u64_f64 ; CHECK: fcvtau d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double> %a) + %1 = call <1 x i64> @llvm.arm.neon.vcvtau.v1i64.v1f64(<1 x double> %a) ret <1 x i64> %1 } @@ -1691,14 +1887,14 @@ define <1 x double> @test_vcvt_f64_u64(<1 x i64> %a) { ret <1 x double> %1 } -declare <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double>) -declare <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double>) -declare <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double>) -declare <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double>) -declare <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double>) -declare <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double>) -declare <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double>) -declare <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.arm.neon.vcvtau.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.arm.neon.vcvtas.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.arm.neon.vcvtmu.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.arm.neon.vcvtms.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.arm.neon.vcvtpu.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.arm.neon.vcvtps.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.arm.neon.vcvtnu.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.arm.neon.vcvtns.v1i64.v1f64(<1 x double>) define <1 x double> @test_vrndn_f64(<1 x double> %a) { ; CHECK-LABEL: test_vrndn_f64 @@ -1796,4 +1992,23 @@ declare <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double>, <1 x double>) declare <1 x double> @llvm.arm.neon.vrecps.v1f64(<1 x double>, <1 x double>) declare <1 x double> @llvm.sqrt.v1f64(<1 x double>) declare <1 x double> @llvm.arm.neon.vrecpe.v1f64(<1 x double>) -declare <1 x double> @llvm.arm.neon.vrsqrte.v1f64(<1 x double>) \ No newline at end of file +declare <1 x double> 
@llvm.arm.neon.vrsqrte.v1f64(<1 x double>) + +define i64 @test_vaddlv_s32(<2 x i32> %a) { +; CHECK-LABEL: test_vaddlv_s32 +; CHECK: saddlp {{v[0-9]+}}.1d, {{v[0-9]+}}.2s + %1 = tail call <1 x i64> @llvm.aarch64.neon.saddlv.v1i64.v2i32(<2 x i32> %a) + %2 = extractelement <1 x i64> %1, i32 0 + ret i64 %2 +} + +define i64 @test_vaddlv_u32(<2 x i32> %a) { +; CHECK-LABEL: test_vaddlv_u32 +; CHECK: uaddlp {{v[0-9]+}}.1d, {{v[0-9]+}}.2s + %1 = tail call <1 x i64> @llvm.aarch64.neon.uaddlv.v1i64.v2i32(<2 x i32> %a) + %2 = extractelement <1 x i64> %1, i32 0 + ret i64 %2 +} + +declare <1 x i64> @llvm.aarch64.neon.saddlv.v1i64.v2i32(<2 x i32>) +declare <1 x i64> @llvm.aarch64.neon.uaddlv.v1i64.v2i32(<2 x i32>) \ No newline at end of file diff --git a/test/CodeGen/AArch64/neon-mla-mls.ll b/test/CodeGen/AArch64/neon-mla-mls.ll index 23e9223..71bb0e7 100644 --- a/test/CodeGen/AArch64/neon-mla-mls.ll +++ b/test/CodeGen/AArch64/neon-mla-mls.ll @@ -2,84 +2,84 @@ define <8 x i8> @mla8xi8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) { -;CHECK: mla {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: mla {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = mul <8 x i8> %A, %B; %tmp2 = add <8 x i8> %C, %tmp1; ret <8 x i8> %tmp2 } define <16 x i8> @mla16xi8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) { -;CHECK: mla {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: mla {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = mul <16 x i8> %A, %B; %tmp2 = add <16 x i8> %C, %tmp1; ret <16 x i8> %tmp2 } define <4 x i16> @mla4xi16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C) { -;CHECK: mla {{v[0-31]+}}.4h, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h +;CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h %tmp1 = mul <4 x i16> %A, %B; %tmp2 = add <4 x i16> %C, %tmp1; ret <4 x i16> %tmp2 } define <8 x i16> @mla8xi16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C) { -;CHECK: mla {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +;CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h %tmp1 = mul <8 x i16> %A, %B; %tmp2 = add <8 x i16> %C, %tmp1; ret <8 x i16> %tmp2 } define <2 x i32> @mla2xi32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C) { -;CHECK: mla {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +;CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s %tmp1 = mul <2 x i32> %A, %B; %tmp2 = add <2 x i32> %C, %tmp1; ret <2 x i32> %tmp2 } define <4 x i32> @mla4xi32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) { -;CHECK: mla {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +;CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s %tmp1 = mul <4 x i32> %A, %B; %tmp2 = add <4 x i32> %C, %tmp1; ret <4 x i32> %tmp2 } define <8 x i8> @mls8xi8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) { -;CHECK: mls {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: mls {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = mul <8 x i8> %A, %B; %tmp2 = sub <8 x i8> %C, %tmp1; ret <8 x i8> %tmp2 } define <16 x i8> @mls16xi8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) { -;CHECK: mls {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: mls {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = mul <16 x i8> %A, %B; %tmp2 = sub <16 x i8> %C, %tmp1; ret <16 x i8> %tmp2 } define <4 x i16> @mls4xi16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C) { -;CHECK: mls {{v[0-31]+}}.4h, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h +;CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h %tmp1 = mul <4 x i16> %A, %B; %tmp2 = sub <4 x i16> %C, %tmp1; ret <4 x i16> %tmp2 } define <8 x i16> @mls8xi16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C) { -;CHECK: mls 
{{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +;CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h %tmp1 = mul <8 x i16> %A, %B; %tmp2 = sub <8 x i16> %C, %tmp1; ret <8 x i16> %tmp2 } define <2 x i32> @mls2xi32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C) { -;CHECK: mls {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +;CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s %tmp1 = mul <2 x i32> %A, %B; %tmp2 = sub <2 x i32> %C, %tmp1; ret <2 x i32> %tmp2 } define <4 x i32> @mls4xi32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) { -;CHECK: mls {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +;CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s %tmp1 = mul <4 x i32> %A, %B; %tmp2 = sub <4 x i32> %C, %tmp1; ret <4 x i32> %tmp2 diff --git a/test/CodeGen/AArch64/neon-mov.ll b/test/CodeGen/AArch64/neon-mov.ll index 60b13b8..4035b91 100644 --- a/test/CodeGen/AArch64/neon-mov.ll +++ b/test/CodeGen/AArch64/neon-mov.ll @@ -1,204 +1,204 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s define <8 x i8> @movi8b() { -;CHECK: movi {{v[0-31]+}}.8b, #0x8 +;CHECK: movi {{v[0-9]+}}.8b, #0x8 ret <8 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > } define <16 x i8> @movi16b() { -;CHECK: movi {{v[0-31]+}}.16b, #0x8 +;CHECK: movi {{v[0-9]+}}.16b, #0x8 ret <16 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > } define <2 x i32> @movi2s_lsl0() { -;CHECK: movi {{v[0-31]+}}.2s, #0xff +;CHECK: movi {{v[0-9]+}}.2s, #0xff ret <2 x i32> < i32 255, i32 255 > } define <2 x i32> @movi2s_lsl8() { -;CHECK: movi {{v[0-31]+}}.2s, #0xff, lsl #8 +;CHECK: movi {{v[0-9]+}}.2s, #0xff, lsl #8 ret <2 x i32> < i32 65280, i32 65280 > } define <2 x i32> @movi2s_lsl16() { -;CHECK: movi {{v[0-31]+}}.2s, #0xff, lsl #16 +;CHECK: movi {{v[0-9]+}}.2s, #0xff, lsl #16 ret <2 x i32> < i32 16711680, i32 16711680 > } define <2 x i32> @movi2s_lsl24() { -;CHECK: movi {{v[0-31]+}}.2s, #0xff, lsl #24 +;CHECK: movi {{v[0-9]+}}.2s, #0xff, lsl #24 ret <2 x i32> < i32 4278190080, i32 4278190080 > } define <4 x i32> @movi4s_lsl0() { -;CHECK: movi {{v[0-31]+}}.4s, #0xff +;CHECK: movi {{v[0-9]+}}.4s, #0xff ret <4 x i32> < i32 255, i32 255, i32 255, i32 255 > } define <4 x i32> @movi4s_lsl8() { -;CHECK: movi {{v[0-31]+}}.4s, #0xff, lsl #8 +;CHECK: movi {{v[0-9]+}}.4s, #0xff, lsl #8 ret <4 x i32> < i32 65280, i32 65280, i32 65280, i32 65280 > } define <4 x i32> @movi4s_lsl16() { -;CHECK: movi {{v[0-31]+}}.4s, #0xff, lsl #16 +;CHECK: movi {{v[0-9]+}}.4s, #0xff, lsl #16 ret <4 x i32> < i32 16711680, i32 16711680, i32 16711680, i32 16711680 > } define <4 x i32> @movi4s_lsl24() { -;CHECK: movi {{v[0-31]+}}.4s, #0xff, lsl #24 +;CHECK: movi {{v[0-9]+}}.4s, #0xff, lsl #24 ret <4 x i32> < i32 4278190080, i32 4278190080, i32 4278190080, i32 4278190080 > } define <4 x i16> @movi4h_lsl0() { -;CHECK: movi {{v[0-31]+}}.4h, #0xff +;CHECK: movi {{v[0-9]+}}.4h, #0xff ret <4 x i16> < i16 255, i16 255, i16 255, i16 255 > } define <4 x i16> @movi4h_lsl8() { -;CHECK: movi {{v[0-31]+}}.4h, #0xff, lsl #8 +;CHECK: movi {{v[0-9]+}}.4h, #0xff, lsl #8 ret <4 x i16> < i16 65280, i16 65280, i16 65280, i16 65280 > } define <8 x i16> @movi8h_lsl0() { -;CHECK: movi {{v[0-31]+}}.8h, #0xff +;CHECK: movi {{v[0-9]+}}.8h, #0xff ret <8 x i16> < i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255 > } define <8 x i16> @movi8h_lsl8() { -;CHECK: movi {{v[0-31]+}}.8h, #0xff, lsl #8 +;CHECK: movi {{v[0-9]+}}.8h, #0xff, lsl #8 ret <8 x i16> < i16 65280, 
i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280 > } define <2 x i32> @mvni2s_lsl0() { -;CHECK: mvni {{v[0-31]+}}.2s, #0x10 +;CHECK: mvni {{v[0-9]+}}.2s, #0x10 ret <2 x i32> < i32 4294967279, i32 4294967279 > } define <2 x i32> @mvni2s_lsl8() { -;CHECK: mvni {{v[0-31]+}}.2s, #0x10, lsl #8 +;CHECK: mvni {{v[0-9]+}}.2s, #0x10, lsl #8 ret <2 x i32> < i32 4294963199, i32 4294963199 > } define <2 x i32> @mvni2s_lsl16() { -;CHECK: mvni {{v[0-31]+}}.2s, #0x10, lsl #16 +;CHECK: mvni {{v[0-9]+}}.2s, #0x10, lsl #16 ret <2 x i32> < i32 4293918719, i32 4293918719 > } define <2 x i32> @mvni2s_lsl24() { -;CHECK: mvni {{v[0-31]+}}.2s, #0x10, lsl #24 +;CHECK: mvni {{v[0-9]+}}.2s, #0x10, lsl #24 ret <2 x i32> < i32 4026531839, i32 4026531839 > } define <4 x i32> @mvni4s_lsl0() { -;CHECK: mvni {{v[0-31]+}}.4s, #0x10 +;CHECK: mvni {{v[0-9]+}}.4s, #0x10 ret <4 x i32> < i32 4294967279, i32 4294967279, i32 4294967279, i32 4294967279 > } define <4 x i32> @mvni4s_lsl8() { -;CHECK: mvni {{v[0-31]+}}.4s, #0x10, lsl #8 +;CHECK: mvni {{v[0-9]+}}.4s, #0x10, lsl #8 ret <4 x i32> < i32 4294963199, i32 4294963199, i32 4294963199, i32 4294963199 > } define <4 x i32> @mvni4s_lsl16() { -;CHECK: mvni {{v[0-31]+}}.4s, #0x10, lsl #16 +;CHECK: mvni {{v[0-9]+}}.4s, #0x10, lsl #16 ret <4 x i32> < i32 4293918719, i32 4293918719, i32 4293918719, i32 4293918719 > } define <4 x i32> @mvni4s_lsl24() { -;CHECK: mvni {{v[0-31]+}}.4s, #0x10, lsl #24 +;CHECK: mvni {{v[0-9]+}}.4s, #0x10, lsl #24 ret <4 x i32> < i32 4026531839, i32 4026531839, i32 4026531839, i32 4026531839 > } define <4 x i16> @mvni4h_lsl0() { -;CHECK: mvni {{v[0-31]+}}.4h, #0x10 +;CHECK: mvni {{v[0-9]+}}.4h, #0x10 ret <4 x i16> < i16 65519, i16 65519, i16 65519, i16 65519 > } define <4 x i16> @mvni4h_lsl8() { -;CHECK: mvni {{v[0-31]+}}.4h, #0x10, lsl #8 +;CHECK: mvni {{v[0-9]+}}.4h, #0x10, lsl #8 ret <4 x i16> < i16 61439, i16 61439, i16 61439, i16 61439 > } define <8 x i16> @mvni8h_lsl0() { -;CHECK: mvni {{v[0-31]+}}.8h, #0x10 +;CHECK: mvni {{v[0-9]+}}.8h, #0x10 ret <8 x i16> < i16 65519, i16 65519, i16 65519, i16 65519, i16 65519, i16 65519, i16 65519, i16 65519 > } define <8 x i16> @mvni8h_lsl8() { -;CHECK: mvni {{v[0-31]+}}.8h, #0x10, lsl #8 +;CHECK: mvni {{v[0-9]+}}.8h, #0x10, lsl #8 ret <8 x i16> < i16 61439, i16 61439, i16 61439, i16 61439, i16 61439, i16 61439, i16 61439, i16 61439 > } define <2 x i32> @movi2s_msl8(<2 x i32> %a) { -;CHECK: movi {{v[0-31]+}}.2s, #0xff, msl #8 +;CHECK: movi {{v[0-9]+}}.2s, #0xff, msl #8 ret <2 x i32> < i32 65535, i32 65535 > } define <2 x i32> @movi2s_msl16() { -;CHECK: movi {{v[0-31]+}}.2s, #0xff, msl #16 +;CHECK: movi {{v[0-9]+}}.2s, #0xff, msl #16 ret <2 x i32> < i32 16777215, i32 16777215 > } define <4 x i32> @movi4s_msl8() { -;CHECK: movi {{v[0-31]+}}.4s, #0xff, msl #8 +;CHECK: movi {{v[0-9]+}}.4s, #0xff, msl #8 ret <4 x i32> < i32 65535, i32 65535, i32 65535, i32 65535 > } define <4 x i32> @movi4s_msl16() { -;CHECK: movi {{v[0-31]+}}.4s, #0xff, msl #16 +;CHECK: movi {{v[0-9]+}}.4s, #0xff, msl #16 ret <4 x i32> < i32 16777215, i32 16777215, i32 16777215, i32 16777215 > } define <2 x i32> @mvni2s_msl8() { -;CHECK: mvni {{v[0-31]+}}.2s, #0x10, msl #8 +;CHECK: mvni {{v[0-9]+}}.2s, #0x10, msl #8 ret <2 x i32> < i32 18446744073709547264, i32 18446744073709547264> } define <2 x i32> @mvni2s_msl16() { -;CHECK: mvni {{v[0-31]+}}.2s, #0x10, msl #16 +;CHECK: mvni {{v[0-9]+}}.2s, #0x10, msl #16 ret <2 x i32> < i32 18446744073708437504, i32 18446744073708437504> } define <4 x i32> @mvni4s_msl8() { -;CHECK: mvni 
{{v[0-31]+}}.4s, #0x10, msl #8 +;CHECK: mvni {{v[0-9]+}}.4s, #0x10, msl #8 ret <4 x i32> < i32 18446744073709547264, i32 18446744073709547264, i32 18446744073709547264, i32 18446744073709547264> } define <4 x i32> @mvni4s_msl16() { -;CHECK: mvni {{v[0-31]+}}.4s, #0x10, msl #16 +;CHECK: mvni {{v[0-9]+}}.4s, #0x10, msl #16 ret <4 x i32> < i32 18446744073708437504, i32 18446744073708437504, i32 18446744073708437504, i32 18446744073708437504> } define <2 x i64> @movi2d() { -;CHECK: movi {{v[0-31]+}}.2d, #0xff0000ff0000ffff +;CHECK: movi {{v[0-9]+}}.2d, #0xff0000ff0000ffff ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 > } define <1 x i64> @movid() { -;CHECK: movi {{d[0-31]+}}, #0xff0000ff0000ffff +;CHECK: movi {{d[0-9]+}}, #0xff0000ff0000ffff ret <1 x i64> < i64 18374687574888349695 > } define <2 x float> @fmov2s() { -;CHECK: fmov {{v[0-31]+}}.2s, #-12.00000000 +;CHECK: fmov {{v[0-9]+}}.2s, #-12.00000000 ret <2 x float> < float -1.2e1, float -1.2e1> } define <4 x float> @fmov4s() { -;CHECK: fmov {{v[0-31]+}}.4s, #-12.00000000 +;CHECK: fmov {{v[0-9]+}}.4s, #-12.00000000 ret <4 x float> < float -1.2e1, float -1.2e1, float -1.2e1, float -1.2e1> } define <2 x double> @fmov2d() { -;CHECK: fmov {{v[0-31]+}}.2d, #-12.00000000 +;CHECK: fmov {{v[0-9]+}}.2d, #-12.00000000 ret <2 x double> < double -1.2e1, double -1.2e1> } @@ -210,7 +210,9 @@ define <2 x i32> @movi1d_1() { declare <2 x i32> @test_movi1d(<2 x i32>, <2 x i32>) define <2 x i32> @movi1d() { -; CHECK: movi d1, #0xffffffff0000 +; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} +; CHECK-NEXT: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] +; CHECK-NEXT: movi d1, #0xffffffff0000 %1 = tail call <2 x i32> @test_movi1d(<2 x i32> , <2 x i32> ) ret <2 x i32> %1 } diff --git a/test/CodeGen/AArch64/neon-mul-div.ll b/test/CodeGen/AArch64/neon-mul-div.ll index e1be313..da22ce8 100644 --- a/test/CodeGen/AArch64/neon-mul-div.ll +++ b/test/CodeGen/AArch64/neon-mul-div.ll @@ -2,76 +2,628 @@ define <8 x i8> @mul8xi8(<8 x i8> %A, <8 x i8> %B) { -;CHECK: mul {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: mul {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = mul <8 x i8> %A, %B; ret <8 x i8> %tmp3 } define <16 x i8> @mul16xi8(<16 x i8> %A, <16 x i8> %B) { -;CHECK: mul {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: mul {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = mul <16 x i8> %A, %B; ret <16 x i8> %tmp3 } define <4 x i16> @mul4xi16(<4 x i16> %A, <4 x i16> %B) { -;CHECK: mul {{v[0-31]+}}.4h, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h +;CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h %tmp3 = mul <4 x i16> %A, %B; ret <4 x i16> %tmp3 } define <8 x i16> @mul8xi16(<8 x i16> %A, <8 x i16> %B) { -;CHECK: mul {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +;CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h %tmp3 = mul <8 x i16> %A, %B; ret <8 x i16> %tmp3 } define <2 x i32> @mul2xi32(<2 x i32> %A, <2 x i32> %B) { -;CHECK: mul {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +;CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s %tmp3 = mul <2 x i32> %A, %B; ret <2 x i32> %tmp3 } define <4 x i32> @mul4x32(<4 x i32> %A, <4 x i32> %B) { -;CHECK: mul {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +;CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s %tmp3 = mul <4 x i32> %A, %B; ret <4 x i32> %tmp3 } +define <1 x i64> @mul1xi64(<1 x i64> %A, <1 x i64> %B) { +;CHECK-LABEL: mul1xi64: +;CHECK: mul x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}} + %tmp3 = mul <1 x i64> %A, %B; + ret <1 x i64> 
%tmp3 +} + +define <2 x i64> @mul2xi64(<2 x i64> %A, <2 x i64> %B) { +;CHECK-LABEL: mul2xi64: +;CHECK: mul x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}} +;CHECK: mul x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}} + %tmp3 = mul <2 x i64> %A, %B; + ret <2 x i64> %tmp3 +} + define <2 x float> @mul2xfloat(<2 x float> %A, <2 x float> %B) { -;CHECK: fmul {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +;CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s %tmp3 = fmul <2 x float> %A, %B; ret <2 x float> %tmp3 } define <4 x float> @mul4xfloat(<4 x float> %A, <4 x float> %B) { -;CHECK: fmul {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +;CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s %tmp3 = fmul <4 x float> %A, %B; ret <4 x float> %tmp3 } define <2 x double> @mul2xdouble(<2 x double> %A, <2 x double> %B) { -;CHECK: fmul {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +;CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d %tmp3 = fmul <2 x double> %A, %B; ret <2 x double> %tmp3 } define <2 x float> @div2xfloat(<2 x float> %A, <2 x float> %B) { -;CHECK: fdiv {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +;CHECK: fdiv {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s %tmp3 = fdiv <2 x float> %A, %B; ret <2 x float> %tmp3 } define <4 x float> @div4xfloat(<4 x float> %A, <4 x float> %B) { -;CHECK: fdiv {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +;CHECK: fdiv {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s %tmp3 = fdiv <4 x float> %A, %B; ret <4 x float> %tmp3 } define <2 x double> @div2xdouble(<2 x double> %A, <2 x double> %B) { -;CHECK: fdiv {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +;CHECK: fdiv {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d %tmp3 = fdiv <2 x double> %A, %B; ret <2 x double> %tmp3 } +define <1 x i8> @sdiv1x8(<1 x i8> %A, <1 x i8> %B) { +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = sdiv <1 x i8> %A, %B; + ret <1 x i8> %tmp3 +} + +define <8 x i8> @sdiv8x8(<8 x i8> %A, <8 x i8> %B) { +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = sdiv <8 x i8> %A, %B; + ret <8 x i8> %tmp3 +} + +define <16 x i8> @sdiv16x8(<16 x i8> %A, <16 x i8> %B) { +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = sdiv <16 x i8> %A, %B; + ret <16 x i8> %tmp3 +} + +define <1 x i16> @sdiv1x16(<1 x i16> %A, <1 x i16> %B) { +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = sdiv <1 x i16> %A, %B; + ret <1 x i16> 
%tmp3 +} + +define <4 x i16> @sdiv4x16(<4 x i16> %A, <4 x i16> %B) { +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = sdiv <4 x i16> %A, %B; + ret <4 x i16> %tmp3 +} + +define <8 x i16> @sdiv8x16(<8 x i16> %A, <8 x i16> %B) { +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = sdiv <8 x i16> %A, %B; + ret <8 x i16> %tmp3 +} + +define <1 x i32> @sdiv1x32(<1 x i32> %A, <1 x i32> %B) { +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = sdiv <1 x i32> %A, %B; + ret <1 x i32> %tmp3 +} + +define <2 x i32> @sdiv2x32(<2 x i32> %A, <2 x i32> %B) { +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = sdiv <2 x i32> %A, %B; + ret <2 x i32> %tmp3 +} + +define <4 x i32> @sdiv4x32(<4 x i32> %A, <4 x i32> %B) { +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = sdiv <4 x i32> %A, %B; + ret <4 x i32> %tmp3 +} + +define <1 x i64> @sdiv1x64(<1 x i64> %A, <1 x i64> %B) { +;CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + %tmp3 = sdiv <1 x i64> %A, %B; + ret <1 x i64> %tmp3 +} + +define <2 x i64> @sdiv2x64(<2 x i64> %A, <2 x i64> %B) { +;CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} +;CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + %tmp3 = sdiv <2 x i64> %A, %B; + ret <2 x i64> %tmp3 +} + +define <1 x i8> @udiv1x8(<1 x i8> %A, <1 x i8> %B) { +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = udiv <1 x i8> %A, %B; + ret <1 x i8> %tmp3 +} + +define <8 x i8> @udiv8x8(<8 x i8> %A, <8 x i8> %B) { +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = udiv <8 x i8> %A, %B; + ret <8 x i8> %tmp3 +} + +define <16 x i8> @udiv16x8(<16 x i8> %A, <16 x i8> %B) { +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, 
{{w[0-9]+}} + %tmp3 = udiv <16 x i8> %A, %B; + ret <16 x i8> %tmp3 +} + +define <1 x i16> @udiv1x16(<1 x i16> %A, <1 x i16> %B) { +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = udiv <1 x i16> %A, %B; + ret <1 x i16> %tmp3 +} + +define <4 x i16> @udiv4x16(<4 x i16> %A, <4 x i16> %B) { +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = udiv <4 x i16> %A, %B; + ret <4 x i16> %tmp3 +} + +define <8 x i16> @udiv8x16(<8 x i16> %A, <8 x i16> %B) { +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = udiv <8 x i16> %A, %B; + ret <8 x i16> %tmp3 +} + +define <1 x i32> @udiv1x32(<1 x i32> %A, <1 x i32> %B) { +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = udiv <1 x i32> %A, %B; + ret <1 x i32> %tmp3 +} + +define <2 x i32> @udiv2x32(<2 x i32> %A, <2 x i32> %B) { +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = udiv <2 x i32> %A, %B; + ret <2 x i32> %tmp3 +} + +define <4 x i32> @udiv4x32(<4 x i32> %A, <4 x i32> %B) { +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = udiv <4 x i32> %A, %B; + ret <4 x i32> %tmp3 +} + +define <1 x i64> @udiv1x64(<1 x i64> %A, <1 x i64> %B) { +;CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + %tmp3 = udiv <1 x i64> %A, %B; + ret <1 x i64> %tmp3 +} + +define <2 x i64> @udiv2x64(<2 x i64> %A, <2 x i64> %B) { +;CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} +;CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + %tmp3 = udiv <2 x i64> %A, %B; + ret <2 x i64> %tmp3 +} + +define <1 x i8> @srem1x8(<1 x i8> %A, <1 x i8> %B) { +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = srem <1 x i8> %A, %B; + ret <1 x i8> %tmp3 +} + +define <8 x i8> @srem8x8(<8 x i8> %A, <8 x i8> %B) { +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = srem <8 x i8> %A, %B; + ret <8 x i8> %tmp3 +} + +define <16 x i8> @srem16x8(<16 x i8> %A, <16 x i8> %B) { 
+;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = srem <16 x i8> %A, %B; + ret <16 x i8> %tmp3 +} + +define <1 x i16> @srem1x16(<1 x i16> %A, <1 x i16> %B) { +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = srem <1 x i16> %A, %B; + ret <1 x i16> %tmp3 +} + +define <4 x i16> @srem4x16(<4 x i16> %A, <4 x i16> %B) { +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = srem <4 x i16> %A, %B; + ret <4 x i16> %tmp3 +} + +define <8 x i16> @srem8x16(<8 x i16> %A, <8 x i16> %B) { +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, 
{{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = srem <8 x i16> %A, %B; + ret <8 x i16> %tmp3 +} + +define <1 x i32> @srem1x32(<1 x i32> %A, <1 x i32> %B) { +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = srem <1 x i32> %A, %B; + ret <1 x i32> %tmp3 +} + +define <2 x i32> @srem2x32(<2 x i32> %A, <2 x i32> %B) { +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = srem <2 x i32> %A, %B; + ret <2 x i32> %tmp3 +} + +define <4 x i32> @srem4x32(<4 x i32> %A, <4 x i32> %B) { +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = srem <4 x i32> %A, %B; + ret <4 x i32> %tmp3 +} + +define <1 x i64> @srem1x64(<1 x i64> %A, <1 x i64> %B) { +;CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} +;CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + %tmp3 = srem <1 x i64> %A, %B; + ret <1 x i64> %tmp3 +} + +define <2 x i64> @srem2x64(<2 x i64> %A, <2 x i64> %B) { +;CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} +;CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} +;CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} +;CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + %tmp3 = srem <2 x i64> %A, %B; + ret <2 x i64> %tmp3 +} + +define <1 x i8> @urem1x8(<1 x i8> %A, <1 x i8> %B) { +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = urem <1 x i8> %A, %B; + ret <1 x i8> %tmp3 +} + +define <8 x i8> @urem8x8(<8 x i8> %A, <8 x i8> %B) { +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = urem <8 x i8> %A, %B; + ret <8 x i8> %tmp3 +} + +define <16 x i8> @urem16x8(<16 x i8> %A, <16 x i8> %B) { +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, 
{{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = urem <16 x i8> %A, %B; + ret <16 x i8> %tmp3 +} + +define <1 x i16> @urem1x16(<1 x i16> %A, <1 x i16> %B) { +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = urem <1 x i16> %A, %B; + ret <1 x i16> %tmp3 +} + +define <4 x i16> @urem4x16(<4 x i16> %A, <4 x i16> %B) { +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = urem <4 x i16> %A, %B; + ret <4 x i16> %tmp3 +} + +define <8 x i16> @urem8x16(<8 x i16> %A, <8 x i16> %B) { +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + 
%tmp3 = urem <8 x i16> %A, %B; + ret <8 x i16> %tmp3 +} + +define <1 x i32> @urem1x32(<1 x i32> %A, <1 x i32> %B) { +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = urem <1 x i32> %A, %B; + ret <1 x i32> %tmp3 +} + +define <2 x i32> @urem2x32(<2 x i32> %A, <2 x i32> %B) { +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = urem <2 x i32> %A, %B; + ret <2 x i32> %tmp3 +} + +define <4 x i32> @urem4x32(<4 x i32> %A, <4 x i32> %B) { +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = urem <4 x i32> %A, %B; + ret <4 x i32> %tmp3 +} + +define <1 x i64> @urem1x64(<1 x i64> %A, <1 x i64> %B) { +;CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} +;CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + %tmp3 = urem <1 x i64> %A, %B; + ret <1 x i64> %tmp3 +} + +define <2 x i64> @urem2x64(<2 x i64> %A, <2 x i64> %B) { +;CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} +;CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} +;CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} +;CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + %tmp3 = urem <2 x i64> %A, %B; + ret <2 x i64> %tmp3 +} + +define <2 x float> @frem2f32(<2 x float> %A, <2 x float> %B) { +; CHECK: bl fmodf +; CHECK: bl fmodf + %tmp3 = frem <2 x float> %A, %B; + ret <2 x float> %tmp3 +} + +define <4 x float> @frem4f32(<4 x float> %A, <4 x float> %B) { +; CHECK: bl fmodf +; CHECK: bl fmodf +; CHECK: bl fmodf +; CHECK: bl fmodf + %tmp3 = frem <4 x float> %A, %B; + ret <4 x float> %tmp3 +} + +define <1 x double> @frem1d64(<1 x double> %A, <1 x double> %B) { +; CHECK: bl fmod + %tmp3 = frem <1 x double> %A, %B; + ret <1 x double> %tmp3 +} + +define <2 x double> @frem2d64(<2 x double> %A, <2 x double> %B) { +; CHECK: bl fmod +; CHECK: bl fmod + %tmp3 = frem <2 x double> %A, %B; + ret <2 x double> %tmp3 +} + declare <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8>, <8 x i8>) declare <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8>, <16 x i8>) @@ -179,3 +731,24 @@ define <2 x double> @fmulx_v2f64(<2 x double> %lhs, <2 x double> %rhs) { %val = call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %lhs, <2 x double> %rhs) ret <2 x double> %val } + +define <1 x i8> @test_mul_v1i8(<1 x i8> %a, <1 x i8> %b) { +;CHECK-LABEL: test_mul_v1i8: +;CHECK: mul {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + %c = mul <1 x i8> %a, %b + ret <1 x i8> %c +} + +define <1 x i16> @test_mul_v1i16(<1 x i16> %a, <1 x i16> %b) { +;CHECK-LABEL: test_mul_v1i16: +;CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h + %c = mul <1 x i16> %a, %b + ret <1 x i16> %c +} + +define <1 x i32> @test_mul_v1i32(<1 x i32> %a, <1 x i32> %b) { +;CHECK-LABEL: test_mul_v1i32: +;CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + %c = mul <1 x i32> %a, %b + ret <1 x i32> %c +} diff --git a/test/CodeGen/AArch64/neon-or-combine.ll 
b/test/CodeGen/AArch64/neon-or-combine.ll new file mode 100644 index 0000000..260f693 --- /dev/null +++ b/test/CodeGen/AArch64/neon-or-combine.ll @@ -0,0 +1,29 @@ +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +; Check that the DAGCombiner does not crash with an assertion failure +; when performing a target specific combine to simplify a 'or' dag node +; according to the following rule: +; (or (and B, A), (and C, ~A)) => (VBSL A, B, C) +; The assertion failure was caused by an invalid comparison between APInt +; values with different 'BitWidth'. + +define <8 x i8> @test1(<8 x i8> %a, <8 x i8> %b) { + %tmp1 = and <8 x i8> %a, < i8 -1, i8 -1, i8 0, i8 0, i8 -1, i8 -1, i8 0, i8 0 > + %tmp2 = and <8 x i8> %b, < i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0 > + %tmp3 = or <8 x i8> %tmp1, %tmp2 + ret <8 x i8> %tmp3 +} + +; CHECK-LABEL: test1 +; CHECK: ret + +define <16 x i8> @test2(<16 x i8> %a, <16 x i8> %b) { + %tmp1 = and <16 x i8> %a, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > + %tmp2 = and <16 x i8> %b, < i8 -1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0 > + %tmp3 = or <16 x i8> %tmp1, %tmp2 + ret <16 x i8> %tmp3 +} + +; CHECK-LABEL: test2 +; CHECK: ret + diff --git a/test/CodeGen/AArch64/neon-perm.ll b/test/CodeGen/AArch64/neon-perm.ll index fa4d54d..a0b17e1 100644 --- a/test/CodeGen/AArch64/neon-perm.ll +++ b/test/CodeGen/AArch64/neon-perm.ll @@ -1030,6 +1030,1447 @@ entry: ret <8 x i16> %shuffle.i } +define <8 x i8> @test_same_vuzp1_s8(<8 x i8> %a) { +; CHECK: test_same_vuzp1_s8: +; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_same_vuzp1q_s8(<16 x i8> %a) { +; CHECK: test_same_vuzp1q_s8: +; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_same_vuzp1_s16(<4 x i16> %a) { +; CHECK: test_same_vuzp1_s16: +; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_same_vuzp1q_s16(<8 x i16> %a) { +; CHECK: test_same_vuzp1q_s16: +; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <4 x i32> @test_same_vuzp1q_s32(<4 x i32> %a) { +; CHECK: test_same_vuzp1q_s32: +; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> + ret <4 x i32> %shuffle.i +} + +define <8 x i8> @test_same_vuzp1_u8(<8 x i8> %a) { +; CHECK: test_same_vuzp1_u8: +; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_same_vuzp1q_u8(<16 x i8> %a) { +; CHECK: test_same_vuzp1q_u8: +; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_same_vuzp1_u16(<4 x i16> %a) { +; CHECK: test_same_vuzp1_u16: +; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> 
%a, <4 x i16> %a, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_same_vuzp1q_u16(<8 x i16> %a) { +; CHECK: test_same_vuzp1q_u16: +; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <4 x i32> @test_same_vuzp1q_u32(<4 x i32> %a) { +; CHECK: test_same_vuzp1q_u32: +; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> + ret <4 x i32> %shuffle.i +} + +define <4 x float> @test_same_vuzp1q_f32(<4 x float> %a) { +; CHECK: test_same_vuzp1q_f32: +; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> + ret <4 x float> %shuffle.i +} + +define <8 x i8> @test_same_vuzp1_p8(<8 x i8> %a) { +; CHECK: test_same_vuzp1_p8: +; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_same_vuzp1q_p8(<16 x i8> %a) { +; CHECK: test_same_vuzp1q_p8: +; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_same_vuzp1_p16(<4 x i16> %a) { +; CHECK: test_same_vuzp1_p16: +; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_same_vuzp1q_p16(<8 x i16> %a) { +; CHECK: test_same_vuzp1q_p16: +; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <8 x i8> @test_same_vuzp2_s8(<8 x i8> %a) { +; CHECK: test_same_vuzp2_s8: +; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_same_vuzp2q_s8(<16 x i8> %a) { +; CHECK: test_same_vuzp2q_s8: +; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_same_vuzp2_s16(<4 x i16> %a) { +; CHECK: test_same_vuzp2_s16: +; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_same_vuzp2q_s16(<8 x i16> %a) { +; CHECK: test_same_vuzp2q_s16: +; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <4 x i32> @test_same_vuzp2q_s32(<4 x i32> %a) { +; CHECK: test_same_vuzp2q_s32: +; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> + ret <4 x i32> %shuffle.i +} + +define <8 x i8> @test_same_vuzp2_u8(<8 x i8> %a) { +; CHECK: test_same_vuzp2_u8: +; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_same_vuzp2q_u8(<16 x i8> %a) { +; CHECK: test_same_vuzp2q_u8: +; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + 
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_same_vuzp2_u16(<4 x i16> %a) { +; CHECK: test_same_vuzp2_u16: +; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_same_vuzp2q_u16(<8 x i16> %a) { +; CHECK: test_same_vuzp2q_u16: +; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <4 x i32> @test_same_vuzp2q_u32(<4 x i32> %a) { +; CHECK: test_same_vuzp2q_u32: +; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> + ret <4 x i32> %shuffle.i +} + +define <4 x float> @test_same_vuzp2q_f32(<4 x float> %a) { +; CHECK: test_same_vuzp2q_f32: +; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> + ret <4 x float> %shuffle.i +} + +define <8 x i8> @test_same_vuzp2_p8(<8 x i8> %a) { +; CHECK: test_same_vuzp2_p8: +; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_same_vuzp2q_p8(<16 x i8> %a) { +; CHECK: test_same_vuzp2q_p8: +; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_same_vuzp2_p16(<4 x i16> %a) { +; CHECK: test_same_vuzp2_p16: +; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_same_vuzp2q_p16(<8 x i16> %a) { +; CHECK: test_same_vuzp2q_p16: +; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <8 x i8> @test_same_vzip1_s8(<8 x i8> %a) { +; CHECK: test_same_vzip1_s8: +; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_same_vzip1q_s8(<16 x i8> %a) { +; CHECK: test_same_vzip1q_s8: +; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_same_vzip1_s16(<4 x i16> %a) { +; CHECK: test_same_vzip1_s16: +; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_same_vzip1q_s16(<8 x i16> %a) { +; CHECK: test_same_vzip1q_s16: +; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <4 x i32> @test_same_vzip1q_s32(<4 x i32> %a) { +; CHECK: test_same_vzip1q_s32: +; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> + ret <4 x i32> %shuffle.i +} + +define <8 x i8> @test_same_vzip1_u8(<8 x i8> %a) { +; CHECK: test_same_vzip1_u8: +; CHECK: zip1 {{v[0-9]+}}.8b, 
{{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_same_vzip1q_u8(<16 x i8> %a) { +; CHECK: test_same_vzip1q_u8: +; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_same_vzip1_u16(<4 x i16> %a) { +; CHECK: test_same_vzip1_u16: +; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_same_vzip1q_u16(<8 x i16> %a) { +; CHECK: test_same_vzip1q_u16: +; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <4 x i32> @test_same_vzip1q_u32(<4 x i32> %a) { +; CHECK: test_same_vzip1q_u32: +; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> + ret <4 x i32> %shuffle.i +} + +define <4 x float> @test_same_vzip1q_f32(<4 x float> %a) { +; CHECK: test_same_vzip1q_f32: +; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> + ret <4 x float> %shuffle.i +} + +define <8 x i8> @test_same_vzip1_p8(<8 x i8> %a) { +; CHECK: test_same_vzip1_p8: +; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_same_vzip1q_p8(<16 x i8> %a) { +; CHECK: test_same_vzip1q_p8: +; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_same_vzip1_p16(<4 x i16> %a) { +; CHECK: test_same_vzip1_p16: +; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_same_vzip1q_p16(<8 x i16> %a) { +; CHECK: test_same_vzip1q_p16: +; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <8 x i8> @test_same_vzip2_s8(<8 x i8> %a) { +; CHECK: test_same_vzip2_s8: +; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_same_vzip2q_s8(<16 x i8> %a) { +; CHECK: test_same_vzip2q_s8: +; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_same_vzip2_s16(<4 x i16> %a) { +; CHECK: test_same_vzip2_s16: +; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_same_vzip2q_s16(<8 x i16> %a) { +; CHECK: test_same_vzip2q_s16: +; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <4 x i32> @test_same_vzip2q_s32(<4 x i32> %a) { +; CHECK: 
test_same_vzip2q_s32: +; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> + ret <4 x i32> %shuffle.i +} + +define <8 x i8> @test_same_vzip2_u8(<8 x i8> %a) { +; CHECK: test_same_vzip2_u8: +; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_same_vzip2q_u8(<16 x i8> %a) { +; CHECK: test_same_vzip2q_u8: +; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_same_vzip2_u16(<4 x i16> %a) { +; CHECK: test_same_vzip2_u16: +; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_same_vzip2q_u16(<8 x i16> %a) { +; CHECK: test_same_vzip2q_u16: +; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <4 x i32> @test_same_vzip2q_u32(<4 x i32> %a) { +; CHECK: test_same_vzip2q_u32: +; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> + ret <4 x i32> %shuffle.i +} + +define <4 x float> @test_same_vzip2q_f32(<4 x float> %a) { +; CHECK: test_same_vzip2q_f32: +; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> + ret <4 x float> %shuffle.i +} + +define <8 x i8> @test_same_vzip2_p8(<8 x i8> %a) { +; CHECK: test_same_vzip2_p8: +; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_same_vzip2q_p8(<16 x i8> %a) { +; CHECK: test_same_vzip2q_p8: +; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_same_vzip2_p16(<4 x i16> %a) { +; CHECK: test_same_vzip2_p16: +; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_same_vzip2q_p16(<8 x i16> %a) { +; CHECK: test_same_vzip2q_p16: +; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <8 x i8> @test_same_vtrn1_s8(<8 x i8> %a) { +; CHECK: test_same_vtrn1_s8: +; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_same_vtrn1q_s8(<16 x i8> %a) { +; CHECK: test_same_vtrn1q_s8: +; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_same_vtrn1_s16(<4 x i16> %a) { +; CHECK: test_same_vtrn1_s16: +; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> 
@test_same_vtrn1q_s16(<8 x i16> %a) { +; CHECK: test_same_vtrn1q_s16: +; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <4 x i32> @test_same_vtrn1q_s32(<4 x i32> %a) { +; CHECK: test_same_vtrn1q_s32: +; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> + ret <4 x i32> %shuffle.i +} + +define <8 x i8> @test_same_vtrn1_u8(<8 x i8> %a) { +; CHECK: test_same_vtrn1_u8: +; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_same_vtrn1q_u8(<16 x i8> %a) { +; CHECK: test_same_vtrn1q_u8: +; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_same_vtrn1_u16(<4 x i16> %a) { +; CHECK: test_same_vtrn1_u16: +; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_same_vtrn1q_u16(<8 x i16> %a) { +; CHECK: test_same_vtrn1q_u16: +; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <4 x i32> @test_same_vtrn1q_u32(<4 x i32> %a) { +; CHECK: test_same_vtrn1q_u32: +; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> + ret <4 x i32> %shuffle.i +} + +define <4 x float> @test_same_vtrn1q_f32(<4 x float> %a) { +; CHECK: test_same_vtrn1q_f32: +; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> + ret <4 x float> %shuffle.i +} + +define <8 x i8> @test_same_vtrn1_p8(<8 x i8> %a) { +; CHECK: test_same_vtrn1_p8: +; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_same_vtrn1q_p8(<16 x i8> %a) { +; CHECK: test_same_vtrn1q_p8: +; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_same_vtrn1_p16(<4 x i16> %a) { +; CHECK: test_same_vtrn1_p16: +; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_same_vtrn1q_p16(<8 x i16> %a) { +; CHECK: test_same_vtrn1q_p16: +; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <8 x i8> @test_same_vtrn2_s8(<8 x i8> %a) { +; CHECK: test_same_vtrn2_s8: +; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_same_vtrn2q_s8(<16 x i8> %a) { +; CHECK: test_same_vtrn2q_s8: +; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> + ret <16 x i8> 
%shuffle.i +} + +define <4 x i16> @test_same_vtrn2_s16(<4 x i16> %a) { +; CHECK: test_same_vtrn2_s16: +; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_same_vtrn2q_s16(<8 x i16> %a) { +; CHECK: test_same_vtrn2q_s16: +; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <4 x i32> @test_same_vtrn2q_s32(<4 x i32> %a) { +; CHECK: test_same_vtrn2q_s32: +; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> + ret <4 x i32> %shuffle.i +} + +define <8 x i8> @test_same_vtrn2_u8(<8 x i8> %a) { +; CHECK: test_same_vtrn2_u8: +; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_same_vtrn2q_u8(<16 x i8> %a) { +; CHECK: test_same_vtrn2q_u8: +; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_same_vtrn2_u16(<4 x i16> %a) { +; CHECK: test_same_vtrn2_u16: +; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_same_vtrn2q_u16(<8 x i16> %a) { +; CHECK: test_same_vtrn2q_u16: +; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <4 x i32> @test_same_vtrn2q_u32(<4 x i32> %a) { +; CHECK: test_same_vtrn2q_u32: +; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> + ret <4 x i32> %shuffle.i +} + +define <4 x float> @test_same_vtrn2q_f32(<4 x float> %a) { +; CHECK: test_same_vtrn2q_f32: +; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> + ret <4 x float> %shuffle.i +} + +define <8 x i8> @test_same_vtrn2_p8(<8 x i8> %a) { +; CHECK: test_same_vtrn2_p8: +; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_same_vtrn2q_p8(<16 x i8> %a) { +; CHECK: test_same_vtrn2q_p8: +; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_same_vtrn2_p16(<4 x i16> %a) { +; CHECK: test_same_vtrn2_p16: +; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_same_vtrn2q_p16(<8 x i16> %a) { +; CHECK: test_same_vtrn2q_p16: +; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> + ret <8 x i16> %shuffle.i +} + + +define <8 x i8> @test_undef_vuzp1_s8(<8 x i8> %a) { +; CHECK: test_undef_vuzp1_s8: +; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> 
undef, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_undef_vuzp1q_s8(<16 x i8> %a) { +; CHECK: test_undef_vuzp1q_s8: +; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_undef_vuzp1_s16(<4 x i16> %a) { +; CHECK: test_undef_vuzp1_s16: +; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_undef_vuzp1q_s16(<8 x i16> %a) { +; CHECK: test_undef_vuzp1q_s16: +; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <4 x i32> @test_undef_vuzp1q_s32(<4 x i32> %a) { +; CHECK: test_undef_vuzp1q_s32: +; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> + ret <4 x i32> %shuffle.i +} + +define <8 x i8> @test_undef_vuzp1_u8(<8 x i8> %a) { +; CHECK: test_undef_vuzp1_u8: +; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_undef_vuzp1q_u8(<16 x i8> %a) { +; CHECK: test_undef_vuzp1q_u8: +; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_undef_vuzp1_u16(<4 x i16> %a) { +; CHECK: test_undef_vuzp1_u16: +; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_undef_vuzp1q_u16(<8 x i16> %a) { +; CHECK: test_undef_vuzp1q_u16: +; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <4 x i32> @test_undef_vuzp1q_u32(<4 x i32> %a) { +; CHECK: test_undef_vuzp1q_u32: +; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> + ret <4 x i32> %shuffle.i +} + +define <4 x float> @test_undef_vuzp1q_f32(<4 x float> %a) { +; CHECK: test_undef_vuzp1q_f32: +; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> + ret <4 x float> %shuffle.i +} + +define <8 x i8> @test_undef_vuzp1_p8(<8 x i8> %a) { +; CHECK: test_undef_vuzp1_p8: +; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_undef_vuzp1q_p8(<16 x i8> %a) { +; CHECK: test_undef_vuzp1q_p8: +; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_undef_vuzp1_p16(<4 x i16> %a) { +; CHECK: test_undef_vuzp1_p16: +; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_undef_vuzp1q_p16(<8 x i16> %a) { +; CHECK: test_undef_vuzp1q_p16: +; CHECK: uzp1 
{{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <8 x i8> @test_undef_vuzp2_s8(<8 x i8> %a) { +; CHECK: test_undef_vuzp2_s8: +; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_undef_vuzp2q_s8(<16 x i8> %a) { +; CHECK: test_undef_vuzp2q_s8: +; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_undef_vuzp2_s16(<4 x i16> %a) { +; CHECK: test_undef_vuzp2_s16: +; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_undef_vuzp2q_s16(<8 x i16> %a) { +; CHECK: test_undef_vuzp2q_s16: +; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <4 x i32> @test_undef_vuzp2q_s32(<4 x i32> %a) { +; CHECK: test_undef_vuzp2q_s32: +; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> + ret <4 x i32> %shuffle.i +} + +define <8 x i8> @test_undef_vuzp2_u8(<8 x i8> %a) { +; CHECK: test_undef_vuzp2_u8: +; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_undef_vuzp2q_u8(<16 x i8> %a) { +; CHECK: test_undef_vuzp2q_u8: +; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_undef_vuzp2_u16(<4 x i16> %a) { +; CHECK: test_undef_vuzp2_u16: +; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_undef_vuzp2q_u16(<8 x i16> %a) { +; CHECK: test_undef_vuzp2q_u16: +; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <4 x i32> @test_undef_vuzp2q_u32(<4 x i32> %a) { +; CHECK: test_undef_vuzp2q_u32: +; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> + ret <4 x i32> %shuffle.i +} + +define <4 x float> @test_undef_vuzp2q_f32(<4 x float> %a) { +; CHECK: test_undef_vuzp2q_f32: +; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> + ret <4 x float> %shuffle.i +} + +define <8 x i8> @test_undef_vuzp2_p8(<8 x i8> %a) { +; CHECK: test_undef_vuzp2_p8: +; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_undef_vuzp2q_p8(<16 x i8> %a) { +; CHECK: test_undef_vuzp2q_p8: +; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> + ret <16 x i8> %shuffle.i +} + 
+define <4 x i16> @test_undef_vuzp2_p16(<4 x i16> %a) { +; CHECK: test_undef_vuzp2_p16: +; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_undef_vuzp2q_p16(<8 x i16> %a) { +; CHECK: test_undef_vuzp2q_p16: +; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <8 x i8> @test_undef_vzip1_s8(<8 x i8> %a) { +; CHECK: test_undef_vzip1_s8: +; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_undef_vzip1q_s8(<16 x i8> %a) { +; CHECK: test_undef_vzip1q_s8: +; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_undef_vzip1_s16(<4 x i16> %a) { +; CHECK: test_undef_vzip1_s16: +; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_undef_vzip1q_s16(<8 x i16> %a) { +; CHECK: test_undef_vzip1q_s16: +; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <4 x i32> @test_undef_vzip1q_s32(<4 x i32> %a) { +; CHECK: test_undef_vzip1q_s32: +; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> + ret <4 x i32> %shuffle.i +} + +define <8 x i8> @test_undef_vzip1_u8(<8 x i8> %a) { +; CHECK: test_undef_vzip1_u8: +; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_undef_vzip1q_u8(<16 x i8> %a) { +; CHECK: test_undef_vzip1q_u8: +; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_undef_vzip1_u16(<4 x i16> %a) { +; CHECK: test_undef_vzip1_u16: +; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_undef_vzip1q_u16(<8 x i16> %a) { +; CHECK: test_undef_vzip1q_u16: +; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <4 x i32> @test_undef_vzip1q_u32(<4 x i32> %a) { +; CHECK: test_undef_vzip1q_u32: +; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> + ret <4 x i32> %shuffle.i +} + +define <4 x float> @test_undef_vzip1q_f32(<4 x float> %a) { +; CHECK: test_undef_vzip1q_f32: +; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> + ret <4 x float> %shuffle.i +} + +define <8 x i8> @test_undef_vzip1_p8(<8 x i8> %a) { +; CHECK: test_undef_vzip1_p8: +; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + 
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_undef_vzip1q_p8(<16 x i8> %a) { +; CHECK: test_undef_vzip1q_p8: +; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_undef_vzip1_p16(<4 x i16> %a) { +; CHECK: test_undef_vzip1_p16: +; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_undef_vzip1q_p16(<8 x i16> %a) { +; CHECK: test_undef_vzip1q_p16: +; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <8 x i8> @test_undef_vzip2_s8(<8 x i8> %a) { +; CHECK: test_undef_vzip2_s8: +; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_undef_vzip2q_s8(<16 x i8> %a) { +; CHECK: test_undef_vzip2q_s8: +; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_undef_vzip2_s16(<4 x i16> %a) { +; CHECK: test_undef_vzip2_s16: +; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_undef_vzip2q_s16(<8 x i16> %a) { +; CHECK: test_undef_vzip2q_s16: +; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <4 x i32> @test_undef_vzip2q_s32(<4 x i32> %a) { +; CHECK: test_undef_vzip2q_s32: +; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> + ret <4 x i32> %shuffle.i +} + +define <8 x i8> @test_undef_vzip2_u8(<8 x i8> %a) { +; CHECK: test_undef_vzip2_u8: +; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_undef_vzip2q_u8(<16 x i8> %a) { +; CHECK: test_undef_vzip2q_u8: +; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_undef_vzip2_u16(<4 x i16> %a) { +; CHECK: test_undef_vzip2_u16: +; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_undef_vzip2q_u16(<8 x i16> %a) { +; CHECK: test_undef_vzip2q_u16: +; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <4 x i32> @test_undef_vzip2q_u32(<4 x i32> %a) { +; CHECK: test_undef_vzip2q_u32: +; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> + ret <4 x i32> %shuffle.i +} + +define <4 x float> @test_undef_vzip2q_f32(<4 x float> %a) { +; 
CHECK: test_undef_vzip2q_f32: +; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> + ret <4 x float> %shuffle.i +} + +define <8 x i8> @test_undef_vzip2_p8(<8 x i8> %a) { +; CHECK: test_undef_vzip2_p8: +; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_undef_vzip2q_p8(<16 x i8> %a) { +; CHECK: test_undef_vzip2q_p8: +; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_undef_vzip2_p16(<4 x i16> %a) { +; CHECK: test_undef_vzip2_p16: +; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_undef_vzip2q_p16(<8 x i16> %a) { +; CHECK: test_undef_vzip2q_p16: +; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <8 x i8> @test_undef_vtrn1_s8(<8 x i8> %a) { +; CHECK: test_undef_vtrn1_s8: +; CHECK: ret +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_undef_vtrn1q_s8(<16 x i8> %a) { +; CHECK: test_undef_vtrn1q_s8: +; CHECK: ret +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_undef_vtrn1_s16(<4 x i16> %a) { +; CHECK: test_undef_vtrn1_s16: +; CHECK: ret +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_undef_vtrn1q_s16(<8 x i16> %a) { +; CHECK: test_undef_vtrn1q_s16: +; CHECK: ret +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <4 x i32> @test_undef_vtrn1q_s32(<4 x i32> %a) { +; CHECK: test_undef_vtrn1q_s32: +; CHECK: ret +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> + ret <4 x i32> %shuffle.i +} + +define <8 x i8> @test_undef_vtrn1_u8(<8 x i8> %a) { +; CHECK: test_undef_vtrn1_u8: +; CHECK: ret +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_undef_vtrn1q_u8(<16 x i8> %a) { +; CHECK: test_undef_vtrn1q_u8: +; CHECK: ret +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_undef_vtrn1_u16(<4 x i16> %a) { +; CHECK: test_undef_vtrn1_u16: +; CHECK: ret +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_undef_vtrn1q_u16(<8 x i16> %a) { +; CHECK: test_undef_vtrn1q_u16: +; CHECK: ret +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <4 x i32> @test_undef_vtrn1q_u32(<4 x i32> %a) { +; CHECK: test_undef_vtrn1q_u32: +; CHECK: ret +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> + ret <4 x i32> %shuffle.i +} + +define <4 x float> @test_undef_vtrn1q_f32(<4 x float> %a) { +; CHECK: test_undef_vtrn1q_f32: +; CHECK: ret +entry: + %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> + 
ret <4 x float> %shuffle.i +} + +define <8 x i8> @test_undef_vtrn1_p8(<8 x i8> %a) { +; CHECK: test_undef_vtrn1_p8: +; CHECK: ret +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_undef_vtrn1q_p8(<16 x i8> %a) { +; CHECK: test_undef_vtrn1q_p8: +; CHECK: ret +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_undef_vtrn1_p16(<4 x i16> %a) { +; CHECK: test_undef_vtrn1_p16: +; CHECK: ret +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_undef_vtrn1q_p16(<8 x i16> %a) { +; CHECK: test_undef_vtrn1q_p16: +; CHECK: ret +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <8 x i8> @test_undef_vtrn2_s8(<8 x i8> %a) { +; CHECK: test_undef_vtrn2_s8: +; CHECK: rev16 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_undef_vtrn2q_s8(<16 x i8> %a) { +; CHECK: test_undef_vtrn2q_s8: +; CHECK: rev16 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_undef_vtrn2_s16(<4 x i16> %a) { +; CHECK: test_undef_vtrn2_s16: +; CHECK: rev32 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_undef_vtrn2q_s16(<8 x i16> %a) { +; CHECK: test_undef_vtrn2q_s16: +; CHECK: rev32 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <4 x i32> @test_undef_vtrn2q_s32(<4 x i32> %a) { +; CHECK: test_undef_vtrn2q_s32: +; CHECK: rev64 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> + ret <4 x i32> %shuffle.i +} + +define <8 x i8> @test_undef_vtrn2_u8(<8 x i8> %a) { +; CHECK: test_undef_vtrn2_u8: +; CHECK: rev16 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_undef_vtrn2q_u8(<16 x i8> %a) { +; CHECK: test_undef_vtrn2q_u8: +; CHECK: rev16 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_undef_vtrn2_u16(<4 x i16> %a) { +; CHECK: test_undef_vtrn2_u16: +; CHECK: rev32 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_undef_vtrn2q_u16(<8 x i16> %a) { +; CHECK: test_undef_vtrn2q_u16: +; CHECK: rev32 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <4 x i32> @test_undef_vtrn2q_u32(<4 x i32> %a) { +; CHECK: test_undef_vtrn2q_u32: +; CHECK: rev64 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> + ret <4 x i32> %shuffle.i +} + +define <4 x float> @test_undef_vtrn2q_f32(<4 x float> %a) { +; CHECK: test_undef_vtrn2q_f32: +; CHECK: rev64 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> + ret 
<4 x float> %shuffle.i +} + +define <8 x i8> @test_undef_vtrn2_p8(<8 x i8> %a) { +; CHECK: test_undef_vtrn2_p8: +; CHECK: rev16 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_undef_vtrn2q_p8(<16 x i8> %a) { +; CHECK: test_undef_vtrn2q_p8: +; CHECK: rev16 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_undef_vtrn2_p16(<4 x i16> %a) { +; CHECK: test_undef_vtrn2_p16: +; CHECK: rev32 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_undef_vtrn2q_p16(<8 x i16> %a) { +; CHECK: test_undef_vtrn2q_p16: +; CHECK: rev32 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> + ret <8 x i16> %shuffle.i +} + define %struct.int8x8x2_t @test_vuzp_s8(<8 x i8> %a, <8 x i8> %b) { ; CHECK: test_vuzp_s8: ; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b diff --git a/test/CodeGen/AArch64/neon-scalar-add-sub.ll b/test/CodeGen/AArch64/neon-scalar-add-sub.ll index 09ca880..4f322e0 100644 --- a/test/CodeGen/AArch64/neon-scalar-add-sub.ll +++ b/test/CodeGen/AArch64/neon-scalar-add-sub.ll @@ -1,13 +1,13 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s define <1 x i64> @add1xi64(<1 x i64> %A, <1 x i64> %B) { -;CHECK: add {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +;CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} %tmp3 = add <1 x i64> %A, %B; ret <1 x i64> %tmp3 } define <1 x i64> @sub1xi64(<1 x i64> %A, <1 x i64> %B) { -;CHECK: sub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +;CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} %tmp3 = sub <1 x i64> %A, %B; ret <1 x i64> %tmp3 } @@ -18,14 +18,14 @@ declare <1 x i64> @llvm.aarch64.neon.vadddu(<1 x i64>, <1 x i64>) define <1 x i64> @test_add_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_add_v1i64: %tmp1 = call <1 x i64> @llvm.aarch64.neon.vaddds(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: add {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +; CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } define <1 x i64> @test_uadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_uadd_v1i64: %tmp1 = call <1 x i64> @llvm.aarch64.neon.vadddu(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: add {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +;CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } @@ -35,14 +35,14 @@ declare <1 x i64> @llvm.aarch64.neon.vsubdu(<1 x i64>, <1 x i64>) define <1 x i64> @test_sub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_sub_v1i64: %tmp1 = call <1 x i64> @llvm.aarch64.neon.vsubds(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: sub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +; CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } define <1 x i64> @test_usub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_usub_v1i64: %tmp1 = call <1 x i64> @llvm.aarch64.neon.vsubdu(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: sub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +;CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } diff --git a/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll b/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll index 8ce42de..247514c 100644 --- a/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll +++ 
b/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll @@ -5,7 +5,7 @@ declare double @llvm.fma.f64(double, double, double) define float @test_fmla_ss4S(float %a, float %b, <4 x float> %v) { ; CHECK: test_fmla_ss4S - ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-31]+}}.s[3] + ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] %tmp1 = extractelement <4 x float> %v, i32 3 %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a) ret float %tmp2 @@ -13,7 +13,7 @@ define float @test_fmla_ss4S(float %a, float %b, <4 x float> %v) { define float @test_fmla_ss4S_swap(float %a, float %b, <4 x float> %v) { ; CHECK: test_fmla_ss4S_swap - ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-31]+}}.s[3] + ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] %tmp1 = extractelement <4 x float> %v, i32 3 %tmp2 = call float @llvm.fma.f32(float %tmp1, float %a, float %a) ret float %tmp2 @@ -21,7 +21,7 @@ define float @test_fmla_ss4S_swap(float %a, float %b, <4 x float> %v) { define float @test_fmla_ss2S(float %a, float %b, <2 x float> %v) { ; CHECK: test_fmla_ss2S - ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-31]+}}.s[1] + ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] %tmp1 = extractelement <2 x float> %v, i32 1 %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a) ret float %tmp2 @@ -29,7 +29,7 @@ define float @test_fmla_ss2S(float %a, float %b, <2 x float> %v) { define double @test_fmla_ddD(double %a, double %b, <1 x double> %v) { ; CHECK: test_fmla_ddD - ; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-31]+}}.d[0] + ; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] %tmp1 = extractelement <1 x double> %v, i32 0 %tmp2 = call double @llvm.fma.f64(double %b, double %tmp1, double %a) ret double %tmp2 @@ -37,7 +37,7 @@ define double @test_fmla_ddD(double %a, double %b, <1 x double> %v) { define double @test_fmla_dd2D(double %a, double %b, <2 x double> %v) { ; CHECK: test_fmla_dd2D - ; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-31]+}}.d[1] + ; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] %tmp1 = extractelement <2 x double> %v, i32 1 %tmp2 = call double @llvm.fma.f64(double %b, double %tmp1, double %a) ret double %tmp2 @@ -45,7 +45,7 @@ define double @test_fmla_dd2D(double %a, double %b, <2 x double> %v) { define double @test_fmla_dd2D_swap(double %a, double %b, <2 x double> %v) { ; CHECK: test_fmla_dd2D_swap - ; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-31]+}}.d[1] + ; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] %tmp1 = extractelement <2 x double> %v, i32 1 %tmp2 = call double @llvm.fma.f64(double %tmp1, double %b, double %a) ret double %tmp2 @@ -53,7 +53,7 @@ define double @test_fmla_dd2D_swap(double %a, double %b, <2 x double> %v) { define float @test_fmls_ss4S(float %a, float %b, <4 x float> %v) { ; CHECK: test_fmls_ss4S - ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-31]+}}.s[3] + ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] %tmp1 = extractelement <4 x float> %v, i32 3 %tmp2 = fsub float -0.0, %tmp1 %tmp3 = call float @llvm.fma.f32(float %tmp2, float %tmp1, float %a) @@ -62,7 +62,7 @@ define float @test_fmls_ss4S(float %a, float %b, <4 x float> %v) { define float @test_fmls_ss4S_swap(float %a, float %b, <4 x float> %v) { ; CHECK: test_fmls_ss4S_swap - ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-31]+}}.s[3] + ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] %tmp1 = extractelement <4 x float> %v, i32 3 %tmp2 = fsub float -0.0, %tmp1 %tmp3 = call float @llvm.fma.f32(float %tmp1, float %tmp2, float %a) @@ -72,7 +72,7 @@ define 
float @test_fmls_ss4S_swap(float %a, float %b, <4 x float> %v) { define float @test_fmls_ss2S(float %a, float %b, <2 x float> %v) { ; CHECK: test_fmls_ss2S - ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-31]+}}.s[1] + ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] %tmp1 = extractelement <2 x float> %v, i32 1 %tmp2 = fsub float -0.0, %tmp1 %tmp3 = call float @llvm.fma.f32(float %tmp2, float %tmp1, float %a) @@ -81,7 +81,7 @@ define float @test_fmls_ss2S(float %a, float %b, <2 x float> %v) { define double @test_fmls_ddD(double %a, double %b, <1 x double> %v) { ; CHECK: test_fmls_ddD - ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-31]+}}.d[0] + ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] %tmp1 = extractelement <1 x double> %v, i32 0 %tmp2 = fsub double -0.0, %tmp1 %tmp3 = call double @llvm.fma.f64(double %tmp2, double %tmp1, double %a) @@ -90,7 +90,7 @@ define double @test_fmls_ddD(double %a, double %b, <1 x double> %v) { define double @test_fmls_dd2D(double %a, double %b, <2 x double> %v) { ; CHECK: test_fmls_dd2D - ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-31]+}}.d[1] + ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] %tmp1 = extractelement <2 x double> %v, i32 1 %tmp2 = fsub double -0.0, %tmp1 %tmp3 = call double @llvm.fma.f64(double %tmp2, double %tmp1, double %a) @@ -99,7 +99,7 @@ define double @test_fmls_dd2D(double %a, double %b, <2 x double> %v) { define double @test_fmls_dd2D_swap(double %a, double %b, <2 x double> %v) { ; CHECK: test_fmls_dd2D_swap - ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-31]+}}.d[1] + ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] %tmp1 = extractelement <2 x double> %v, i32 1 %tmp2 = fsub double -0.0, %tmp1 %tmp3 = call double @llvm.fma.f64(double %tmp1, double %tmp2, double %a) diff --git a/test/CodeGen/AArch64/neon-scalar-by-elem-mul.ll b/test/CodeGen/AArch64/neon-scalar-by-elem-mul.ll index 968ad3e..c9128e7 100644 --- a/test/CodeGen/AArch64/neon-scalar-by-elem-mul.ll +++ b/test/CodeGen/AArch64/neon-scalar-by-elem-mul.ll @@ -2,7 +2,7 @@ define float @test_fmul_lane_ss2S(float %a, <2 x float> %v) { ; CHECK: test_fmul_lane_ss2S - ; CHECK: fmul {{s[0-31]+}}, {{s[0-31]+}}, {{v[0-31]+}}.s[1] + ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] %tmp1 = extractelement <2 x float> %v, i32 1 %tmp2 = fmul float %a, %tmp1; ret float %tmp2; @@ -10,7 +10,7 @@ define float @test_fmul_lane_ss2S(float %a, <2 x float> %v) { define float @test_fmul_lane_ss2S_swap(float %a, <2 x float> %v) { ; CHECK: test_fmul_lane_ss2S_swap - ; CHECK: fmul {{s[0-31]+}}, {{s[0-31]+}}, {{v[0-31]+}}.s[1] + ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] %tmp1 = extractelement <2 x float> %v, i32 1 %tmp2 = fmul float %tmp1, %a; ret float %tmp2; @@ -19,7 +19,7 @@ define float @test_fmul_lane_ss2S_swap(float %a, <2 x float> %v) { define float @test_fmul_lane_ss4S(float %a, <4 x float> %v) { ; CHECK: test_fmul_lane_ss4S - ; CHECK: fmul {{s[0-31]+}}, {{s[0-31]+}}, {{v[0-31]+}}.s[3] + ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] %tmp1 = extractelement <4 x float> %v, i32 3 %tmp2 = fmul float %a, %tmp1; ret float %tmp2; @@ -27,7 +27,7 @@ define float @test_fmul_lane_ss4S(float %a, <4 x float> %v) { define float @test_fmul_lane_ss4S_swap(float %a, <4 x float> %v) { ; CHECK: test_fmul_lane_ss4S_swap - ; CHECK: fmul {{s[0-31]+}}, {{s[0-31]+}}, {{v[0-31]+}}.s[3] + ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] %tmp1 = extractelement <4 x float> %v, i32 3 %tmp2 = fmul float %tmp1, %a; ret float %tmp2; @@ -36,7 +36,7 @@ 
define float @test_fmul_lane_ss4S_swap(float %a, <4 x float> %v) { define double @test_fmul_lane_ddD(double %a, <1 x double> %v) { ; CHECK: test_fmul_lane_ddD - ; CHECK: fmul {{d[0-31]+}}, {{d[0-31]+}}, {{v[0-31]+}}.d[0] + ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] %tmp1 = extractelement <1 x double> %v, i32 0 %tmp2 = fmul double %a, %tmp1; ret double %tmp2; @@ -46,7 +46,7 @@ define double @test_fmul_lane_ddD(double %a, <1 x double> %v) { define double @test_fmul_lane_dd2D(double %a, <2 x double> %v) { ; CHECK: test_fmul_lane_dd2D - ; CHECK: fmul {{d[0-31]+}}, {{d[0-31]+}}, {{v[0-31]+}}.d[1] + ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] %tmp1 = extractelement <2 x double> %v, i32 1 %tmp2 = fmul double %a, %tmp1; ret double %tmp2; @@ -55,7 +55,7 @@ define double @test_fmul_lane_dd2D(double %a, <2 x double> %v) { define double @test_fmul_lane_dd2D_swap(double %a, <2 x double> %v) { ; CHECK: test_fmul_lane_dd2D_swap - ; CHECK: fmul {{d[0-31]+}}, {{d[0-31]+}}, {{v[0-31]+}}.d[1] + ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] %tmp1 = extractelement <2 x double> %v, i32 1 %tmp2 = fmul double %tmp1, %a; ret double %tmp2; @@ -65,7 +65,7 @@ declare float @llvm.aarch64.neon.vmulx.f32(float, float) define float @test_fmulx_lane_f32(float %a, <2 x float> %v) { ; CHECK: test_fmulx_lane_f32 - ; CHECK: fmulx {{s[0-31]+}}, {{s[0-31]+}}, {{v[0-31]+}}.s[1] + ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] %tmp1 = extractelement <2 x float> %v, i32 1 %tmp2 = call float @llvm.aarch64.neon.vmulx.f32(float %a, float %tmp1) ret float %tmp2; @@ -73,7 +73,7 @@ define float @test_fmulx_lane_f32(float %a, <2 x float> %v) { define float @test_fmulx_laneq_f32(float %a, <4 x float> %v) { ; CHECK: test_fmulx_laneq_f32 - ; CHECK: fmulx {{s[0-31]+}}, {{s[0-31]+}}, {{v[0-31]+}}.s[3] + ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] %tmp1 = extractelement <4 x float> %v, i32 3 %tmp2 = call float @llvm.aarch64.neon.vmulx.f32(float %a, float %tmp1) ret float %tmp2; @@ -81,7 +81,7 @@ define float @test_fmulx_laneq_f32(float %a, <4 x float> %v) { define float @test_fmulx_laneq_f32_swap(float %a, <4 x float> %v) { ; CHECK: test_fmulx_laneq_f32_swap - ; CHECK: fmulx {{s[0-31]+}}, {{s[0-31]+}}, {{v[0-31]+}}.s[3] + ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] %tmp1 = extractelement <4 x float> %v, i32 3 %tmp2 = call float @llvm.aarch64.neon.vmulx.f32(float %tmp1, float %a) ret float %tmp2; @@ -91,7 +91,7 @@ declare double @llvm.aarch64.neon.vmulx.f64(double, double) define double @test_fmulx_lane_f64(double %a, <1 x double> %v) { ; CHECK: test_fmulx_lane_f64 - ; CHECK: fmulx {{d[0-31]+}}, {{d[0-31]+}}, {{v[0-31]+}}.d[0] + ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] %tmp1 = extractelement <1 x double> %v, i32 0 %tmp2 = call double @llvm.aarch64.neon.vmulx.f64(double %a, double %tmp1) ret double %tmp2; @@ -99,7 +99,7 @@ define double @test_fmulx_lane_f64(double %a, <1 x double> %v) { define double @test_fmulx_laneq_f64_0(double %a, <2 x double> %v) { ; CHECK: test_fmulx_laneq_f64_0 - ; CHECK: fmulx {{d[0-31]+}}, {{d[0-31]+}}, {{v[0-31]+}}.d[0] + ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] %tmp1 = extractelement <2 x double> %v, i32 0 %tmp2 = call double @llvm.aarch64.neon.vmulx.f64(double %a, double %tmp1) ret double %tmp2; @@ -108,7 +108,7 @@ define double @test_fmulx_laneq_f64_0(double %a, <2 x double> %v) { define double @test_fmulx_laneq_f64_1(double %a, <2 x double> %v) { ; CHECK: test_fmulx_laneq_f64_1 - ; CHECK: fmulx 
{{d[0-31]+}}, {{d[0-31]+}}, {{v[0-31]+}}.d[1] + ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] %tmp1 = extractelement <2 x double> %v, i32 1 %tmp2 = call double @llvm.aarch64.neon.vmulx.f64(double %a, double %tmp1) ret double %tmp2; @@ -116,7 +116,7 @@ define double @test_fmulx_laneq_f64_1(double %a, <2 x double> %v) { define double @test_fmulx_laneq_f64_1_swap(double %a, <2 x double> %v) { ; CHECK: test_fmulx_laneq_f64_1_swap - ; CHECK: fmulx {{d[0-31]+}}, {{d[0-31]+}}, {{v[0-31]+}}.d[1] + ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] %tmp1 = extractelement <2 x double> %v, i32 1 %tmp2 = call double @llvm.aarch64.neon.vmulx.f64(double %tmp1, double %a) ret double %tmp2; diff --git a/test/CodeGen/AArch64/neon-scalar-compare.ll b/test/CodeGen/AArch64/neon-scalar-compare.ll index 5f10cbb..e1f3964 100644 --- a/test/CodeGen/AArch64/neon-scalar-compare.ll +++ b/test/CodeGen/AArch64/neon-scalar-compare.ll @@ -122,28 +122,28 @@ entry: define <1 x i64> @test_vcage_f64(<1 x double> %a, <1 x double> %b) #0 { ; CHECK: test_vcage_f64 ; CHECK: facge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %vcage2.i = tail call <1 x i64> @llvm.aarch64.neon.vcage.v1i64.v1f64.v1f64(<1 x double> %a, <1 x double> %b) #2 + %vcage2.i = tail call <1 x i64> @llvm.arm.neon.vacge.v1i64.v1f64(<1 x double> %a, <1 x double> %b) #2 ret <1 x i64> %vcage2.i } define <1 x i64> @test_vcagt_f64(<1 x double> %a, <1 x double> %b) #0 { ; CHECK: test_vcagt_f64 ; CHECK: facgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %vcagt2.i = tail call <1 x i64> @llvm.aarch64.neon.vcagt.v1i64.v1f64.v1f64(<1 x double> %a, <1 x double> %b) #2 + %vcagt2.i = tail call <1 x i64> @llvm.arm.neon.vacgt.v1i64.v1f64(<1 x double> %a, <1 x double> %b) #2 ret <1 x i64> %vcagt2.i } define <1 x i64> @test_vcale_f64(<1 x double> %a, <1 x double> %b) #0 { ; CHECK: test_vcale_f64 ; CHECK: facge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %vcage2.i = tail call <1 x i64> @llvm.aarch64.neon.vcage.v1i64.v1f64.v1f64(<1 x double> %b, <1 x double> %a) #2 + %vcage2.i = tail call <1 x i64> @llvm.arm.neon.vacge.v1i64.v1f64(<1 x double> %b, <1 x double> %a) #2 ret <1 x i64> %vcage2.i } define <1 x i64> @test_vcalt_f64(<1 x double> %a, <1 x double> %b) #0 { ; CHECK: test_vcalt_f64 ; CHECK: facgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %vcagt2.i = tail call <1 x i64> @llvm.aarch64.neon.vcagt.v1i64.v1f64.v1f64(<1 x double> %b, <1 x double> %a) #2 + %vcagt2.i = tail call <1 x i64> @llvm.arm.neon.vacgt.v1i64.v1f64(<1 x double> %b, <1 x double> %a) #2 ret <1 x i64> %vcagt2.i } @@ -271,7 +271,7 @@ define <1 x i64> @test_vceqz_s64(<1 x i64> %a) #0 { ; CHECK: test_vceqz_s64 ; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, #0x0 %1 = icmp eq <1 x i64> %a, zeroinitializer - %vceqz.i = zext <1 x i1> %1 to <1 x i64> + %vceqz.i = sext <1 x i1> %1 to <1 x i64> ret <1 x i64> %vceqz.i } @@ -279,7 +279,7 @@ define <1 x i64> @test_vceqz_u64(<1 x i64> %a) #0 { ; CHECK: test_vceqz_u64 ; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, #0x0 %1 = icmp eq <1 x i64> %a, zeroinitializer - %vceqz.i = zext <1 x i1> %1 to <1 x i64> + %vceqz.i = sext <1 x i1> %1 to <1 x i64> ret <1 x i64> %vceqz.i } @@ -287,7 +287,7 @@ define <1 x i64> @test_vceqz_p64(<1 x i64> %a) #0 { ; CHECK: test_vceqz_p64 ; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, #0x0 %1 = icmp eq <1 x i64> %a, zeroinitializer - %vceqz.i = zext <1 x i1> %1 to <1 x i64> + %vceqz.i = sext <1 x i1> %1 to <1 x i64> ret <1 x i64> %vceqz.i } @@ -295,7 +295,7 @@ define <2 x i64> @test_vceqzq_p64(<2 x i64> %a) #0 { ; CHECK: test_vceqzq_p64 ; CHECK: cmeq {{v[0-9]}}.2d, {{v[0-9]}}.2d, #0 %1 = 
icmp eq <2 x i64> %a, zeroinitializer - %vceqz.i = zext <2 x i1> %1 to <2 x i64> + %vceqz.i = sext <2 x i1> %1 to <2 x i64> ret <2 x i64> %vceqz.i } @@ -303,7 +303,7 @@ define <1 x i64> @test_vcgez_s64(<1 x i64> %a) #0 { ; CHECK: test_vcgez_s64 ; CHECK: cmge {{d[0-9]}}, {{d[0-9]}}, #0x0 %1 = icmp sge <1 x i64> %a, zeroinitializer - %vcgez.i = zext <1 x i1> %1 to <1 x i64> + %vcgez.i = sext <1 x i1> %1 to <1 x i64> ret <1 x i64> %vcgez.i } @@ -311,7 +311,7 @@ define <1 x i64> @test_vclez_s64(<1 x i64> %a) #0 { ; CHECK: test_vclez_s64 ; CHECK: cmle {{d[0-9]}}, {{d[0-9]}}, #0x0 %1 = icmp sle <1 x i64> %a, zeroinitializer - %vclez.i = zext <1 x i1> %1 to <1 x i64> + %vclez.i = sext <1 x i1> %1 to <1 x i64> ret <1 x i64> %vclez.i } @@ -319,7 +319,7 @@ define <1 x i64> @test_vcgtz_s64(<1 x i64> %a) #0 { ; CHECK: test_vcgtz_s64 ; CHECK: cmgt {{d[0-9]}}, {{d[0-9]}}, #0x0 %1 = icmp sgt <1 x i64> %a, zeroinitializer - %vcgtz.i = zext <1 x i1> %1 to <1 x i64> + %vcgtz.i = sext <1 x i1> %1 to <1 x i64> ret <1 x i64> %vcgtz.i } @@ -327,12 +327,12 @@ define <1 x i64> @test_vcltz_s64(<1 x i64> %a) #0 { ; CHECK: test_vcltz_s64 ; CHECK: cmlt {{d[0-9]}}, {{d[0-9]}}, #0 %1 = icmp slt <1 x i64> %a, zeroinitializer - %vcltz.i = zext <1 x i1> %1 to <1 x i64> + %vcltz.i = sext <1 x i1> %1 to <1 x i64> ret <1 x i64> %vcltz.i } -declare <1 x i64> @llvm.aarch64.neon.vcagt.v1i64.v1f64.v1f64(<1 x double>, <1 x double>) -declare <1 x i64> @llvm.aarch64.neon.vcage.v1i64.v1f64.v1f64(<1 x double>, <1 x double>) +declare <1 x i64> @llvm.arm.neon.vacgt.v1i64.v1f64(<1 x double>, <1 x double>) +declare <1 x i64> @llvm.arm.neon.vacge.v1i64.v1f64(<1 x double>, <1 x double>) declare <1 x i64> @llvm.aarch64.neon.vtstd.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>) declare <1 x i64> @llvm.aarch64.neon.vcltz.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>) declare <1 x i64> @llvm.aarch64.neon.vchs.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>) diff --git a/test/CodeGen/AArch64/neon-scalar-copy.ll b/test/CodeGen/AArch64/neon-scalar-copy.ll index d433ff5..fadd734 100644 --- a/test/CodeGen/AArch64/neon-scalar-copy.ll +++ b/test/CodeGen/AArch64/neon-scalar-copy.ll @@ -2,21 +2,30 @@ define float @test_dup_sv2S(<2 x float> %v) { ;CHECK: test_dup_sv2S - ;CHECK: dup {{s[0-31]+}}, {{v[0-31]+}}.s[1] + ;CHECK: dup {{s[0-9]+}}, {{v[0-9]+}}.s[1] %tmp1 = extractelement <2 x float> %v, i32 1 ret float %tmp1 } +define float @test_dup_sv2S_0(<2 x float> %v) { + ;CHECK-LABEL: test_dup_sv2S_0 + ;CHECK-NOT: dup {{s[0-9]+}}, {{v[0-9]+}}.s[0] + ;CHECK: ret + %tmp1 = extractelement <2 x float> %v, i32 0 + ret float %tmp1 +} + define float @test_dup_sv4S(<4 x float> %v) { - ;CHECK: test_dup_sv4S - ;CHECK: dup {{s[0-31]+}}, {{v[0-31]+}}.s[0] + ;CHECK-LABEL: test_dup_sv4S + ;CHECK-NOT: dup {{s[0-9]+}}, {{v[0-9]+}}.s[0] + ;CHECK: ret %tmp1 = extractelement <4 x float> %v, i32 0 ret float %tmp1 } define double @test_dup_dvD(<1 x double> %v) { ;CHECK: test_dup_dvD - ;CHECK-NOT: dup {{d[0-31]+}}, {{v[0-31]+}}.d[0] + ;CHECK-NOT: dup {{d[0-9]+}}, {{v[0-9]+}}.d[0] ;CHECK: ret %tmp1 = extractelement <1 x double> %v, i32 0 ret double %tmp1 @@ -24,63 +33,71 @@ define double @test_dup_dvD(<1 x double> %v) { define double @test_dup_dv2D(<2 x double> %v) { ;CHECK: test_dup_dv2D - ;CHECK: dup {{d[0-31]+}}, {{v[0-31]+}}.d[1] + ;CHECK: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1] + %tmp1 = extractelement <2 x double> %v, i32 1 + ret double %tmp1 +} + +define double @test_dup_dv2D_0(<2 x double> %v) { + ;CHECK: test_dup_dv2D_0 + ;CHECK-NOT: dup {{d[0-9]+}}, {{v[0-9]+}}.d[0] + ;CHECK: ret %tmp1 = 
extractelement <2 x double> %v, i32 1 ret double %tmp1 } define <1 x i8> @test_vector_dup_bv16B(<16 x i8> %v1) { ;CHECK: test_vector_dup_bv16B - ;CHECK: dup {{b[0-31]+}}, {{v[0-31]+}}.b[14] + ;CHECK: dup {{b[0-9]+}}, {{v[0-9]+}}.b[14] %shuffle.i = shufflevector <16 x i8> %v1, <16 x i8> undef, <1 x i32> ret <1 x i8> %shuffle.i } define <1 x i8> @test_vector_dup_bv8B(<8 x i8> %v1) { ;CHECK: test_vector_dup_bv8B - ;CHECK: dup {{b[0-31]+}}, {{v[0-31]+}}.b[7] + ;CHECK: dup {{b[0-9]+}}, {{v[0-9]+}}.b[7] %shuffle.i = shufflevector <8 x i8> %v1, <8 x i8> undef, <1 x i32> ret <1 x i8> %shuffle.i } define <1 x i16> @test_vector_dup_hv8H(<8 x i16> %v1) { ;CHECK: test_vector_dup_hv8H - ;CHECK: dup {{h[0-31]+}}, {{v[0-31]+}}.h[7] + ;CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[7] %shuffle.i = shufflevector <8 x i16> %v1, <8 x i16> undef, <1 x i32> ret <1 x i16> %shuffle.i } define <1 x i16> @test_vector_dup_hv4H(<4 x i16> %v1) { ;CHECK: test_vector_dup_hv4H - ;CHECK: dup {{h[0-31]+}}, {{v[0-31]+}}.h[3] + ;CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[3] %shuffle.i = shufflevector <4 x i16> %v1, <4 x i16> undef, <1 x i32> ret <1 x i16> %shuffle.i } define <1 x i32> @test_vector_dup_sv4S(<4 x i32> %v1) { ;CHECK: test_vector_dup_sv4S - ;CHECK: dup {{s[0-31]+}}, {{v[0-31]+}}.s[3] + ;CHECK: dup {{s[0-9]+}}, {{v[0-9]+}}.s[3] %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <1 x i32> ret <1 x i32> %shuffle } define <1 x i32> @test_vector_dup_sv2S(<2 x i32> %v1) { ;CHECK: test_vector_dup_sv2S - ;CHECK: dup {{s[0-31]+}}, {{v[0-31]+}}.s[1] + ;CHECK: dup {{s[0-9]+}}, {{v[0-9]+}}.s[1] %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <1 x i32> ret <1 x i32> %shuffle } define <1 x i64> @test_vector_dup_dv2D(<2 x i64> %v1) { ;CHECK: test_vector_dup_dv2D - ;CHECK: dup {{d[0-31]+}}, {{v[0-31]+}}.d[1] + ;CHECK: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1] %shuffle.i = shufflevector <2 x i64> %v1, <2 x i64> undef, <1 x i32> ret <1 x i64> %shuffle.i } define <1 x i64> @test_vector_copy_dup_dv2D(<1 x i64> %a, <2 x i64> %c) { ;CHECK: test_vector_copy_dup_dv2D - ;CHECK: dup {{d[0-31]+}}, {{v[0-31]+}}.d[1] + ;CHECK: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1] %vget_lane = extractelement <2 x i64> %c, i32 1 %vset_lane = insertelement <1 x i64> undef, i64 %vget_lane, i32 0 ret <1 x i64> %vset_lane diff --git a/test/CodeGen/AArch64/neon-scalar-cvt.ll b/test/CodeGen/AArch64/neon-scalar-cvt.ll index a06d5d6..3a19bed 100644 --- a/test/CodeGen/AArch64/neon-scalar-cvt.ll +++ b/test/CodeGen/AArch64/neon-scalar-cvt.ll @@ -5,133 +5,129 @@ define float @test_vcvts_f32_s32(i32 %a) { ; CHECK: scvtf {{s[0-9]+}}, {{s[0-9]+}} entry: %vcvtf.i = insertelement <1 x i32> undef, i32 %a, i32 0 - %0 = call float @llvm.aarch64.neon.vcvtf32.s32(<1 x i32> %vcvtf.i) + %0 = call float @llvm.aarch64.neon.vcvtint2fps.f32.v1i32(<1 x i32> %vcvtf.i) ret float %0 } -declare float @llvm.aarch64.neon.vcvtf32.s32(<1 x i32>) +declare float @llvm.aarch64.neon.vcvtint2fps.f32.v1i32(<1 x i32>) define double @test_vcvtd_f64_s64(i64 %a) { ; CHECK: test_vcvtd_f64_s64 ; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}} entry: %vcvtf.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %0 = call double @llvm.aarch64.neon.vcvtf64.s64(<1 x i64> %vcvtf.i) + %0 = call double @llvm.aarch64.neon.vcvtint2fps.f64.v1i64(<1 x i64> %vcvtf.i) ret double %0 } -declare double @llvm.aarch64.neon.vcvtf64.s64(<1 x i64>) +declare double @llvm.aarch64.neon.vcvtint2fps.f64.v1i64(<1 x i64>) define float @test_vcvts_f32_u32(i32 %a) { ; CHECK: test_vcvts_f32_u32 ; CHECK: ucvtf {{s[0-9]+}}, {{s[0-9]+}} entry: %vcvtf.i = 
insertelement <1 x i32> undef, i32 %a, i32 0 - %0 = call float @llvm.aarch64.neon.vcvtf32.u32(<1 x i32> %vcvtf.i) + %0 = call float @llvm.aarch64.neon.vcvtint2fpu.f32.v1i32(<1 x i32> %vcvtf.i) ret float %0 } -declare float @llvm.aarch64.neon.vcvtf32.u32(<1 x i32>) +declare float @llvm.aarch64.neon.vcvtint2fpu.f32.v1i32(<1 x i32>) define double @test_vcvtd_f64_u64(i64 %a) { ; CHECK: test_vcvtd_f64_u64 ; CHECK: ucvtf {{d[0-9]+}}, {{d[0-9]+}} entry: %vcvtf.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %0 = call double @llvm.aarch64.neon.vcvtf64.u64(<1 x i64> %vcvtf.i) + %0 = call double @llvm.aarch64.neon.vcvtint2fpu.f64.v1i64(<1 x i64> %vcvtf.i) ret double %0 } -declare double @llvm.aarch64.neon.vcvtf64.u64(<1 x i64>) +declare double @llvm.aarch64.neon.vcvtint2fpu.f64.v1i64(<1 x i64>) define float @test_vcvts_n_f32_s32(i32 %a) { ; CHECK: test_vcvts_n_f32_s32 ; CHECK: scvtf {{s[0-9]+}}, {{s[0-9]+}}, #1 entry: %vcvtf = insertelement <1 x i32> undef, i32 %a, i32 0 - %0 = call float @llvm.aarch64.neon.vcvtf32.n.s32(<1 x i32> %vcvtf, i32 1) + %0 = call float @llvm.aarch64.neon.vcvtfxs2fp.n.f32.v1i32(<1 x i32> %vcvtf, i32 1) ret float %0 } -declare float @llvm.aarch64.neon.vcvtf32.n.s32(<1 x i32>, i32) +declare float @llvm.aarch64.neon.vcvtfxs2fp.n.f32.v1i32(<1 x i32>, i32) define double @test_vcvtd_n_f64_s64(i64 %a) { ; CHECK: test_vcvtd_n_f64_s64 ; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}}, #1 entry: %vcvtf = insertelement <1 x i64> undef, i64 %a, i32 0 - %0 = call double @llvm.aarch64.neon.vcvtf64.n.s64(<1 x i64> %vcvtf, i32 1) + %0 = call double @llvm.aarch64.neon.vcvtfxs2fp.n.f64.v1i64(<1 x i64> %vcvtf, i32 1) ret double %0 } -declare double @llvm.aarch64.neon.vcvtf64.n.s64(<1 x i64>, i32) +declare double @llvm.aarch64.neon.vcvtfxs2fp.n.f64.v1i64(<1 x i64>, i32) define float @test_vcvts_n_f32_u32(i32 %a) { ; CHECK: test_vcvts_n_f32_u32 ; CHECK: ucvtf {{s[0-9]+}}, {{s[0-9]+}}, #1 entry: %vcvtf = insertelement <1 x i32> undef, i32 %a, i32 0 - %0 = call float @llvm.aarch64.neon.vcvtf32.n.u32(<1 x i32> %vcvtf, i32 1) + %0 = call float @llvm.aarch64.neon.vcvtfxu2fp.n.f32.v1i32(<1 x i32> %vcvtf, i32 1) ret float %0 } -declare float @llvm.aarch64.neon.vcvtf32.n.u32(<1 x i32>, i32) +declare float @llvm.aarch64.neon.vcvtfxu2fp.n.f32.v1i32(<1 x i32>, i32) define double @test_vcvtd_n_f64_u64(i64 %a) { ; CHECK: test_vcvtd_n_f64_u64 ; CHECK: ucvtf {{d[0-9]+}}, {{d[0-9]+}}, #1 entry: %vcvtf = insertelement <1 x i64> undef, i64 %a, i32 0 - %0 = call double @llvm.aarch64.neon.vcvtf64.n.u64(<1 x i64> %vcvtf, i32 1) + %0 = call double @llvm.aarch64.neon.vcvtfxu2fp.n.f64.v1i64(<1 x i64> %vcvtf, i32 1) ret double %0 } -declare double @llvm.aarch64.neon.vcvtf64.n.u64(<1 x i64>, i32) +declare double @llvm.aarch64.neon.vcvtfxu2fp.n.f64.v1i64(<1 x i64>, i32) define i32 @test_vcvts_n_s32_f32(float %a) { ; CHECK: test_vcvts_n_s32_f32 ; CHECK: fcvtzs {{s[0-9]+}}, {{s[0-9]+}}, #1 entry: - %fcvtzs = insertelement <1 x float> undef, float %a, i32 0 - %fcvtzs1 = call <1 x i32> @llvm.aarch64.neon.vcvts.n.s32.f32(<1 x float> %fcvtzs, i32 1) + %fcvtzs1 = call <1 x i32> @llvm.aarch64.neon.vcvtfp2fxs.n.v1i32.f32(float %a, i32 1) %0 = extractelement <1 x i32> %fcvtzs1, i32 0 ret i32 %0 } -declare <1 x i32> @llvm.aarch64.neon.vcvts.n.s32.f32(<1 x float>, i32) +declare <1 x i32> @llvm.aarch64.neon.vcvtfp2fxs.n.v1i32.f32(float, i32) define i64 @test_vcvtd_n_s64_f64(double %a) { ; CHECK: test_vcvtd_n_s64_f64 ; CHECK: fcvtzs {{d[0-9]+}}, {{d[0-9]+}}, #1 entry: - %fcvtzs = insertelement <1 x double> undef, double %a, i32 0 - %fcvtzs1 
= call <1 x i64> @llvm.aarch64.neon.vcvtd.n.s64.f64(<1 x double> %fcvtzs, i32 1) + %fcvtzs1 = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.n.v1i64.f64(double %a, i32 1) %0 = extractelement <1 x i64> %fcvtzs1, i32 0 ret i64 %0 } -declare <1 x i64> @llvm.aarch64.neon.vcvtd.n.s64.f64(<1 x double>, i32) +declare <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.n.v1i64.f64(double, i32) define i32 @test_vcvts_n_u32_f32(float %a) { ; CHECK: test_vcvts_n_u32_f32 ; CHECK: fcvtzu {{s[0-9]+}}, {{s[0-9]+}}, #32 entry: - %fcvtzu = insertelement <1 x float> undef, float %a, i32 0 - %fcvtzu1 = call <1 x i32> @llvm.aarch64.neon.vcvts.n.u32.f32(<1 x float> %fcvtzu, i32 32) + %fcvtzu1 = call <1 x i32> @llvm.aarch64.neon.vcvtfp2fxu.n.v1i32.f32(float %a, i32 32) %0 = extractelement <1 x i32> %fcvtzu1, i32 0 ret i32 %0 } -declare <1 x i32> @llvm.aarch64.neon.vcvts.n.u32.f32(<1 x float>, i32) +declare <1 x i32> @llvm.aarch64.neon.vcvtfp2fxu.n.v1i32.f32(float, i32) define i64 @test_vcvtd_n_u64_f64(double %a) { ; CHECK: test_vcvtd_n_u64_f64 ; CHECK: fcvtzu {{d[0-9]+}}, {{d[0-9]+}}, #64 entry: - %fcvtzu = insertelement <1 x double> undef, double %a, i32 0 - %fcvtzu1 = tail call <1 x i64> @llvm.aarch64.neon.vcvtd.n.u64.f64(<1 x double> %fcvtzu, i32 64) + %fcvtzu1 = tail call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.n.v1i64.f64(double %a, i32 64) %0 = extractelement <1 x i64> %fcvtzu1, i32 0 ret i64 %0 } -declare <1 x i64> @llvm.aarch64.neon.vcvtd.n.u64.f64(<1 x double>, i32) +declare <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.n.v1i64.f64(double, i32) diff --git a/test/CodeGen/AArch64/neon-scalar-ext.ll b/test/CodeGen/AArch64/neon-scalar-ext.ll new file mode 100644 index 0000000..51dea06 --- /dev/null +++ b/test/CodeGen/AArch64/neon-scalar-ext.ll @@ -0,0 +1,113 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s + +define <1 x i64> @test_zext_v1i32_v1i64(<2 x i32> %v) nounwind readnone { +; CHECK-LABEL: test_zext_v1i32_v1i64: +; CHECK: ushll v0.2d, v0.2s, #0 + %1 = extractelement <2 x i32> %v, i32 0 + %2 = insertelement <1 x i32> undef, i32 %1, i32 0 + %3 = zext <1 x i32> %2 to <1 x i64> + ret <1 x i64> %3 +} + +define <1 x i32> @test_zext_v1i16_v1i32(<4 x i16> %v) nounwind readnone { +; CHECK-LABEL: test_zext_v1i16_v1i32: +; CHECK: ushll v0.4s, v0.4h, #0 + %1 = extractelement <4 x i16> %v, i32 0 + %2 = insertelement <1 x i16> undef, i16 %1, i32 0 + %3 = zext <1 x i16> %2 to <1 x i32> + ret <1 x i32> %3 +} + +define <1 x i16> @test_zext_v1i8_v1i16(<8 x i8> %v) nounwind readnone { +; CHECK-LABEL: test_zext_v1i8_v1i16: +; CHECK: ushll v0.8h, v0.8b, #0 + %1 = extractelement <8 x i8> %v, i32 0 + %2 = insertelement <1 x i8> undef, i8 %1, i32 0 + %3 = zext <1 x i8> %2 to <1 x i16> + ret <1 x i16> %3 +} + +define <1 x i32> @test_zext_v1i8_v1i32(<8 x i8> %v) nounwind readnone { +; CHECK-LABEL: test_zext_v1i8_v1i32: +; CHECK: dup b0, v0.b[0] + %1 = extractelement <8 x i8> %v, i32 0 + %2 = insertelement <1 x i8> undef, i8 %1, i32 0 + %3 = zext <1 x i8> %2 to <1 x i32> + ret <1 x i32> %3 +} + +define <1 x i64> @test_zext_v1i16_v1i64(<4 x i16> %v) nounwind readnone { +; CHECK-LABEL: test_zext_v1i16_v1i64: +; CHECK: dup h0, v0.h[0] + %1 = extractelement <4 x i16> %v, i32 0 + %2 = insertelement <1 x i16> undef, i16 %1, i32 0 + %3 = zext <1 x i16> %2 to <1 x i64> + ret <1 x i64> %3 +} + +define <1 x i64> @test_zext_v1i8_v1i64(<8 x i8> %v) nounwind readnone { +; CHECK-LABEL: test_zext_v1i8_v1i64: +; CHECK: dup b0, v0.b[0] + %1 = extractelement <8 x i8> %v, i32 0 + %2 = insertelement <1 x i8> undef, i8 %1, i32 0 + %3 
= zext <1 x i8> %2 to <1 x i64> + ret <1 x i64> %3 +} + +define <1 x i64> @test_sext_v1i32_v1i64(<2 x i32> %v) nounwind readnone { +; CHECK-LABEL: test_sext_v1i32_v1i64: +; CHECK: sshll v0.2d, v0.2s, #0 + %1 = extractelement <2 x i32> %v, i32 0 + %2 = insertelement <1 x i32> undef, i32 %1, i32 0 + %3 = sext <1 x i32> %2 to <1 x i64> + ret <1 x i64> %3 +} + +define <1 x i32> @test_sext_v1i16_v1i32(<4 x i16> %v) nounwind readnone { +; CHECK-LABEL: test_sext_v1i16_v1i32: +; CHECK: sshll v0.4s, v0.4h, #0 + %1 = extractelement <4 x i16> %v, i32 0 + %2 = insertelement <1 x i16> undef, i16 %1, i32 0 + %3 = sext <1 x i16> %2 to <1 x i32> + ret <1 x i32> %3 +} + +define <1 x i16> @test_sext_v1i8_v1i16(<8 x i8> %v) nounwind readnone { +; CHECK-LABEL: test_sext_v1i8_v1i16: +; CHECK: sshll v0.8h, v0.8b, #0 + %1 = extractelement <8 x i8> %v, i32 0 + %2 = insertelement <1 x i8> undef, i8 %1, i32 0 + %3 = sext <1 x i8> %2 to <1 x i16> + ret <1 x i16> %3 +} + +define <1 x i32> @test_sext_v1i8_v1i32(<8 x i8> %v) nounwind readnone { +; CHECK-LABEL: test_sext_v1i8_v1i32: +; CHECK: sshll v0.8h, v0.8b, #0 +; CHECK: sshll v0.4s, v0.4h, #0 + %1 = extractelement <8 x i8> %v, i32 0 + %2 = insertelement <1 x i8> undef, i8 %1, i32 0 + %3 = sext <1 x i8> %2 to <1 x i32> + ret <1 x i32> %3 +} + +define <1 x i64> @test_sext_v1i16_v1i64(<4 x i16> %v) nounwind readnone { +; CHECK-LABEL: test_sext_v1i16_v1i64: +; CHECK: sshll v0.4s, v0.4h, #0 +; CHECK: sshll v0.2d, v0.2s, #0 + %1 = extractelement <4 x i16> %v, i32 0 + %2 = insertelement <1 x i16> undef, i16 %1, i32 0 + %3 = sext <1 x i16> %2 to <1 x i64> + ret <1 x i64> %3 +} + +define <1 x i64> @test_sext_v1i8_v1i64(<8 x i8> %v) nounwind readnone { +; CHECK-LABEL: test_sext_v1i8_v1i64: +; CHECK: sshll v0.8h, v0.8b, #0 +; CHECK: sshll v0.4s, v0.4h, #0 +; CHECK: sshll v0.2d, v0.2s, #0 + %1 = extractelement <8 x i8> %v, i32 0 + %2 = insertelement <1 x i8> undef, i8 %1, i32 0 + %3 = sext <1 x i8> %2 to <1 x i64> + ret <1 x i64> %3 +} diff --git a/test/CodeGen/AArch64/neon-scalar-fabd.ll b/test/CodeGen/AArch64/neon-scalar-fabd.ll index 75686d3..6343310 100644 --- a/test/CodeGen/AArch64/neon-scalar-fabd.ll +++ b/test/CodeGen/AArch64/neon-scalar-fabd.ll @@ -4,10 +4,7 @@ define float @test_vabds_f32(float %a, float %b) { ; CHECK-LABEL: test_vabds_f32 ; CHECK: fabd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} entry: - %vabd.i = insertelement <1 x float> undef, float %a, i32 0 - %vabd1.i = insertelement <1 x float> undef, float %b, i32 0 - %vabd2.i = call <1 x float> @llvm.aarch64.neon.vabd.v1f32(<1 x float> %vabd.i, <1 x float> %vabd1.i) - %0 = extractelement <1 x float> %vabd2.i, i32 0 + %0 = call float @llvm.aarch64.neon.vabd.f32(float %a, float %a) ret float %0 } @@ -15,12 +12,9 @@ define double @test_vabdd_f64(double %a, double %b) { ; CHECK-LABEL: test_vabdd_f64 ; CHECK: fabd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} entry: - %vabd.i = insertelement <1 x double> undef, double %a, i32 0 - %vabd1.i = insertelement <1 x double> undef, double %b, i32 0 - %vabd2.i = call <1 x double> @llvm.aarch64.neon.vabd.v1f64(<1 x double> %vabd.i, <1 x double> %vabd1.i) - %0 = extractelement <1 x double> %vabd2.i, i32 0 + %0 = call double @llvm.aarch64.neon.vabd.f64(double %a, double %b) ret double %0 } -declare <1 x double> @llvm.aarch64.neon.vabd.v1f64(<1 x double>, <1 x double>) -declare <1 x float> @llvm.aarch64.neon.vabd.v1f32(<1 x float>, <1 x float>) +declare double @llvm.aarch64.neon.vabd.f64(double, double) +declare float @llvm.aarch64.neon.vabd.f32(float, float) diff --git 
a/test/CodeGen/AArch64/neon-scalar-fcvt.ll b/test/CodeGen/AArch64/neon-scalar-fcvt.ll index d7b84fa..6cf30a7 100644 --- a/test/CodeGen/AArch64/neon-scalar-fcvt.ll +++ b/test/CodeGen/AArch64/neon-scalar-fcvt.ll @@ -6,250 +6,228 @@ define float @test_vcvtxn(double %a) { ; CHECK: test_vcvtxn ; CHECK: fcvtxn {{s[0-9]}}, {{d[0-9]}} entry: - %vcvtf.i = insertelement <1 x double> undef, double %a, i32 0 - %vcvtf1.i = tail call <1 x float> @llvm.aarch64.neon.fcvtxn.v1f32.v1f64(<1 x double> %vcvtf.i) - %0 = extractelement <1 x float> %vcvtf1.i, i32 0 - ret float %0 + %vcvtf = call float @llvm.aarch64.neon.fcvtxn(double %a) + ret float %vcvtf } -declare <1 x float> @llvm.aarch64.neon.fcvtxn.v1f32.v1f64(<1 x double>) +declare float @llvm.aarch64.neon.fcvtxn(double) define i32 @test_vcvtass(float %a) { ; CHECK: test_vcvtass ; CHECK: fcvtas {{s[0-9]}}, {{s[0-9]}} entry: - %vcvtas.i = insertelement <1 x float> undef, float %a, i32 0 - %vcvtas1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtas.v1i32.v1f32(<1 x float> %vcvtas.i) + %vcvtas1.i = call <1 x i32> @llvm.aarch64.neon.fcvtas.v1i32.f32(float %a) %0 = extractelement <1 x i32> %vcvtas1.i, i32 0 ret i32 %0 } -declare <1 x i32> @llvm.aarch64.neon.fcvtas.v1i32.v1f32(<1 x float>) +declare <1 x i32> @llvm.aarch64.neon.fcvtas.v1i32.f32(float) define i64 @test_test_vcvtasd(double %a) { ; CHECK: test_test_vcvtasd ; CHECK: fcvtas {{d[0-9]}}, {{d[0-9]}} entry: - %vcvtas.i = insertelement <1 x double> undef, double %a, i32 0 - %vcvtas1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double> %vcvtas.i) + %vcvtas1.i = call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.f64(double %a) %0 = extractelement <1 x i64> %vcvtas1.i, i32 0 ret i64 %0 } -declare <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.f64(double) define i32 @test_vcvtaus(float %a) { ; CHECK: test_vcvtaus ; CHECK: fcvtau {{s[0-9]}}, {{s[0-9]}} entry: - %vcvtau.i = insertelement <1 x float> undef, float %a, i32 0 - %vcvtau1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtau.v1i32.v1f32(<1 x float> %vcvtau.i) + %vcvtau1.i = call <1 x i32> @llvm.aarch64.neon.fcvtau.v1i32.f32(float %a) %0 = extractelement <1 x i32> %vcvtau1.i, i32 0 ret i32 %0 } -declare <1 x i32> @llvm.aarch64.neon.fcvtau.v1i32.v1f32(<1 x float>) +declare <1 x i32> @llvm.aarch64.neon.fcvtau.v1i32.f32(float) define i64 @test_vcvtaud(double %a) { ; CHECK: test_vcvtaud ; CHECK: fcvtau {{d[0-9]}}, {{d[0-9]}} entry: - %vcvtau.i = insertelement <1 x double> undef, double %a, i32 0 - %vcvtau1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double> %vcvtau.i) + %vcvtau1.i = call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.f64(double %a) %0 = extractelement <1 x i64> %vcvtau1.i, i32 0 ret i64 %0 } -declare <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.f64(double) define i32 @test_vcvtmss(float %a) { ; CHECK: test_vcvtmss ; CHECK: fcvtms {{s[0-9]}}, {{s[0-9]}} entry: - %vcvtms.i = insertelement <1 x float> undef, float %a, i32 0 - %vcvtms1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtms.v1i32.v1f32(<1 x float> %vcvtms.i) + %vcvtms1.i = call <1 x i32> @llvm.aarch64.neon.fcvtms.v1i32.f32(float %a) %0 = extractelement <1 x i32> %vcvtms1.i, i32 0 ret i32 %0 } -declare <1 x i32> @llvm.aarch64.neon.fcvtms.v1i32.v1f32(<1 x float>) +declare <1 x i32> @llvm.aarch64.neon.fcvtms.v1i32.f32(float) define i64 @test_vcvtmd_s64_f64(double %a) { ; CHECK: test_vcvtmd_s64_f64 ; CHECK: fcvtms 
{{d[0-9]}}, {{d[0-9]}} entry: - %vcvtms.i = insertelement <1 x double> undef, double %a, i32 0 - %vcvtms1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double> %vcvtms.i) + %vcvtms1.i = call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.f64(double %a) %0 = extractelement <1 x i64> %vcvtms1.i, i32 0 ret i64 %0 } -declare <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.f64(double) define i32 @test_vcvtmus(float %a) { ; CHECK: test_vcvtmus ; CHECK: fcvtmu {{s[0-9]}}, {{s[0-9]}} entry: - %vcvtmu.i = insertelement <1 x float> undef, float %a, i32 0 - %vcvtmu1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtmu.v1i32.v1f32(<1 x float> %vcvtmu.i) + %vcvtmu1.i = call <1 x i32> @llvm.aarch64.neon.fcvtmu.v1i32.f32(float %a) %0 = extractelement <1 x i32> %vcvtmu1.i, i32 0 ret i32 %0 } -declare <1 x i32> @llvm.aarch64.neon.fcvtmu.v1i32.v1f32(<1 x float>) +declare <1 x i32> @llvm.aarch64.neon.fcvtmu.v1i32.f32(float) define i64 @test_vcvtmud(double %a) { ; CHECK: test_vcvtmud ; CHECK: fcvtmu {{d[0-9]}}, {{d[0-9]}} entry: - %vcvtmu.i = insertelement <1 x double> undef, double %a, i32 0 - %vcvtmu1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double> %vcvtmu.i) + %vcvtmu1.i = call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.f64(double %a) %0 = extractelement <1 x i64> %vcvtmu1.i, i32 0 ret i64 %0 } -declare <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.f64(double) define i32 @test_vcvtnss(float %a) { ; CHECK: test_vcvtnss ; CHECK: fcvtns {{s[0-9]}}, {{s[0-9]}} entry: - %vcvtns.i = insertelement <1 x float> undef, float %a, i32 0 - %vcvtns1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtns.v1i32.v1f32(<1 x float> %vcvtns.i) + %vcvtns1.i = call <1 x i32> @llvm.aarch64.neon.fcvtns.v1i32.f32(float %a) %0 = extractelement <1 x i32> %vcvtns1.i, i32 0 ret i32 %0 } -declare <1 x i32> @llvm.aarch64.neon.fcvtns.v1i32.v1f32(<1 x float>) +declare <1 x i32> @llvm.aarch64.neon.fcvtns.v1i32.f32(float) define i64 @test_vcvtnd_s64_f64(double %a) { ; CHECK: test_vcvtnd_s64_f64 ; CHECK: fcvtns {{d[0-9]}}, {{d[0-9]}} entry: - %vcvtns.i = insertelement <1 x double> undef, double %a, i32 0 - %vcvtns1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double> %vcvtns.i) + %vcvtns1.i = call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.f64(double %a) %0 = extractelement <1 x i64> %vcvtns1.i, i32 0 ret i64 %0 } -declare <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.f64(double) define i32 @test_vcvtnus(float %a) { ; CHECK: test_vcvtnus ; CHECK: fcvtnu {{s[0-9]}}, {{s[0-9]}} entry: - %vcvtnu.i = insertelement <1 x float> undef, float %a, i32 0 - %vcvtnu1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtnu.v1i32.v1f32(<1 x float> %vcvtnu.i) + %vcvtnu1.i = call <1 x i32> @llvm.aarch64.neon.fcvtnu.v1i32.f32(float %a) %0 = extractelement <1 x i32> %vcvtnu1.i, i32 0 ret i32 %0 } -declare <1 x i32> @llvm.aarch64.neon.fcvtnu.v1i32.v1f32(<1 x float>) +declare <1 x i32> @llvm.aarch64.neon.fcvtnu.v1i32.f32(float) define i64 @test_vcvtnud(double %a) { ; CHECK: test_vcvtnud ; CHECK: fcvtnu {{d[0-9]}}, {{d[0-9]}} entry: - %vcvtnu.i = insertelement <1 x double> undef, double %a, i32 0 - %vcvtnu1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double> %vcvtnu.i) + %vcvtnu1.i = call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.f64(double %a) %0 = extractelement <1 x i64> %vcvtnu1.i, i32 0 ret 
i64 %0 } -declare <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.f64(double) define i32 @test_vcvtpss(float %a) { ; CHECK: test_vcvtpss ; CHECK: fcvtps {{s[0-9]}}, {{s[0-9]}} entry: - %vcvtps.i = insertelement <1 x float> undef, float %a, i32 0 - %vcvtps1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtps.v1i32.v1f32(<1 x float> %vcvtps.i) + %vcvtps1.i = call <1 x i32> @llvm.aarch64.neon.fcvtps.v1i32.f32(float %a) %0 = extractelement <1 x i32> %vcvtps1.i, i32 0 ret i32 %0 } -declare <1 x i32> @llvm.aarch64.neon.fcvtps.v1i32.v1f32(<1 x float>) +declare <1 x i32> @llvm.aarch64.neon.fcvtps.v1i32.f32(float) define i64 @test_vcvtpd_s64_f64(double %a) { ; CHECK: test_vcvtpd_s64_f64 ; CHECK: fcvtps {{d[0-9]}}, {{d[0-9]}} entry: - %vcvtps.i = insertelement <1 x double> undef, double %a, i32 0 - %vcvtps1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double> %vcvtps.i) + %vcvtps1.i = call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.f64(double %a) %0 = extractelement <1 x i64> %vcvtps1.i, i32 0 ret i64 %0 } -declare <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.f64(double) define i32 @test_vcvtpus(float %a) { ; CHECK: test_vcvtpus ; CHECK: fcvtpu {{s[0-9]}}, {{s[0-9]}} entry: - %vcvtpu.i = insertelement <1 x float> undef, float %a, i32 0 - %vcvtpu1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtpu.v1i32.v1f32(<1 x float> %vcvtpu.i) + %vcvtpu1.i = call <1 x i32> @llvm.aarch64.neon.fcvtpu.v1i32.f32(float %a) %0 = extractelement <1 x i32> %vcvtpu1.i, i32 0 ret i32 %0 } -declare <1 x i32> @llvm.aarch64.neon.fcvtpu.v1i32.v1f32(<1 x float>) +declare <1 x i32> @llvm.aarch64.neon.fcvtpu.v1i32.f32(float) define i64 @test_vcvtpud(double %a) { ; CHECK: test_vcvtpud ; CHECK: fcvtpu {{d[0-9]}}, {{d[0-9]}} entry: - %vcvtpu.i = insertelement <1 x double> undef, double %a, i32 0 - %vcvtpu1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double> %vcvtpu.i) + %vcvtpu1.i = call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.f64(double %a) %0 = extractelement <1 x i64> %vcvtpu1.i, i32 0 ret i64 %0 } -declare <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.f64(double) define i32 @test_vcvtss(float %a) { ; CHECK: test_vcvtss ; CHECK: fcvtzs {{s[0-9]}}, {{s[0-9]}} entry: - %vcvtzs.i = insertelement <1 x float> undef, float %a, i32 0 - %vcvtzs1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtzs.v1i32.v1f32(<1 x float> %vcvtzs.i) + %vcvtzs1.i = call <1 x i32> @llvm.aarch64.neon.fcvtzs.v1i32.f32(float %a) %0 = extractelement <1 x i32> %vcvtzs1.i, i32 0 ret i32 %0 } -declare <1 x i32> @llvm.aarch64.neon.fcvtzs.v1i32.v1f32(<1 x float>) +declare <1 x i32> @llvm.aarch64.neon.fcvtzs.v1i32.f32(float) define i64 @test_vcvtd_s64_f64(double %a) { ; CHECK: test_vcvtd_s64_f64 ; CHECK: fcvtzs {{d[0-9]}}, {{d[0-9]}} entry: - %vcvzs.i = insertelement <1 x double> undef, double %a, i32 0 - %vcvzs1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.v1f64(<1 x double> %vcvzs.i) + %vcvzs1.i = call <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.f64(double %a) %0 = extractelement <1 x i64> %vcvzs1.i, i32 0 ret i64 %0 } -declare <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.f64(double) define i32 @test_vcvtus(float %a) { ; CHECK: test_vcvtus ; CHECK: fcvtzu {{s[0-9]}}, {{s[0-9]}} entry: - %vcvtzu.i = insertelement <1 x float> undef, float %a, i32 0 - 
%vcvtzu1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtzu.v1i32.v1f32(<1 x float> %vcvtzu.i) + %vcvtzu1.i = call <1 x i32> @llvm.aarch64.neon.fcvtzu.v1i32.f32(float %a) %0 = extractelement <1 x i32> %vcvtzu1.i, i32 0 ret i32 %0 } -declare <1 x i32> @llvm.aarch64.neon.fcvtzu.v1i32.v1f32(<1 x float>) +declare <1 x i32> @llvm.aarch64.neon.fcvtzu.v1i32.f32(float) define i64 @test_vcvtud(double %a) { ; CHECK: test_vcvtud ; CHECK: fcvtzu {{d[0-9]}}, {{d[0-9]}} entry: - %vcvtzu.i = insertelement <1 x double> undef, double %a, i32 0 - %vcvtzu1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.v1f64(<1 x double> %vcvtzu.i) + %vcvtzu1.i = call <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.f64(double %a) %0 = extractelement <1 x i64> %vcvtzu1.i, i32 0 ret i64 %0 } -declare <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.f64(double) diff --git a/test/CodeGen/AArch64/neon-scalar-fp-compare.ll b/test/CodeGen/AArch64/neon-scalar-fp-compare.ll index a6e5859..e0dce13 100644 --- a/test/CodeGen/AArch64/neon-scalar-fp-compare.ll +++ b/test/CodeGen/AArch64/neon-scalar-fp-compare.ll @@ -3,326 +3,280 @@ ;; Scalar Floating-point Compare define i32 @test_vceqs_f32(float %a, float %b) { -; CHECK: test_vceqs_f32 +; CHECK-LABEL: test_vceqs_f32 ; CHECK: fcmeq {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} entry: - %vceq.i = insertelement <1 x float> undef, float %a, i32 0 - %vceq1.i = insertelement <1 x float> undef, float %b, i32 0 - %vceq2.i = call <1 x i32> @llvm.aarch64.neon.vceq.v1i32.v1f32.v1f32(<1 x float> %vceq.i, <1 x float> %vceq1.i) - %0 = extractelement <1 x i32> %vceq2.i, i32 0 + %fceq2.i = call <1 x i32> @llvm.aarch64.neon.fceq.v1i32.f32.f32(float %a, float %b) + %0 = extractelement <1 x i32> %fceq2.i, i32 0 ret i32 %0 } define i64 @test_vceqd_f64(double %a, double %b) { -; CHECK: test_vceqd_f64 +; CHECK-LABEL: test_vceqd_f64 ; CHECK: fcmeq {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} entry: - %vceq.i = insertelement <1 x double> undef, double %a, i32 0 - %vceq1.i = insertelement <1 x double> undef, double %b, i32 0 - %vceq2.i = call <1 x i64> @llvm.aarch64.neon.vceq.v1i64.v1f64.v1f64(<1 x double> %vceq.i, <1 x double> %vceq1.i) - %0 = extractelement <1 x i64> %vceq2.i, i32 0 + %fceq2.i = call <1 x i64> @llvm.aarch64.neon.fceq.v1i64.f64.f64(double %a, double %b) + %0 = extractelement <1 x i64> %fceq2.i, i32 0 ret i64 %0 } -define <1 x i64> @test_vceqz_f64(<1 x double> %a) #0 { -; CHECK: test_vceqz_f64 +define <1 x i64> @test_vceqz_f64(<1 x double> %a) { +; CHECK-LABEL: test_vceqz_f64 ; CHECK: fcmeq {{d[0-9]+}}, {{d[0-9]+}}, #0.0 entry: %0 = fcmp oeq <1 x double> %a, zeroinitializer - %vceqz.i = zext <1 x i1> %0 to <1 x i64> + %vceqz.i = sext <1 x i1> %0 to <1 x i64> ret <1 x i64> %vceqz.i } define i32 @test_vceqzs_f32(float %a) { -; CHECK: test_vceqzs_f32 +; CHECK-LABEL: test_vceqzs_f32 ; CHECK: fcmeq {{s[0-9]}}, {{s[0-9]}}, #0.0 entry: - %vceq.i = insertelement <1 x float> undef, float %a, i32 0 - %vceq1.i = call <1 x i32> @llvm.aarch64.neon.vceq.v1i32.v1f32.v1f32(<1 x float> %vceq.i, <1 x float> zeroinitializer) - %0 = extractelement <1 x i32> %vceq1.i, i32 0 + %fceq1.i = call <1 x i32> @llvm.aarch64.neon.fceq.v1i32.f32.f32(float %a, float 0.0) + %0 = extractelement <1 x i32> %fceq1.i, i32 0 ret i32 %0 } define i64 @test_vceqzd_f64(double %a) { -; CHECK: test_vceqzd_f64 +; CHECK-LABEL: test_vceqzd_f64 ; CHECK: fcmeq {{d[0-9]}}, {{d[0-9]}}, #0.0 entry: - %vceq.i = insertelement <1 x double> undef, double %a, i32 0 - %vceq1.i = tail call <1 x i64> 
@llvm.aarch64.neon.vceq.v1i64.v1f64.v1f32(<1 x double> %vceq.i, <1 x float> zeroinitializer) #5 - %0 = extractelement <1 x i64> %vceq1.i, i32 0 + %fceq1.i = call <1 x i64> @llvm.aarch64.neon.fceq.v1i64.f64.f32(double %a, float 0.0) + %0 = extractelement <1 x i64> %fceq1.i, i32 0 ret i64 %0 } define i32 @test_vcges_f32(float %a, float %b) { -; CHECK: test_vcges_f32 +; CHECK-LABEL: test_vcges_f32 ; CHECK: fcmge {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} entry: - %vcge.i = insertelement <1 x float> undef, float %a, i32 0 - %vcge1.i = insertelement <1 x float> undef, float %b, i32 0 - %vcge2.i = call <1 x i32> @llvm.aarch64.neon.vcge.v1i32.v1f32.v1f32(<1 x float> %vcge.i, <1 x float> %vcge1.i) - %0 = extractelement <1 x i32> %vcge2.i, i32 0 + %fcge2.i = call <1 x i32> @llvm.aarch64.neon.fcge.v1i32.f32.f32(float %a, float %b) + %0 = extractelement <1 x i32> %fcge2.i, i32 0 ret i32 %0 } define i64 @test_vcged_f64(double %a, double %b) { -; CHECK: test_vcged_f64 +; CHECK-LABEL: test_vcged_f64 ; CHECK: fcmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} entry: - %vcge.i = insertelement <1 x double> undef, double %a, i32 0 - %vcge1.i = insertelement <1 x double> undef, double %b, i32 0 - %vcge2.i = call <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1f64.v1f64(<1 x double> %vcge.i, <1 x double> %vcge1.i) - %0 = extractelement <1 x i64> %vcge2.i, i32 0 + %fcge2.i = call <1 x i64> @llvm.aarch64.neon.fcge.v1i64.f64.f64(double %a, double %b) + %0 = extractelement <1 x i64> %fcge2.i, i32 0 ret i64 %0 } define i32 @test_vcgezs_f32(float %a) { -; CHECK: test_vcgezs_f32 +; CHECK-LABEL: test_vcgezs_f32 ; CHECK: fcmge {{s[0-9]}}, {{s[0-9]}}, #0.0 entry: - %vcge.i = insertelement <1 x float> undef, float %a, i32 0 - %vcge1.i = call <1 x i32> @llvm.aarch64.neon.vcge.v1i32.v1f32.v1f32(<1 x float> %vcge.i, <1 x float> zeroinitializer) - %0 = extractelement <1 x i32> %vcge1.i, i32 0 + %fcge1.i = call <1 x i32> @llvm.aarch64.neon.fcge.v1i32.f32.f32(float %a, float 0.0) + %0 = extractelement <1 x i32> %fcge1.i, i32 0 ret i32 %0 } define i64 @test_vcgezd_f64(double %a) { -; CHECK: test_vcgezd_f64 +; CHECK-LABEL: test_vcgezd_f64 ; CHECK: fcmge {{d[0-9]}}, {{d[0-9]}}, #0.0 entry: - %vcge.i = insertelement <1 x double> undef, double %a, i32 0 - %vcge1.i = tail call <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1f64.v1f32(<1 x double> %vcge.i, <1 x float> zeroinitializer) #5 - %0 = extractelement <1 x i64> %vcge1.i, i32 0 + %fcge1.i = call <1 x i64> @llvm.aarch64.neon.fcge.v1i64.f64.f32(double %a, float 0.0) + %0 = extractelement <1 x i64> %fcge1.i, i32 0 ret i64 %0 } define i32 @test_vcgts_f32(float %a, float %b) { -; CHECK: test_vcgts_f32 +; CHECK-LABEL: test_vcgts_f32 ; CHECK: fcmgt {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} entry: - %vcgt.i = insertelement <1 x float> undef, float %a, i32 0 - %vcgt1.i = insertelement <1 x float> undef, float %b, i32 0 - %vcgt2.i = call <1 x i32> @llvm.aarch64.neon.vcgt.v1i32.v1f32.v1f32(<1 x float> %vcgt.i, <1 x float> %vcgt1.i) - %0 = extractelement <1 x i32> %vcgt2.i, i32 0 + %fcgt2.i = call <1 x i32> @llvm.aarch64.neon.fcgt.v1i32.f32.f32(float %a, float %b) + %0 = extractelement <1 x i32> %fcgt2.i, i32 0 ret i32 %0 } define i64 @test_vcgtd_f64(double %a, double %b) { -; CHECK: test_vcgtd_f64 +; CHECK-LABEL: test_vcgtd_f64 ; CHECK: fcmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} entry: - %vcgt.i = insertelement <1 x double> undef, double %a, i32 0 - %vcgt1.i = insertelement <1 x double> undef, double %b, i32 0 - %vcgt2.i = call <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1f64.v1f64(<1 x double> %vcgt.i, <1 x double> %vcgt1.i) - 
%0 = extractelement <1 x i64> %vcgt2.i, i32 0 + %fcgt2.i = call <1 x i64> @llvm.aarch64.neon.fcgt.v1i64.f64.f64(double %a, double %b) + %0 = extractelement <1 x i64> %fcgt2.i, i32 0 ret i64 %0 } define i32 @test_vcgtzs_f32(float %a) { -; CHECK: test_vcgtzs_f32 +; CHECK-LABEL: test_vcgtzs_f32 ; CHECK: fcmgt {{s[0-9]}}, {{s[0-9]}}, #0.0 entry: - %vcgt.i = insertelement <1 x float> undef, float %a, i32 0 - %vcgt1.i = call <1 x i32> @llvm.aarch64.neon.vcgt.v1i32.v1f32.v1f32(<1 x float> %vcgt.i, <1 x float> zeroinitializer) - %0 = extractelement <1 x i32> %vcgt1.i, i32 0 + %fcgt1.i = call <1 x i32> @llvm.aarch64.neon.fcgt.v1i32.f32.f32(float %a, float 0.0) + %0 = extractelement <1 x i32> %fcgt1.i, i32 0 ret i32 %0 } define i64 @test_vcgtzd_f64(double %a) { -; CHECK: test_vcgtzd_f64 +; CHECK-LABEL: test_vcgtzd_f64 ; CHECK: fcmgt {{d[0-9]}}, {{d[0-9]}}, #0.0 entry: - %vcgt.i = insertelement <1 x double> undef, double %a, i32 0 - %vcgt1.i = tail call <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1f64.v1f32(<1 x double> %vcgt.i, <1 x float> zeroinitializer) #5 - %0 = extractelement <1 x i64> %vcgt1.i, i32 0 + %fcgt1.i = call <1 x i64> @llvm.aarch64.neon.fcgt.v1i64.f64.f32(double %a, float 0.0) + %0 = extractelement <1 x i64> %fcgt1.i, i32 0 ret i64 %0 } define i32 @test_vcles_f32(float %a, float %b) { -; CHECK: test_vcles_f32 +; CHECK-LABEL: test_vcles_f32 ; CHECK: fcmge {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} entry: - %vcge.i = insertelement <1 x float> undef, float %a, i32 0 - %vcge1.i = insertelement <1 x float> undef, float %b, i32 0 - %vcge2.i = call <1 x i32> @llvm.aarch64.neon.vcge.v1i32.v1f32.v1f32(<1 x float> %vcge.i, <1 x float> %vcge1.i) - %0 = extractelement <1 x i32> %vcge2.i, i32 0 + %fcge2.i = call <1 x i32> @llvm.aarch64.neon.fcge.v1i32.f32.f32(float %a, float %b) + %0 = extractelement <1 x i32> %fcge2.i, i32 0 ret i32 %0 } define i64 @test_vcled_f64(double %a, double %b) { -; CHECK: test_vcled_f64 +; CHECK-LABEL: test_vcled_f64 ; CHECK: fcmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} entry: - %vcge.i = insertelement <1 x double> undef, double %a, i32 0 - %vcge1.i = insertelement <1 x double> undef, double %b, i32 0 - %vcge2.i = call <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1f64.v1f64(<1 x double> %vcge.i, <1 x double> %vcge1.i) - %0 = extractelement <1 x i64> %vcge2.i, i32 0 + %fcge2.i = call <1 x i64> @llvm.aarch64.neon.fcge.v1i64.f64.f64(double %a, double %b) + %0 = extractelement <1 x i64> %fcge2.i, i32 0 ret i64 %0 } define i32 @test_vclezs_f32(float %a) { -; CHECK: test_vclezs_f32 +; CHECK-LABEL: test_vclezs_f32 ; CHECK: fcmle {{s[0-9]}}, {{s[0-9]}}, #0.0 entry: - %vcle.i = insertelement <1 x float> undef, float %a, i32 0 - %vcle1.i = call <1 x i32> @llvm.aarch64.neon.vclez.v1i32.v1f32.v1f32(<1 x float> %vcle.i, <1 x float> zeroinitializer) - %0 = extractelement <1 x i32> %vcle1.i, i32 0 + %fcle1.i = call <1 x i32> @llvm.aarch64.neon.fclez.v1i32.f32.f32(float %a, float 0.0) + %0 = extractelement <1 x i32> %fcle1.i, i32 0 ret i32 %0 } define i64 @test_vclezd_f64(double %a) { -; CHECK: test_vclezd_f64 +; CHECK-LABEL: test_vclezd_f64 ; CHECK: fcmle {{d[0-9]}}, {{d[0-9]}}, #0.0 entry: - %vcle.i = insertelement <1 x double> undef, double %a, i32 0 - %vcle1.i = tail call <1 x i64> @llvm.aarch64.neon.vclez.v1i64.v1f64.v1f32(<1 x double> %vcle.i, <1 x float> zeroinitializer) #5 - %0 = extractelement <1 x i64> %vcle1.i, i32 0 + %fcle1.i = call <1 x i64> @llvm.aarch64.neon.fclez.v1i64.f64.f32(double %a, float 0.0) + %0 = extractelement <1 x i64> %fcle1.i, i32 0 ret i64 %0 } define i32 
@test_vclts_f32(float %a, float %b) { -; CHECK: test_vclts_f32 +; CHECK-LABEL: test_vclts_f32 ; CHECK: fcmgt {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} entry: - %vcgt.i = insertelement <1 x float> undef, float %b, i32 0 - %vcgt1.i = insertelement <1 x float> undef, float %a, i32 0 - %vcgt2.i = call <1 x i32> @llvm.aarch64.neon.vcgt.v1i32.v1f32.v1f32(<1 x float> %vcgt.i, <1 x float> %vcgt1.i) - %0 = extractelement <1 x i32> %vcgt2.i, i32 0 + %fcgt2.i = call <1 x i32> @llvm.aarch64.neon.fcgt.v1i32.f32.f32(float %a, float %b) + %0 = extractelement <1 x i32> %fcgt2.i, i32 0 ret i32 %0 } define i64 @test_vcltd_f64(double %a, double %b) { -; CHECK: test_vcltd_f64 +; CHECK-LABEL: test_vcltd_f64 ; CHECK: fcmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} entry: - %vcgt.i = insertelement <1 x double> undef, double %b, i32 0 - %vcgt1.i = insertelement <1 x double> undef, double %a, i32 0 - %vcgt2.i = call <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1f64.v1f64(<1 x double> %vcgt.i, <1 x double> %vcgt1.i) - %0 = extractelement <1 x i64> %vcgt2.i, i32 0 + %fcgt2.i = call <1 x i64> @llvm.aarch64.neon.fcgt.v1i64.f64.f64(double %a, double %b) + %0 = extractelement <1 x i64> %fcgt2.i, i32 0 ret i64 %0 } define i32 @test_vcltzs_f32(float %a) { -; CHECK: test_vcltzs_f32 +; CHECK-LABEL: test_vcltzs_f32 ; CHECK: fcmlt {{s[0-9]}}, {{s[0-9]}}, #0.0 entry: - %vclt.i = insertelement <1 x float> undef, float %a, i32 0 - %vclt1.i = call <1 x i32> @llvm.aarch64.neon.vcltz.v1i32.v1f32.v1f32(<1 x float> %vclt.i, <1 x float> zeroinitializer) - %0 = extractelement <1 x i32> %vclt1.i, i32 0 + %fclt1.i = call <1 x i32> @llvm.aarch64.neon.fcltz.v1i32.f32.f32(float %a, float 0.0) + %0 = extractelement <1 x i32> %fclt1.i, i32 0 ret i32 %0 } define i64 @test_vcltzd_f64(double %a) { -; CHECK: test_vcltzd_f64 +; CHECK-LABEL: test_vcltzd_f64 ; CHECK: fcmlt {{d[0-9]}}, {{d[0-9]}}, #0.0 entry: - %vclt.i = insertelement <1 x double> undef, double %a, i32 0 - %vclt1.i = tail call <1 x i64> @llvm.aarch64.neon.vcltz.v1i64.v1f64.v1f32(<1 x double> %vclt.i, <1 x float> zeroinitializer) #5 - %0 = extractelement <1 x i64> %vclt1.i, i32 0 + %fclt1.i = call <1 x i64> @llvm.aarch64.neon.fcltz.v1i64.f64.f32(double %a, float 0.0) + %0 = extractelement <1 x i64> %fclt1.i, i32 0 ret i64 %0 } define i32 @test_vcages_f32(float %a, float %b) { -; CHECK: test_vcages_f32 +; CHECK-LABEL: test_vcages_f32 ; CHECK: facge {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} entry: - %vcage.i = insertelement <1 x float> undef, float %a, i32 0 - %vcage1.i = insertelement <1 x float> undef, float %b, i32 0 - %vcage2.i = call <1 x i32> @llvm.aarch64.neon.vcage.v1i32.v1f32.v1f32(<1 x float> %vcage.i, <1 x float> %vcage1.i) - %0 = extractelement <1 x i32> %vcage2.i, i32 0 + %fcage2.i = call <1 x i32> @llvm.aarch64.neon.fcage.v1i32.f32.f32(float %a, float %b) + %0 = extractelement <1 x i32> %fcage2.i, i32 0 ret i32 %0 } define i64 @test_vcaged_f64(double %a, double %b) { -; CHECK: test_vcaged_f64 +; CHECK-LABEL: test_vcaged_f64 ; CHECK: facge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} entry: - %vcage.i = insertelement <1 x double> undef, double %a, i32 0 - %vcage1.i = insertelement <1 x double> undef, double %b, i32 0 - %vcage2.i = call <1 x i64> @llvm.aarch64.neon.vcage.v1i64.v1f64.v1f64(<1 x double> %vcage.i, <1 x double> %vcage1.i) - %0 = extractelement <1 x i64> %vcage2.i, i32 0 + %fcage2.i = call <1 x i64> @llvm.aarch64.neon.fcage.v1i64.f64.f64(double %a, double %b) + %0 = extractelement <1 x i64> %fcage2.i, i32 0 ret i64 %0 } define i32 @test_vcagts_f32(float %a, float %b) { -; CHECK: test_vcagts_f32 +; 
CHECK-LABEL: test_vcagts_f32 ; CHECK: facgt {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} entry: - %vcagt.i = insertelement <1 x float> undef, float %a, i32 0 - %vcagt1.i = insertelement <1 x float> undef, float %b, i32 0 - %vcagt2.i = call <1 x i32> @llvm.aarch64.neon.vcagt.v1i32.v1f32.v1f32(<1 x float> %vcagt.i, <1 x float> %vcagt1.i) - %0 = extractelement <1 x i32> %vcagt2.i, i32 0 + %fcagt2.i = call <1 x i32> @llvm.aarch64.neon.fcagt.v1i32.f32.f32(float %a, float %b) + %0 = extractelement <1 x i32> %fcagt2.i, i32 0 ret i32 %0 } define i64 @test_vcagtd_f64(double %a, double %b) { -; CHECK: test_vcagtd_f64 +; CHECK-LABEL: test_vcagtd_f64 ; CHECK: facgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} entry: - %vcagt.i = insertelement <1 x double> undef, double %a, i32 0 - %vcagt1.i = insertelement <1 x double> undef, double %b, i32 0 - %vcagt2.i = call <1 x i64> @llvm.aarch64.neon.vcagt.v1i64.v1f64.v1f64(<1 x double> %vcagt.i, <1 x double> %vcagt1.i) - %0 = extractelement <1 x i64> %vcagt2.i, i32 0 + %fcagt2.i = call <1 x i64> @llvm.aarch64.neon.fcagt.v1i64.f64.f64(double %a, double %b) + %0 = extractelement <1 x i64> %fcagt2.i, i32 0 ret i64 %0 } define i32 @test_vcales_f32(float %a, float %b) { -; CHECK: test_vcales_f32 +; CHECK-LABEL: test_vcales_f32 ; CHECK: facge {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} entry: - %vcage.i = insertelement <1 x float> undef, float %b, i32 0 - %vcage1.i = insertelement <1 x float> undef, float %a, i32 0 - %vcage2.i = call <1 x i32> @llvm.aarch64.neon.vcage.v1i32.v1f32.v1f32(<1 x float> %vcage.i, <1 x float> %vcage1.i) - %0 = extractelement <1 x i32> %vcage2.i, i32 0 + %fcage2.i = call <1 x i32> @llvm.aarch64.neon.fcage.v1i32.f32.f32(float %a, float %b) + %0 = extractelement <1 x i32> %fcage2.i, i32 0 ret i32 %0 } define i64 @test_vcaled_f64(double %a, double %b) { -; CHECK: test_vcaled_f64 +; CHECK-LABEL: test_vcaled_f64 ; CHECK: facge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} entry: - %vcage.i = insertelement <1 x double> undef, double %b, i32 0 - %vcage1.i = insertelement <1 x double> undef, double %a, i32 0 - %vcage2.i = call <1 x i64> @llvm.aarch64.neon.vcage.v1i64.v1f64.v1f64(<1 x double> %vcage.i, <1 x double> %vcage1.i) - %0 = extractelement <1 x i64> %vcage2.i, i32 0 + %fcage2.i = call <1 x i64> @llvm.aarch64.neon.fcage.v1i64.f64.f64(double %a, double %b) + %0 = extractelement <1 x i64> %fcage2.i, i32 0 ret i64 %0 } define i32 @test_vcalts_f32(float %a, float %b) { -; CHECK: test_vcalts_f32 +; CHECK-LABEL: test_vcalts_f32 ; CHECK: facgt {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} entry: - %vcalt.i = insertelement <1 x float> undef, float %b, i32 0 - %vcalt1.i = insertelement <1 x float> undef, float %a, i32 0 - %vcalt2.i = call <1 x i32> @llvm.aarch64.neon.vcagt.v1i32.v1f32.v1f32(<1 x float> %vcalt.i, <1 x float> %vcalt1.i) - %0 = extractelement <1 x i32> %vcalt2.i, i32 0 + %fcalt2.i = call <1 x i32> @llvm.aarch64.neon.fcagt.v1i32.f32.f32(float %a, float %b) + %0 = extractelement <1 x i32> %fcalt2.i, i32 0 ret i32 %0 } define i64 @test_vcaltd_f64(double %a, double %b) { -; CHECK: test_vcaltd_f64 +; CHECK-LABEL: test_vcaltd_f64 ; CHECK: facgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} entry: - %vcalt.i = insertelement <1 x double> undef, double %b, i32 0 - %vcalt1.i = insertelement <1 x double> undef, double %a, i32 0 - %vcalt2.i = call <1 x i64> @llvm.aarch64.neon.vcagt.v1i64.v1f64.v1f64(<1 x double> %vcalt.i, <1 x double> %vcalt1.i) - %0 = extractelement <1 x i64> %vcalt2.i, i32 0 + %fcalt2.i = call <1 x i64> @llvm.aarch64.neon.fcagt.v1i64.f64.f64(double %a, double %b) + %0 = extractelement <1 x 
i64> %fcalt2.i, i32 0 ret i64 %0 } -declare <1 x i32> @llvm.aarch64.neon.vceq.v1i32.v1f32.v1f32(<1 x float>, <1 x float>) -declare <1 x i64> @llvm.aarch64.neon.vceq.v1i64.v1f64.v1f32(<1 x double>, <1 x float>) -declare <1 x i64> @llvm.aarch64.neon.vceq.v1i64.v1f64.v1f64(<1 x double>, <1 x double>) -declare <1 x i32> @llvm.aarch64.neon.vcge.v1i32.v1f32.v1f32(<1 x float>, <1 x float>) -declare <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1f64.v1f32(<1 x double>, <1 x float>) -declare <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1f64.v1f64(<1 x double>, <1 x double>) -declare <1 x i32> @llvm.aarch64.neon.vclez.v1i32.v1f32.v1f32(<1 x float>, <1 x float>) -declare <1 x i64> @llvm.aarch64.neon.vclez.v1i64.v1f64.v1f32(<1 x double>, <1 x float>) -declare <1 x i32> @llvm.aarch64.neon.vcgt.v1i32.v1f32.v1f32(<1 x float>, <1 x float>) -declare <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1f64.v1f32(<1 x double>, <1 x float>) -declare <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1f64.v1f64(<1 x double>, <1 x double>) -declare <1 x i32> @llvm.aarch64.neon.vcltz.v1i32.v1f32.v1f32(<1 x float>, <1 x float>) -declare <1 x i64> @llvm.aarch64.neon.vcltz.v1i64.v1f64.v1f32(<1 x double>, <1 x float>) -declare <1 x i32> @llvm.aarch64.neon.vcage.v1i32.v1f32.v1f32(<1 x float>, <1 x float>) -declare <1 x i64> @llvm.aarch64.neon.vcage.v1i64.v1f64.v1f64(<1 x double>, <1 x double>) -declare <1 x i32> @llvm.aarch64.neon.vcagt.v1i32.v1f32.v1f32(<1 x float>, <1 x float>) -declare <1 x i64> @llvm.aarch64.neon.vcagt.v1i64.v1f64.v1f64(<1 x double>, <1 x double>) +declare <1 x i32> @llvm.aarch64.neon.fceq.v1i32.f32.f32(float, float) +declare <1 x i64> @llvm.aarch64.neon.fceq.v1i64.f64.f32(double, float) +declare <1 x i64> @llvm.aarch64.neon.fceq.v1i64.f64.f64(double, double) +declare <1 x i32> @llvm.aarch64.neon.fcge.v1i32.f32.f32(float, float) +declare <1 x i64> @llvm.aarch64.neon.fcge.v1i64.f64.f32(double, float) +declare <1 x i64> @llvm.aarch64.neon.fcge.v1i64.f64.f64(double, double) +declare <1 x i32> @llvm.aarch64.neon.fclez.v1i32.f32.f32(float, float) +declare <1 x i64> @llvm.aarch64.neon.fclez.v1i64.f64.f32(double, float) +declare <1 x i32> @llvm.aarch64.neon.fcgt.v1i32.f32.f32(float, float) +declare <1 x i64> @llvm.aarch64.neon.fcgt.v1i64.f64.f32(double, float) +declare <1 x i64> @llvm.aarch64.neon.fcgt.v1i64.f64.f64(double, double) +declare <1 x i32> @llvm.aarch64.neon.fcltz.v1i32.f32.f32(float, float) +declare <1 x i64> @llvm.aarch64.neon.fcltz.v1i64.f64.f32(double, float) +declare <1 x i32> @llvm.aarch64.neon.fcage.v1i32.f32.f32(float, float) +declare <1 x i64> @llvm.aarch64.neon.fcage.v1i64.f64.f64(double, double) +declare <1 x i32> @llvm.aarch64.neon.fcagt.v1i32.f32.f32(float, float) +declare <1 x i64> @llvm.aarch64.neon.fcagt.v1i64.f64.f64(double, double) diff --git a/test/CodeGen/AArch64/neon-scalar-recip.ll b/test/CodeGen/AArch64/neon-scalar-recip.ll index f21c27b..100839b 100644 --- a/test/CodeGen/AArch64/neon-scalar-recip.ll +++ b/test/CodeGen/AArch64/neon-scalar-recip.ll @@ -3,56 +3,42 @@ define float @test_vrecpss_f32(float %a, float %b) { ; CHECK: test_vrecpss_f32 ; CHECK: frecps {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} - %1 = insertelement <1 x float> undef, float %a, i32 0 - %2 = insertelement <1 x float> undef, float %b, i32 0 - %3 = call <1 x float> @llvm.arm.neon.vrecps.v1f32(<1 x float> %1, <1 x float> %2) - %4 = extractelement <1 x float> %3, i32 0 - ret float %4 + %1 = call float @llvm.aarch64.neon.vrecps.f32(float %a, float %b) + ret float %1 } define double @test_vrecpsd_f64(double %a, double %b) { ; CHECK: 
test_vrecpsd_f64 ; CHECK: frecps {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - %1 = insertelement <1 x double> undef, double %a, i32 0 - %2 = insertelement <1 x double> undef, double %b, i32 0 - %3 = call <1 x double> @llvm.arm.neon.vrecps.v1f64(<1 x double> %1, <1 x double> %2) - %4 = extractelement <1 x double> %3, i32 0 - ret double %4 + %1 = call double @llvm.aarch64.neon.vrecps.f64(double %a, double %b) + ret double %1 } -declare <1 x float> @llvm.arm.neon.vrecps.v1f32(<1 x float>, <1 x float>) -declare <1 x double> @llvm.arm.neon.vrecps.v1f64(<1 x double>, <1 x double>) +declare float @llvm.aarch64.neon.vrecps.f32(float, float) +declare double @llvm.aarch64.neon.vrecps.f64(double, double) define float @test_vrsqrtss_f32(float %a, float %b) { ; CHECK: test_vrsqrtss_f32 ; CHECK: frsqrts {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} - %1 = insertelement <1 x float> undef, float %a, i32 0 - %2 = insertelement <1 x float> undef, float %b, i32 0 - %3 = call <1 x float> @llvm.arm.neon.vrsqrts.v1f32(<1 x float> %1, <1 x float> %2) - %4 = extractelement <1 x float> %3, i32 0 - ret float %4 + %1 = call float @llvm.aarch64.neon.vrsqrts.f32(float %a, float %b) + ret float %1 } define double @test_vrsqrtsd_f64(double %a, double %b) { ; CHECK: test_vrsqrtsd_f64 ; CHECK: frsqrts {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - %1 = insertelement <1 x double> undef, double %a, i32 0 - %2 = insertelement <1 x double> undef, double %b, i32 0 - %3 = call <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double> %1, <1 x double> %2) - %4 = extractelement <1 x double> %3, i32 0 - ret double %4 + %1 = call double @llvm.aarch64.neon.vrsqrts.f64(double %a, double %b) + ret double %1 } -declare <1 x float> @llvm.arm.neon.vrsqrts.v1f32(<1 x float>, <1 x float>) -declare <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double>, <1 x double>) +declare float @llvm.aarch64.neon.vrsqrts.f32(float, float) +declare double @llvm.aarch64.neon.vrsqrts.f64(double, double) define float @test_vrecpes_f32(float %a) { ; CHECK: test_vrecpes_f32 ; CHECK: frecpe {{s[0-9]+}}, {{s[0-9]+}} entry: - %vrecpe.i = insertelement <1 x float> undef, float %a, i32 0 - %vrecpe1.i = tail call <1 x float> @llvm.arm.neon.vrecpe.v1f32(<1 x float> %vrecpe.i) - %0 = extractelement <1 x float> %vrecpe1.i, i32 0 + %0 = call float @llvm.aarch64.neon.vrecpe.f32(float %a) ret float %0 } @@ -60,22 +46,18 @@ define double @test_vrecped_f64(double %a) { ; CHECK: test_vrecped_f64 ; CHECK: frecpe {{d[0-9]+}}, {{d[0-9]+}} entry: - %vrecpe.i = insertelement <1 x double> undef, double %a, i32 0 - %vrecpe1.i = tail call <1 x double> @llvm.arm.neon.vrecpe.v1f64(<1 x double> %vrecpe.i) - %0 = extractelement <1 x double> %vrecpe1.i, i32 0 + %0 = call double @llvm.aarch64.neon.vrecpe.f64(double %a) ret double %0 } -declare <1 x float> @llvm.arm.neon.vrecpe.v1f32(<1 x float>) -declare <1 x double> @llvm.arm.neon.vrecpe.v1f64(<1 x double>) +declare float @llvm.aarch64.neon.vrecpe.f32(float) +declare double @llvm.aarch64.neon.vrecpe.f64(double) define float @test_vrecpxs_f32(float %a) { ; CHECK: test_vrecpxs_f32 ; CHECK: frecpx {{s[0-9]+}}, {{s[0-9]+}} entry: - %vrecpx.i = insertelement <1 x float> undef, float %a, i32 0 - %vrecpx1.i = tail call <1 x float> @llvm.aarch64.neon.vrecpx.v1f32(<1 x float> %vrecpx.i) - %0 = extractelement <1 x float> %vrecpx1.i, i32 0 + %0 = call float @llvm.aarch64.neon.vrecpx.f32(float %a) ret float %0 } @@ -83,22 +65,18 @@ define double @test_vrecpxd_f64(double %a) { ; CHECK: test_vrecpxd_f64 ; CHECK: frecpx {{d[0-9]+}}, {{d[0-9]+}} entry: - %vrecpx.i = 
insertelement <1 x double> undef, double %a, i32 0 - %vrecpx1.i = tail call <1 x double> @llvm.aarch64.neon.vrecpx.v1f64(<1 x double> %vrecpx.i) - %0 = extractelement <1 x double> %vrecpx1.i, i32 0 + %0 = call double @llvm.aarch64.neon.vrecpx.f64(double %a) ret double %0 } -declare <1 x float> @llvm.aarch64.neon.vrecpx.v1f32(<1 x float>) -declare <1 x double> @llvm.aarch64.neon.vrecpx.v1f64(<1 x double>) +declare float @llvm.aarch64.neon.vrecpx.f32(float) +declare double @llvm.aarch64.neon.vrecpx.f64(double) define float @test_vrsqrtes_f32(float %a) { ; CHECK: test_vrsqrtes_f32 ; CHECK: frsqrte {{s[0-9]+}}, {{s[0-9]+}} entry: - %vrsqrte.i = insertelement <1 x float> undef, float %a, i32 0 - %vrsqrte1.i = tail call <1 x float> @llvm.arm.neon.vrsqrte.v1f32(<1 x float> %vrsqrte.i) - %0 = extractelement <1 x float> %vrsqrte1.i, i32 0 + %0 = call float @llvm.aarch64.neon.vrsqrte.f32(float %a) ret float %0 } @@ -106,11 +84,9 @@ define double @test_vrsqrted_f64(double %a) { ; CHECK: test_vrsqrted_f64 ; CHECK: frsqrte {{d[0-9]+}}, {{d[0-9]+}} entry: - %vrsqrte.i = insertelement <1 x double> undef, double %a, i32 0 - %vrsqrte1.i = tail call <1 x double> @llvm.arm.neon.vrsqrte.v1f64(<1 x double> %vrsqrte.i) - %0 = extractelement <1 x double> %vrsqrte1.i, i32 0 + %0 = call double @llvm.aarch64.neon.vrsqrte.f64(double %a) ret double %0 } -declare <1 x float> @llvm.arm.neon.vrsqrte.v1f32(<1 x float>) -declare <1 x double> @llvm.arm.neon.vrsqrte.v1f64(<1 x double>) +declare float @llvm.aarch64.neon.vrsqrte.f32(float) +declare double @llvm.aarch64.neon.vrsqrte.f64(double) diff --git a/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll b/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll index 80e8dc3..33ce5cf 100644 --- a/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll +++ b/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll @@ -4,210 +4,198 @@ declare <1 x i64> @llvm.aarch64.neon.vpadd(<2 x i64>) define <1 x i64> @test_addp_v1i64(<2 x i64> %a) { ; CHECK: test_addp_v1i64: - %val = call <1 x i64> @llvm.aarch64.neon.vpadd(<2 x i64> %a) -; CHECK: addp d0, v0.2d - ret <1 x i64> %val +; CHECK: addp {{d[0-9]+}}, {{v[0-9]+}}.2d + %val = call <1 x i64> @llvm.aarch64.neon.vpadd(<2 x i64> %a) + ret <1 x i64> %val } -declare <1 x float> @llvm.aarch64.neon.vpfadd(<2 x float>) +declare float @llvm.aarch64.neon.vpfadd.f32.v2f32(<2 x float>) -define <1 x float> @test_faddp_v1f32(<2 x float> %a) { -; CHECK: test_faddp_v1f32: - %val = call <1 x float> @llvm.aarch64.neon.vpfadd(<2 x float> %a) -; CHECK: faddp s0, v0.2s - ret <1 x float> %val +define float @test_faddp_f32(<2 x float> %a) { +; CHECK: test_faddp_f32: +; CHECK: faddp {{s[0-9]+}}, {{v[0-9]+}}.2s + %val = call float @llvm.aarch64.neon.vpfadd.f32.v2f32(<2 x float> %a) + ret float %val } -declare <1 x double> @llvm.aarch64.neon.vpfaddq(<2 x double>) +declare double @llvm.aarch64.neon.vpfadd.f64.v2f64(<2 x double>) -define <1 x double> @test_faddp_v1f64(<2 x double> %a) { -; CHECK: test_faddp_v1f64: - %val = call <1 x double> @llvm.aarch64.neon.vpfaddq(<2 x double> %a) -; CHECK: faddp d0, v0.2d - ret <1 x double> %val +define double @test_faddp_f64(<2 x double> %a) { +; CHECK: test_faddp_f64: +; CHECK: faddp {{d[0-9]+}}, {{v[0-9]+}}.2d + %val = call double @llvm.aarch64.neon.vpfadd.f64.v2f64(<2 x double> %a) + ret double %val } -declare <1 x float> @llvm.aarch64.neon.vpmax(<2 x float>) +declare float @llvm.aarch64.neon.vpmax.f32.v2f32(<2 x float>) -define <1 x float> @test_fmaxp_v1f32(<2 x float> %a) { -; CHECK: test_fmaxp_v1f32: - %val = call <1 x float> 
@llvm.aarch64.neon.vpmax(<2 x float> %a) -; CHECK: fmaxp s0, v0.2s - ret <1 x float> %val +define float @test_fmaxp_f32(<2 x float> %a) { +; CHECK: test_fmaxp_f32: +; CHECK: fmaxp {{s[0-9]+}}, {{v[0-9]+}}.2s + %val = call float @llvm.aarch64.neon.vpmax.f32.v2f32(<2 x float> %a) + ret float %val } -declare <1 x double> @llvm.aarch64.neon.vpmaxq(<2 x double>) +declare double @llvm.aarch64.neon.vpmax.f64.v2f64(<2 x double>) -define <1 x double> @test_fmaxp_v1f64(<2 x double> %a) { -; CHECK: test_fmaxp_v1f64: - %val = call <1 x double> @llvm.aarch64.neon.vpmaxq(<2 x double> %a) -; CHECK: fmaxp d0, v0.2d - ret <1 x double> %val +define double @test_fmaxp_f64(<2 x double> %a) { +; CHECK: test_fmaxp_f64: +; CHECK: fmaxp {{d[0-9]+}}, {{v[0-9]+}}.2d + %val = call double @llvm.aarch64.neon.vpmax.f64.v2f64(<2 x double> %a) + ret double %val } +declare float @llvm.aarch64.neon.vpmin.f32.v2f32(<2 x float>) -declare <1 x float> @llvm.aarch64.neon.vpmin(<2 x float>) - -define <1 x float> @test_fminp_v1f32(<2 x float> %a) { -; CHECK: test_fminp_v1f32: - %val = call <1 x float> @llvm.aarch64.neon.vpmin(<2 x float> %a) -; CHECK: fminp s0, v0.2s - ret <1 x float> %val +define float @test_fminp_f32(<2 x float> %a) { +; CHECK: test_fminp_f32: +; CHECK: fminp {{s[0-9]+}}, {{v[0-9]+}}.2s + %val = call float @llvm.aarch64.neon.vpmin.f32.v2f32(<2 x float> %a) + ret float %val } -declare <1 x double> @llvm.aarch64.neon.vpminq(<2 x double>) +declare double @llvm.aarch64.neon.vpmin.f64.v2f64(<2 x double>) -define <1 x double> @test_fminp_v1f64(<2 x double> %a) { -; CHECK: test_fminp_v1f64: - %val = call <1 x double> @llvm.aarch64.neon.vpminq(<2 x double> %a) -; CHECK: fminp d0, v0.2d - ret <1 x double> %val +define double @test_fminp_f64(<2 x double> %a) { +; CHECK: test_fminp_f64: +; CHECK: fminp {{d[0-9]+}}, {{v[0-9]+}}.2d + %val = call double @llvm.aarch64.neon.vpmin.f64.v2f64(<2 x double> %a) + ret double %val } -declare <1 x float> @llvm.aarch64.neon.vpfmaxnm(<2 x float>) +declare float @llvm.aarch64.neon.vpfmaxnm.f32.v2f32(<2 x float>) -define <1 x float> @test_fmaxnmp_v1f32(<2 x float> %a) { -; CHECK: test_fmaxnmp_v1f32: - %val = call <1 x float> @llvm.aarch64.neon.vpfmaxnm(<2 x float> %a) -; CHECK: fmaxnmp s0, v0.2s - ret <1 x float> %val +define float @test_fmaxnmp_f32(<2 x float> %a) { +; CHECK: test_fmaxnmp_f32: +; CHECK: fmaxnmp {{s[0-9]+}}, {{v[0-9]+}}.2s + %val = call float @llvm.aarch64.neon.vpfmaxnm.f32.v2f32(<2 x float> %a) + ret float %val } -declare <1 x double> @llvm.aarch64.neon.vpfmaxnmq(<2 x double>) +declare double @llvm.aarch64.neon.vpfmaxnm.f64.v2f64(<2 x double>) -define <1 x double> @test_fmaxnmp_v1f64(<2 x double> %a) { -; CHECK: test_fmaxnmp_v1f64: - %val = call <1 x double> @llvm.aarch64.neon.vpfmaxnmq(<2 x double> %a) -; CHECK: fmaxnmp d0, v0.2d - ret <1 x double> %val +define double @test_fmaxnmp_f64(<2 x double> %a) { +; CHECK: test_fmaxnmp_f64: +; CHECK: fmaxnmp {{d[0-9]+}}, {{v[0-9]+}}.2d + %val = call double @llvm.aarch64.neon.vpfmaxnm.f64.v2f64(<2 x double> %a) + ret double %val } -declare <1 x float> @llvm.aarch64.neon.vpfminnm(<2 x float>) +declare float @llvm.aarch64.neon.vpfminnm.f32.v2f32(<2 x float>) -define <1 x float> @test_fminnmp_v1f32(<2 x float> %a) { -; CHECK: test_fminnmp_v1f32: - %val = call <1 x float> @llvm.aarch64.neon.vpfminnm(<2 x float> %a) -; CHECK: fminnmp s0, v0.2s - ret <1 x float> %val +define float @test_fminnmp_f32(<2 x float> %a) { +; CHECK: test_fminnmp_f32: +; CHECK: fminnmp {{s[0-9]+}}, {{v[0-9]+}}.2s + %val = call float 
@llvm.aarch64.neon.vpfminnm.f32.v2f32(<2 x float> %a) + ret float %val } -declare <1 x double> @llvm.aarch64.neon.vpfminnmq(<2 x double>) +declare double @llvm.aarch64.neon.vpfminnm.f64.v2f64(<2 x double>) -define <1 x double> @test_fminnmp_v1f64(<2 x double> %a) { -; CHECK: test_fminnmp_v1f64: - %val = call <1 x double> @llvm.aarch64.neon.vpfminnmq(<2 x double> %a) -; CHECK: fminnmp d0, v0.2d - ret <1 x double> %val +define double @test_fminnmp_f64(<2 x double> %a) { +; CHECK: test_fminnmp_f64: +; CHECK: fminnmp {{d[0-9]+}}, {{v[0-9]+}}.2d + %val = call double @llvm.aarch64.neon.vpfminnm.f64.v2f64(<2 x double> %a) + ret double %val } define float @test_vaddv_f32(<2 x float> %a) { ; CHECK-LABEL: test_vaddv_f32 ; CHECK: faddp {{s[0-9]+}}, {{v[0-9]+}}.2s - %1 = tail call <1 x float> @llvm.aarch64.neon.vaddv.v1f32.v2f32(<2 x float> %a) - %2 = extractelement <1 x float> %1, i32 0 - ret float %2 + %1 = call float @llvm.aarch64.neon.vpfadd.f32.v2f32(<2 x float> %a) + ret float %1 } define float @test_vaddvq_f32(<4 x float> %a) { ; CHECK-LABEL: test_vaddvq_f32 ; CHECK: faddp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s ; CHECK: faddp {{s[0-9]+}}, {{v[0-9]+}}.2s - %1 = tail call <1 x float> @llvm.aarch64.neon.vaddv.v1f32.v4f32(<4 x float> %a) - %2 = extractelement <1 x float> %1, i32 0 - ret float %2 + %1 = call float @llvm.aarch64.neon.vpfadd.f32.v4f32(<4 x float> %a) + ret float %1 } define double @test_vaddvq_f64(<2 x double> %a) { ; CHECK-LABEL: test_vaddvq_f64 ; CHECK: faddp {{d[0-9]+}}, {{v[0-9]+}}.2d - %1 = tail call <1 x double> @llvm.aarch64.neon.vaddv.v1f64.v2f64(<2 x double> %a) - %2 = extractelement <1 x double> %1, i32 0 - ret double %2 + %1 = call double @llvm.aarch64.neon.vpfadd.f64.v2f64(<2 x double> %a) + ret double %1 } define float @test_vmaxv_f32(<2 x float> %a) { ; CHECK-LABEL: test_vmaxv_f32 ; CHECK: fmaxp {{s[0-9]+}}, {{v[0-9]+}}.2s - %1 = tail call <1 x float> @llvm.aarch64.neon.vmaxv.v1f32.v2f32(<2 x float> %a) - %2 = extractelement <1 x float> %1, i32 0 - ret float %2 + %1 = call float @llvm.aarch64.neon.vpmax.f32.v2f32(<2 x float> %a) + ret float %1 } define double @test_vmaxvq_f64(<2 x double> %a) { ; CHECK-LABEL: test_vmaxvq_f64 ; CHECK: fmaxp {{d[0-9]+}}, {{v[0-9]+}}.2d - %1 = tail call <1 x double> @llvm.aarch64.neon.vmaxv.v1f64.v2f64(<2 x double> %a) - %2 = extractelement <1 x double> %1, i32 0 - ret double %2 + %1 = call double @llvm.aarch64.neon.vpmax.f64.v2f64(<2 x double> %a) + ret double %1 } define float @test_vminv_f32(<2 x float> %a) { ; CHECK-LABEL: test_vminv_f32 ; CHECK: fminp {{s[0-9]+}}, {{v[0-9]+}}.2s - %1 = tail call <1 x float> @llvm.aarch64.neon.vminv.v1f32.v2f32(<2 x float> %a) - %2 = extractelement <1 x float> %1, i32 0 - ret float %2 + %1 = call float @llvm.aarch64.neon.vpmin.f32.v2f32(<2 x float> %a) + ret float %1 } define double @test_vminvq_f64(<2 x double> %a) { ; CHECK-LABEL: test_vminvq_f64 ; CHECK: fminp {{d[0-9]+}}, {{v[0-9]+}}.2d - %1 = tail call <1 x double> @llvm.aarch64.neon.vminv.v1f64.v2f64(<2 x double> %a) - %2 = extractelement <1 x double> %1, i32 0 - ret double %2 + %1 = call double @llvm.aarch64.neon.vpmin.f64.v2f64(<2 x double> %a) + ret double %1 } define double @test_vmaxnmvq_f64(<2 x double> %a) { ; CHECK-LABEL: test_vmaxnmvq_f64 ; CHECK: fmaxnmp {{d[0-9]+}}, {{v[0-9]+}}.2d - %1 = tail call <1 x double> @llvm.aarch64.neon.vmaxnmv.v1f64.v2f64(<2 x double> %a) - %2 = extractelement <1 x double> %1, i32 0 - ret double %2 + %1 = call double @llvm.aarch64.neon.vpfmaxnm.f64.v2f64(<2 x double> %a) + ret double %1 } define float 
@test_vmaxnmv_f32(<2 x float> %a) { ; CHECK-LABEL: test_vmaxnmv_f32 ; CHECK: fmaxnmp {{s[0-9]+}}, {{v[0-9]+}}.2s - %1 = tail call <1 x float> @llvm.aarch64.neon.vmaxnmv.v1f32.v2f32(<2 x float> %a) - %2 = extractelement <1 x float> %1, i32 0 - ret float %2 + %1 = call float @llvm.aarch64.neon.vpfmaxnm.f32.v2f32(<2 x float> %a) + ret float %1 } define double @test_vminnmvq_f64(<2 x double> %a) { ; CHECK-LABEL: test_vminnmvq_f64 ; CHECK: fminnmp {{d[0-9]+}}, {{v[0-9]+}}.2d - %1 = tail call <1 x double> @llvm.aarch64.neon.vminnmv.v1f64.v2f64(<2 x double> %a) - %2 = extractelement <1 x double> %1, i32 0 - ret double %2 + %1 = call double @llvm.aarch64.neon.vpfminnm.f64.v2f64(<2 x double> %a) + ret double %1 } define float @test_vminnmv_f32(<2 x float> %a) { ; CHECK-LABEL: test_vminnmv_f32 ; CHECK: fminnmp {{s[0-9]+}}, {{v[0-9]+}}.2s - %1 = tail call <1 x float> @llvm.aarch64.neon.vminnmv.v1f32.v2f32(<2 x float> %a) - %2 = extractelement <1 x float> %1, i32 0 - ret float %2 + %1 = call float @llvm.aarch64.neon.vpfminnm.f32.v2f32(<2 x float> %a) + ret float %1 } define <2 x i64> @test_vpaddq_s64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vpaddq_s64 ; CHECK: addp {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d - %1 = tail call <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64> %a, <2 x i64> %b) + %1 = call <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64> %a, <2 x i64> %b) ret <2 x i64> %1 } define <2 x i64> @test_vpaddq_u64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vpaddq_u64 ; CHECK: addp {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d - %1 = tail call <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64> %a, <2 x i64> %b) + %1 = call <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64> %a, <2 x i64> %b) ret <2 x i64> %1 } define i64 @test_vaddvq_s64(<2 x i64> %a) { ; CHECK-LABEL: test_vaddvq_s64 ; CHECK: addp {{d[0-9]+}}, {{v[0-9]+}}.2d - %1 = tail call <1 x i64> @llvm.aarch64.neon.vaddv.v1i64.v2i64(<2 x i64> %a) + %1 = call <1 x i64> @llvm.aarch64.neon.vaddv.v1i64.v2i64(<2 x i64> %a) %2 = extractelement <1 x i64> %1, i32 0 ret i64 %2 } @@ -215,7 +203,7 @@ define i64 @test_vaddvq_s64(<2 x i64> %a) { define i64 @test_vaddvq_u64(<2 x i64> %a) { ; CHECK-LABEL: test_vaddvq_u64 ; CHECK: addp {{d[0-9]+}}, {{v[0-9]+}}.2d - %1 = tail call <1 x i64> @llvm.aarch64.neon.vaddv.v1i64.v2i64(<2 x i64> %a) + %1 = call <1 x i64> @llvm.aarch64.neon.vaddv.v1i64.v2i64(<2 x i64> %a) %2 = extractelement <1 x i64> %1, i32 0 ret i64 %2 } @@ -224,24 +212,4 @@ declare <1 x i64> @llvm.aarch64.neon.vaddv.v1i64.v2i64(<2 x i64>) declare <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64>, <2 x i64>) -declare <1 x float> @llvm.aarch64.neon.vminnmv.v1f32.v2f32(<2 x float>) - -declare <1 x double> @llvm.aarch64.neon.vminnmv.v1f64.v2f64(<2 x double>) - -declare <1 x float> @llvm.aarch64.neon.vmaxnmv.v1f32.v2f32(<2 x float>) - -declare <1 x double> @llvm.aarch64.neon.vmaxnmv.v1f64.v2f64(<2 x double>) - -declare <1 x double> @llvm.aarch64.neon.vminv.v1f64.v2f64(<2 x double>) - -declare <1 x float> @llvm.aarch64.neon.vminv.v1f32.v2f32(<2 x float>) - -declare <1 x double> @llvm.aarch64.neon.vmaxv.v1f64.v2f64(<2 x double>) - -declare <1 x float> @llvm.aarch64.neon.vmaxv.v1f32.v2f32(<2 x float>) - -declare <1 x double> @llvm.aarch64.neon.vaddv.v1f64.v2f64(<2 x double>) - -declare <1 x float> @llvm.aarch64.neon.vaddv.v1f32.v4f32(<4 x float>) - -declare <1 x float> @llvm.aarch64.neon.vaddv.v1f32.v2f32(<2 x float>) \ No newline at end of file +declare float @llvm.aarch64.neon.vpfadd.f32.v4f32(<4 x float>) diff --git 
a/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll b/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll index 83ceb4e..7c9ffa0 100644 --- a/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll +++ b/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll @@ -7,14 +7,14 @@ declare <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64>, <1 x i64>) define <1 x i64> @test_urshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_urshl_v1i64: %tmp1 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: urshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +;CHECK: urshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } define <1 x i64> @test_srshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_srshl_v1i64: %tmp1 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: srshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +;CHECK: srshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } @@ -24,14 +24,14 @@ declare <1 x i64> @llvm.aarch64.neon.vrshlds(<1 x i64>, <1 x i64>) define <1 x i64> @test_urshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_urshl_v1i64_aarch64: %tmp1 = call <1 x i64> @llvm.aarch64.neon.vrshldu(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: urshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +;CHECK: urshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } define <1 x i64> @test_srshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_srshl_v1i64_aarch64: %tmp1 = call <1 x i64> @llvm.aarch64.neon.vrshlds(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: srshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +;CHECK: srshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } diff --git a/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll b/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll index bd66f80..5c010ef 100644 --- a/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll +++ b/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll @@ -6,14 +6,14 @@ declare <1 x i8> @llvm.arm.neon.vqadds.v1i8(<1 x i8>, <1 x i8>) define <1 x i8> @test_uqadd_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { ; CHECK: test_uqadd_v1i8_aarch64: %tmp1 = call <1 x i8> @llvm.arm.neon.vqaddu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) -;CHECK: uqadd {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} +;CHECK: uqadd {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} ret <1 x i8> %tmp1 } define <1 x i8> @test_sqadd_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { ; CHECK: test_sqadd_v1i8_aarch64: %tmp1 = call <1 x i8> @llvm.arm.neon.vqadds.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) -;CHECK: sqadd {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} +;CHECK: sqadd {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} ret <1 x i8> %tmp1 } @@ -23,14 +23,14 @@ declare <1 x i8> @llvm.arm.neon.vqsubs.v1i8(<1 x i8>, <1 x i8>) define <1 x i8> @test_uqsub_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { ; CHECK: test_uqsub_v1i8_aarch64: %tmp1 = call <1 x i8> @llvm.arm.neon.vqsubu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) -;CHECK: uqsub {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} +;CHECK: uqsub {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} ret <1 x i8> %tmp1 } define <1 x i8> @test_sqsub_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { ; CHECK: test_sqsub_v1i8_aarch64: %tmp1 = call <1 x i8> @llvm.arm.neon.vqsubs.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) -;CHECK: sqsub {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} +;CHECK: sqsub {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} ret <1 x i8> %tmp1 } @@ -40,14 +40,14 @@ declare <1 x i16> @llvm.arm.neon.vqadds.v1i16(<1 x i16>, <1 x i16>) define <1 x i16> @test_uqadd_v1i16_aarch64(<1 x i16> %lhs, 
<1 x i16> %rhs) { ; CHECK: test_uqadd_v1i16_aarch64: %tmp1 = call <1 x i16> @llvm.arm.neon.vqaddu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) -;CHECK: uqadd {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}} +;CHECK: uqadd {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} ret <1 x i16> %tmp1 } define <1 x i16> @test_sqadd_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { ; CHECK: test_sqadd_v1i16_aarch64: %tmp1 = call <1 x i16> @llvm.arm.neon.vqadds.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) -;CHECK: sqadd {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}} +;CHECK: sqadd {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} ret <1 x i16> %tmp1 } @@ -57,14 +57,14 @@ declare <1 x i16> @llvm.arm.neon.vqsubs.v1i16(<1 x i16>, <1 x i16>) define <1 x i16> @test_uqsub_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { ; CHECK: test_uqsub_v1i16_aarch64: %tmp1 = call <1 x i16> @llvm.arm.neon.vqsubu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) -;CHECK: uqsub {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}} +;CHECK: uqsub {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} ret <1 x i16> %tmp1 } define <1 x i16> @test_sqsub_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { ; CHECK: test_sqsub_v1i16_aarch64: %tmp1 = call <1 x i16> @llvm.arm.neon.vqsubs.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) -;CHECK: sqsub {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}} +;CHECK: sqsub {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} ret <1 x i16> %tmp1 } @@ -74,14 +74,14 @@ declare <1 x i32> @llvm.arm.neon.vqadds.v1i32(<1 x i32>, <1 x i32>) define <1 x i32> @test_uqadd_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { ; CHECK: test_uqadd_v1i32_aarch64: %tmp1 = call <1 x i32> @llvm.arm.neon.vqaddu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) -;CHECK: uqadd {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}} +;CHECK: uqadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} ret <1 x i32> %tmp1 } define <1 x i32> @test_sqadd_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { ; CHECK: test_sqadd_v1i32_aarch64: %tmp1 = call <1 x i32> @llvm.arm.neon.vqadds.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) -;CHECK: sqadd {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}} +;CHECK: sqadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} ret <1 x i32> %tmp1 } @@ -91,7 +91,7 @@ declare <1 x i32> @llvm.arm.neon.vqsubs.v1i32(<1 x i32>, <1 x i32>) define <1 x i32> @test_uqsub_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { ; CHECK: test_uqsub_v1i32_aarch64: %tmp1 = call <1 x i32> @llvm.arm.neon.vqsubu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) -;CHECK: uqsub {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}} +;CHECK: uqsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} ret <1 x i32> %tmp1 } @@ -99,7 +99,7 @@ define <1 x i32> @test_uqsub_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { define <1 x i32> @test_sqsub_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { ; CHECK: test_sqsub_v1i32_aarch64: %tmp1 = call <1 x i32> @llvm.arm.neon.vqsubs.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) -;CHECK: sqsub {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}} +;CHECK: sqsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} ret <1 x i32> %tmp1 } @@ -109,14 +109,14 @@ declare <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64>, <1 x i64>) define <1 x i64> @test_uqadd_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_uqadd_v1i64_aarch64: %tmp1 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: uqadd {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +;CHECK: uqadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } define <1 x i64> @test_sqadd_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_sqadd_v1i64_aarch64: %tmp1 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: sqadd {{d[0-31]+}}, 
{{d[0-31]+}}, {{d[0-31]+}} +;CHECK: sqadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } @@ -126,14 +126,14 @@ declare <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>) define <1 x i64> @test_uqsub_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_uqsub_v1i64_aarch64: %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: uqsub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +;CHECK: uqsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } define <1 x i64> @test_sqsub_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_sqsub_v1i64_aarch64: %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: sqsub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +;CHECK: sqsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } diff --git a/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll b/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll index 0fd67df..dbf9669 100644 --- a/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll +++ b/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll @@ -6,7 +6,7 @@ declare <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64>, <1 x i64>) define <1 x i64> @test_uqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_uqrshl_v1i64: %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: uqrshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +;CHECK: uqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } @@ -14,7 +14,7 @@ define <1 x i64> @test_uqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { define <1 x i64> @test_sqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_sqrshl_v1i64: %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: sqrshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +;CHECK: sqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } @@ -24,7 +24,7 @@ declare <1 x i8> @llvm.aarch64.neon.vqrshls.v1i8(<1 x i8>, <1 x i8>) define <1 x i8> @test_uqrshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { ; CHECK: test_uqrshl_v1i8_aarch64: %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqrshlu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) -;CHECK: uqrshl {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} +;CHECK: uqrshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} ret <1 x i8> %tmp1 } @@ -32,7 +32,7 @@ define <1 x i8> @test_uqrshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { define <1 x i8> @test_sqrshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { ; CHECK: test_sqrshl_v1i8_aarch64: %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqrshls.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) -;CHECK: sqrshl {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} +;CHECK: sqrshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} ret <1 x i8> %tmp1 } @@ -42,7 +42,7 @@ declare <1 x i16> @llvm.aarch64.neon.vqrshls.v1i16(<1 x i16>, <1 x i16>) define <1 x i16> @test_uqrshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { ; CHECK: test_uqrshl_v1i16_aarch64: %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqrshlu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) -;CHECK: uqrshl {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}} +;CHECK: uqrshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} ret <1 x i16> %tmp1 } @@ -50,7 +50,7 @@ define <1 x i16> @test_uqrshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { define <1 x i16> @test_sqrshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { ; CHECK: test_sqrshl_v1i16_aarch64: %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqrshls.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) -;CHECK: sqrshl {{h[0-31]+}}, {{h[0-31]+}}, 
{{h[0-31]+}} +;CHECK: sqrshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} ret <1 x i16> %tmp1 } @@ -60,7 +60,7 @@ declare <1 x i32> @llvm.aarch64.neon.vqrshls.v1i32(<1 x i32>, <1 x i32>) define <1 x i32> @test_uqrshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { ; CHECK: test_uqrshl_v1i32_aarch64: %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqrshlu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) -;CHECK: uqrshl {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}} +;CHECK: uqrshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} ret <1 x i32> %tmp1 } @@ -68,7 +68,7 @@ define <1 x i32> @test_uqrshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { define <1 x i32> @test_sqrshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { ; CHECK: test_sqrshl_v1i32_aarch64: %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqrshls.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) -;CHECK: sqrshl {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}} +;CHECK: sqrshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} ret <1 x i32> %tmp1 } @@ -78,7 +78,7 @@ declare <1 x i64> @llvm.aarch64.neon.vqrshls.v1i64(<1 x i64>, <1 x i64>) define <1 x i64> @test_uqrshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_uqrshl_v1i64_aarch64: %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqrshlu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: uqrshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +;CHECK: uqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } @@ -86,7 +86,7 @@ define <1 x i64> @test_uqrshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { define <1 x i64> @test_sqrshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_sqrshl_v1i64_aarch64: %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqrshls.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: sqrshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +;CHECK: sqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } diff --git a/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll b/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll index 8fdea24..0a1f4c9 100644 --- a/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll +++ b/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll @@ -6,14 +6,14 @@ declare <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64>, <1 x i64>) define <1 x i64> @test_uqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_uqshl_v1i64: %tmp1 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: uqshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +;CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } define <1 x i64> @test_sqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_sqshl_v1i64: %tmp1 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: sqshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +;CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } @@ -23,14 +23,14 @@ declare <1 x i8> @llvm.aarch64.neon.vqshls.v1i8(<1 x i8>, <1 x i8>) define <1 x i8> @test_uqshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { ; CHECK: test_uqshl_v1i8_aarch64: %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqshlu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) -;CHECK: uqshl {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} +;CHECK: uqshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} ret <1 x i8> %tmp1 } define <1 x i8> @test_sqshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { ; CHECK: test_sqshl_v1i8_aarch64: %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqshls.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) -;CHECK: sqshl {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} +;CHECK: sqshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} ret <1 x i8> %tmp1 } @@ -40,14 +40,14 @@ declare <1 x i16> 
@llvm.aarch64.neon.vqshls.v1i16(<1 x i16>, <1 x i16>) define <1 x i16> @test_uqshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { ; CHECK: test_uqshl_v1i16_aarch64: %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqshlu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) -;CHECK: uqshl {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}} +;CHECK: uqshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} ret <1 x i16> %tmp1 } define <1 x i16> @test_sqshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { ; CHECK: test_sqshl_v1i16_aarch64: %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqshls.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) -;CHECK: sqshl {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}} +;CHECK: sqshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} ret <1 x i16> %tmp1 } @@ -57,14 +57,14 @@ declare <1 x i32> @llvm.aarch64.neon.vqshls.v1i32(<1 x i32>, <1 x i32>) define <1 x i32> @test_uqshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { ; CHECK: test_uqshl_v1i32_aarch64: %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqshlu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) -;CHECK: uqshl {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}} +;CHECK: uqshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} ret <1 x i32> %tmp1 } define <1 x i32> @test_sqshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { ; CHECK: test_sqshl_v1i32_aarch64: %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqshls.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) -;CHECK: sqshl {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}} +;CHECK: sqshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} ret <1 x i32> %tmp1 } @@ -74,14 +74,14 @@ declare <1 x i64> @llvm.aarch64.neon.vqshls.v1i64(<1 x i64>, <1 x i64>) define <1 x i64> @test_uqshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_uqshl_v1i64_aarch64: %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqshlu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: uqshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +;CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } define <1 x i64> @test_sqshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_sqshl_v1i64_aarch64: %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqshls.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: sqshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +;CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } diff --git a/test/CodeGen/AArch64/neon-scalar-shift.ll b/test/CodeGen/AArch64/neon-scalar-shift.ll index 1222be5..b712ea4 100644 --- a/test/CodeGen/AArch64/neon-scalar-shift.ll +++ b/test/CodeGen/AArch64/neon-scalar-shift.ll @@ -6,7 +6,7 @@ declare <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64>, <1 x i64>) define <1 x i64> @test_ushl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_ushl_v1i64: %tmp1 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: ushl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +; CHECK: ushl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } @@ -14,7 +14,7 @@ define <1 x i64> @test_ushl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { define <1 x i64> @test_sshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_sshl_v1i64: %tmp1 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: sshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +; CHECK: sshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } @@ -24,15 +24,213 @@ declare <1 x i64> @llvm.aarch64.neon.vshlds(<1 x i64>, <1 x i64>) define <1 x i64> @test_ushl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_ushl_v1i64_aarch64: %tmp1 = call <1 x i64> @llvm.aarch64.neon.vshldu(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: ushl {{d[0-31]+}}, {{d[0-31]+}}, 
{{d[0-31]+}} +; CHECK: ushl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } define <1 x i64> @test_sshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_sshl_v1i64_aarch64: %tmp1 = call <1 x i64> @llvm.aarch64.neon.vshlds(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: sshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +; CHECK: sshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } +define <1 x i64> @test_vtst_s64(<1 x i64> %a, <1 x i64> %b) { +; CHECK-LABEL: test_vtst_s64 +; CHECK: cmtst {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +entry: + %0 = and <1 x i64> %a, %b + %1 = icmp ne <1 x i64> %0, zeroinitializer + %vtst.i = sext <1 x i1> %1 to <1 x i64> + ret <1 x i64> %vtst.i +} + +define <1 x i64> @test_vtst_u64(<1 x i64> %a, <1 x i64> %b) { +; CHECK-LABEL: test_vtst_u64 +; CHECK: cmtst {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +entry: + %0 = and <1 x i64> %a, %b + %1 = icmp ne <1 x i64> %0, zeroinitializer + %vtst.i = sext <1 x i1> %1 to <1 x i64> + ret <1 x i64> %vtst.i +} + +define <1 x i64> @test_vsli_n_p64(<1 x i64> %a, <1 x i64> %b) { +; CHECK-LABEL: test_vsli_n_p64 +; CHECK: sli {{d[0-9]+}}, {{d[0-9]+}}, #0 +entry: + %vsli_n2 = tail call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> %a, <1 x i64> %b, i32 0) + ret <1 x i64> %vsli_n2 +} + +declare <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64>, <1 x i64>, i32) + +define <2 x i64> @test_vsliq_n_p64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vsliq_n_p64 +; CHECK: sli {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0 +entry: + %vsli_n2 = tail call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> %a, <2 x i64> %b, i32 0) + ret <2 x i64> %vsli_n2 +} + +declare <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32) + +define <2 x i32> @test_vrsqrte_u32(<2 x i32> %a) { +; CHECK-LABEL: test_vrsqrte_u32 +; CHECK: ursqrte {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +entry: + %vrsqrte1.i = tail call <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32> %a) + ret <2 x i32> %vrsqrte1.i +} + +define <4 x i32> @test_vrsqrteq_u32(<4 x i32> %a) { +; CHECK-LABEL: test_vrsqrteq_u32 +; CHECK: ursqrte {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vrsqrte1.i = tail call <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32> %a) + ret <4 x i32> %vrsqrte1.i +} + +define <8 x i8> @test_vqshl_n_s8(<8 x i8> %a) { +; CHECK-LABEL: test_vqshl_n_s8 +; CHECK: sqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0 +entry: + %vqshl_n = tail call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer) + ret <8 x i8> %vqshl_n +} + +declare <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8>, <8 x i8>) + +define <16 x i8> @test_vqshlq_n_s8(<16 x i8> %a) { +; CHECK-LABEL: test_vqshlq_n_s8 +; CHECK: sqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0 +entry: + %vqshl_n = tail call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer) + ret <16 x i8> %vqshl_n +} + +declare <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8>, <16 x i8>) + +define <4 x i16> @test_vqshl_n_s16(<4 x i16> %a) { +; CHECK-LABEL: test_vqshl_n_s16 +; CHECK: sqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0 +entry: + %vqshl_n1 = tail call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %a, <4 x i16> zeroinitializer) + ret <4 x i16> %vqshl_n1 +} + +declare <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16>, <4 x i16>) + +define <8 x i16> @test_vqshlq_n_s16(<8 x i16> %a) { +; CHECK-LABEL: test_vqshlq_n_s16 +; CHECK: sqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0 +entry: + %vqshl_n1 = tail call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %a, <8 x i16> zeroinitializer) + ret <8 x i16> %vqshl_n1 
+} + +declare <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16>, <8 x i16>) + +define <2 x i32> @test_vqshl_n_s32(<2 x i32> %a) { +; CHECK-LABEL: test_vqshl_n_s32 +; CHECK: sqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0 +entry: + %vqshl_n1 = tail call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %a, <2 x i32> zeroinitializer) + ret <2 x i32> %vqshl_n1 +} + +declare <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32>, <2 x i32>) + +define <4 x i32> @test_vqshlq_n_s32(<4 x i32> %a) { +; CHECK-LABEL: test_vqshlq_n_s32 +; CHECK: sqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0 +entry: + %vqshl_n1 = tail call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %a, <4 x i32> zeroinitializer) + ret <4 x i32> %vqshl_n1 +} + +declare <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32>, <4 x i32>) + +define <2 x i64> @test_vqshlq_n_s64(<2 x i64> %a) { +; CHECK-LABEL: test_vqshlq_n_s64 +; CHECK: sqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0 +entry: + %vqshl_n1 = tail call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %a, <2 x i64> zeroinitializer) + ret <2 x i64> %vqshl_n1 +} + +declare <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64>, <2 x i64>) + +define <8 x i8> @test_vqshl_n_u8(<8 x i8> %a) { +; CHECK-LABEL: test_vqshl_n_u8 +; CHECK: uqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0 +entry: + %vqshl_n = tail call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer) + ret <8 x i8> %vqshl_n +} + +declare <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8>, <8 x i8>) + +define <16 x i8> @test_vqshlq_n_u8(<16 x i8> %a) { +; CHECK-LABEL: test_vqshlq_n_u8 +; CHECK: uqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0 +entry: + %vqshl_n = tail call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer) + ret <16 x i8> %vqshl_n +} + +declare <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8>, <16 x i8>) + +define <4 x i16> @test_vqshl_n_u16(<4 x i16> %a) { +; CHECK-LABEL: test_vqshl_n_u16 +; CHECK: uqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0 +entry: + %vqshl_n1 = tail call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %a, <4 x i16> zeroinitializer) + ret <4 x i16> %vqshl_n1 +} + +declare <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16>, <4 x i16>) + +define <8 x i16> @test_vqshlq_n_u16(<8 x i16> %a) { +; CHECK-LABEL: test_vqshlq_n_u16 +; CHECK: uqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0 +entry: + %vqshl_n1 = tail call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %a, <8 x i16> zeroinitializer) + ret <8 x i16> %vqshl_n1 +} + +declare <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16>, <8 x i16>) + +define <2 x i32> @test_vqshl_n_u32(<2 x i32> %a) { +; CHECK-LABEL: test_vqshl_n_u32 +; CHECK: uqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0 +entry: + %vqshl_n1 = tail call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %a, <2 x i32> zeroinitializer) + ret <2 x i32> %vqshl_n1 +} + +declare <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32>, <2 x i32>) + +define <4 x i32> @test_vqshlq_n_u32(<4 x i32> %a) { +; CHECK-LABEL: test_vqshlq_n_u32 +; CHECK: uqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0 +entry: + %vqshl_n1 = tail call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %a, <4 x i32> zeroinitializer) + ret <4 x i32> %vqshl_n1 +} + +declare <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32>, <4 x i32>) + +define <2 x i64> @test_vqshlq_n_u64(<2 x i64> %a) { +; CHECK-LABEL: test_vqshlq_n_u64 +; CHECK: uqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, +entry: + %vqshl_n1 = tail call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %a, <2 x i64> zeroinitializer) + ret <2 x i64> %vqshl_n1 +} + +declare <2 x i64> 
@llvm.arm.neon.vqshiftu.v2i64(<2 x i64>, <2 x i64>) + +declare <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32>) +declare <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32>) diff --git a/test/CodeGen/AArch64/neon-select_cc.ll b/test/CodeGen/AArch64/neon-select_cc.ll new file mode 100644 index 0000000..f6b5d3c --- /dev/null +++ b/test/CodeGen/AArch64/neon-select_cc.ll @@ -0,0 +1,202 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s + +define <8x i8> @test_select_cc_v8i8_i8(i8 %a, i8 %b, <8x i8> %c, <8x i8> %d ) { +; CHECK-LABEL: test_select_cc_v8i8_i8: +; CHECK: and w0, w0, #0xff +; CHECK-NEXT: cmp w0, w1, uxtb +; CHECK-NEXT: csinv w0, wzr, wzr, ne +; CHECK-NEXT: dup v{{[0-9]+}}.8b, w0 +; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v0.8b, v1.8b + %cmp31 = icmp eq i8 %a, %b + %e = select i1 %cmp31, <8x i8> %c, <8x i8> %d + ret <8x i8> %e +} + +define <8x i8> @test_select_cc_v8i8_f32(float %a, float %b, <8x i8> %c, <8x i8> %d ) { +; CHECK-LABEL: test_select_cc_v8i8_f32: +; CHECK: fcmeq v{{[0-9]+}}.4s, v0.4s, v1.4s +; CHECK-NEXT: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0] +; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v2.8b, v3.8b + %cmp31 = fcmp oeq float %a, %b + %e = select i1 %cmp31, <8x i8> %c, <8x i8> %d + ret <8x i8> %e +} + +define <8x i8> @test_select_cc_v8i8_f64(double %a, double %b, <8x i8> %c, <8x i8> %d ) { +; CHECK-LABEL: test_select_cc_v8i8_f64: +; CHECK: fcmeq v{{[0-9]+}}.2d, v0.2d, v1.2d +; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v2.8b, v3.8b + %cmp31 = fcmp oeq double %a, %b + %e = select i1 %cmp31, <8x i8> %c, <8x i8> %d + ret <8x i8> %e +} + +define <16x i8> @test_select_cc_v16i8_i8(i8 %a, i8 %b, <16x i8> %c, <16x i8> %d ) { +; CHECK-LABEL: test_select_cc_v16i8_i8: +; CHECK: and w0, w0, #0xff +; CHECK-NEXT: cmp w0, w1, uxtb +; CHECK-NEXT: csinv w0, wzr, wzr, ne +; CHECK-NEXT: dup v{{[0-9]+}}.16b, w0 +; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v0.16b, v1.16b + %cmp31 = icmp eq i8 %a, %b + %e = select i1 %cmp31, <16x i8> %c, <16x i8> %d + ret <16x i8> %e +} + +define <16x i8> @test_select_cc_v16i8_f32(float %a, float %b, <16x i8> %c, <16x i8> %d ) { +; CHECK-LABEL: test_select_cc_v16i8_f32: +; CHECK: fcmeq v{{[0-9]+}}.4s, v0.4s, v1.4s +; CHECK-NEXT: dup v{{[0-9]+}}.4s, v{{[0-9]+}}.s[0] +; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v2.16b, v3.16b + %cmp31 = fcmp oeq float %a, %b + %e = select i1 %cmp31, <16x i8> %c, <16x i8> %d + ret <16x i8> %e +} + +define <16x i8> @test_select_cc_v16i8_f64(double %a, double %b, <16x i8> %c, <16x i8> %d ) { +; CHECK-LABEL: test_select_cc_v16i8_f64: +; CHECK: fcmeq v{{[0-9]+}}.2d, v0.2d, v1.2d +; CHECK-NEXT: dup v{{[0-9]+}}.2d, v{{[0-9]+}}.d[0] +; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v2.16b, v3.16b + %cmp31 = fcmp oeq double %a, %b + %e = select i1 %cmp31, <16x i8> %c, <16x i8> %d + ret <16x i8> %e +} + +define <4x i16> @test_select_cc_v4i16(i16 %a, i16 %b, <4x i16> %c, <4x i16> %d ) { +; CHECK-LABEL: test_select_cc_v4i16: +; CHECK: and w0, w0, #0xffff +; CHECK-NEXT: cmp w0, w1, uxth +; CHECK-NEXT: csinv w0, wzr, wzr, ne +; CHECK-NEXT: dup v{{[0-9]+}}.4h, w0 +; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v0.8b, v1.8b + %cmp31 = icmp eq i16 %a, %b + %e = select i1 %cmp31, <4x i16> %c, <4x i16> %d + ret <4x i16> %e +} + +define <8x i16> @test_select_cc_v8i16(i16 %a, i16 %b, <8x i16> %c, <8x i16> %d ) { +; CHECK-LABEL: test_select_cc_v8i16: +; CHECK: and w0, w0, #0xffff +; CHECK-NEXT: cmp w0, w1, uxth +; CHECK-NEXT: csinv w0, wzr, wzr, ne +; CHECK-NEXT: dup v{{[0-9]+}}.8h, w0 +; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v0.16b, v1.16b + %cmp31 = icmp eq i16 
%a, %b + %e = select i1 %cmp31, <8x i16> %c, <8x i16> %d + ret <8x i16> %e +} + +define <2x i32> @test_select_cc_v2i32(i32 %a, i32 %b, <2x i32> %c, <2x i32> %d ) { +; CHECK-LABEL: test_select_cc_v2i32: +; CHECK: cmp w0, w1, uxtw +; CHECK-NEXT: csinv w0, wzr, wzr, ne +; CHECK-NEXT: dup v{{[0-9]+}}.2s, w0 +; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v0.8b, v1.8b + %cmp31 = icmp eq i32 %a, %b + %e = select i1 %cmp31, <2x i32> %c, <2x i32> %d + ret <2x i32> %e +} + +define <4x i32> @test_select_cc_v4i32(i32 %a, i32 %b, <4x i32> %c, <4x i32> %d ) { +; CHECK-LABEL: test_select_cc_v4i32: +; CHECK: cmp w0, w1, uxtw +; CHECK-NEXT: csinv w0, wzr, wzr, ne +; CHECK-NEXT: dup v{{[0-9]+}}.4s, w0 +; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v0.16b, v1.16b + %cmp31 = icmp eq i32 %a, %b + %e = select i1 %cmp31, <4x i32> %c, <4x i32> %d + ret <4x i32> %e +} + +define <1x i64> @test_select_cc_v1i64(i64 %a, i64 %b, <1x i64> %c, <1x i64> %d ) { +; CHECK-LABEL: test_select_cc_v1i64: +; CHECK: cmp x0, x1 +; CHECK-NEXT: csinv x0, xzr, xzr, ne +; CHECK-NEXT: fmov d{{[0-9]+}}, x0 +; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v0.8b, v1.8b + %cmp31 = icmp eq i64 %a, %b + %e = select i1 %cmp31, <1x i64> %c, <1x i64> %d + ret <1x i64> %e +} + +define <2x i64> @test_select_cc_v2i64(i64 %a, i64 %b, <2x i64> %c, <2x i64> %d ) { +; CHECK-LABEL: test_select_cc_v2i64: +; CHECK: cmp x0, x1 +; CHECK-NEXT: csinv x0, xzr, xzr, ne +; CHECK-NEXT: dup v{{[0-9]+}}.2d, x0 +; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v0.16b, v1.16b + %cmp31 = icmp eq i64 %a, %b + %e = select i1 %cmp31, <2x i64> %c, <2x i64> %d + ret <2x i64> %e +} + +define <1 x float> @test_select_cc_v1f32(float %a, float %b, <1 x float> %c, <1 x float> %d ) { +; CHECK-LABEL: test_select_cc_v1f32: +; CHECK: fcmp s0, s1 +; CHECK-NEXT: fcsel s0, s2, s3, eq + %cmp31 = fcmp oeq float %a, %b + %e = select i1 %cmp31, <1 x float> %c, <1 x float> %d + ret <1 x float> %e +} + +define <2 x float> @test_select_cc_v2f32(float %a, float %b, <2 x float> %c, <2 x float> %d ) { +; CHECK-LABEL: test_select_cc_v2f32: +; CHECK: fcmeq v{{[0-9]+}}.4s, v0.4s, v1.4s +; CHECK-NEXT: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0] +; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v2.8b, v3.8b + %cmp31 = fcmp oeq float %a, %b + %e = select i1 %cmp31, <2 x float> %c, <2 x float> %d + ret <2 x float> %e +} + +define <4x float> @test_select_cc_v4f32(float %a, float %b, <4x float> %c, <4x float> %d ) { +; CHECK-LABEL: test_select_cc_v4f32: +; CHECK: fcmeq v{{[0-9]+}}.4s, v0.4s, v1.4s +; CHECK-NEXT: dup v{{[0-9]+}}.4s, v{{[0-9]+}}.s[0] +; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v2.16b, v3.16b + %cmp31 = fcmp oeq float %a, %b + %e = select i1 %cmp31, <4x float> %c, <4x float> %d + ret <4x float> %e +} + +define <4x float> @test_select_cc_v4f32_icmp(i32 %a, i32 %b, <4x float> %c, <4x float> %d ) { +; CHECK-LABEL: test_select_cc_v4f32_icmp: +; CHECK: cmp w0, w1, uxtw +; CHECK: csinv w0, wzr, wzr, ne +; CHECK-NEXT: dup v{{[0-9]+}}.4s, w0 +; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v0.16b, v1.16b + %cmp31 = icmp eq i32 %a, %b + %e = select i1 %cmp31, <4x float> %c, <4x float> %d + ret <4x float> %e +} + +define <1 x double> @test_select_cc_v1f64(double %a, double %b, <1 x double> %c, <1 x double> %d ) { +; CHECK-LABEL: test_select_cc_v1f64: +; CHECK: fcmeq v{{[0-9]+}}.2d, v0.2d, v1.2d +; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v2.8b, v3.8b + %cmp31 = fcmp oeq double %a, %b + %e = select i1 %cmp31, <1 x double> %c, <1 x double> %d + ret <1 x double> %e +} + +define <1 x double> @test_select_cc_v1f64_icmp(i64 %a, i64 %b, <1 x double> %c, <1 x double> %d ) { +; CHECK-LABEL: 
test_select_cc_v1f64_icmp: +; CHECK: cmp x0, x1 +; CHECK-NEXT: csinv x0, xzr, xzr, ne +; CHECK-NEXT: fmov d{{[0-9]+}}, x0 +; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v0.8b, v1.8b + %cmp31 = icmp eq i64 %a, %b + %e = select i1 %cmp31, <1 x double> %c, <1 x double> %d + ret <1 x double> %e +} + +define <2 x double> @test_select_cc_v2f64(double %a, double %b, <2 x double> %c, <2 x double> %d ) { +; CHECK-LABEL: test_select_cc_v2f64: +; CHECK: fcmeq v{{[0-9]+}}.2d, v0.2d, v1.2d +; CHECK-NEXT: dup v{{[0-9]+}}.2d, v{{[0-9]+}}.d[0] +; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v2.16b, v3.16b + %cmp31 = fcmp oeq double %a, %b + %e = select i1 %cmp31, <2 x double> %c, <2 x double> %d + ret <2 x double> %e +} diff --git a/test/CodeGen/AArch64/neon-shift-left-long.ll b/test/CodeGen/AArch64/neon-shift-left-long.ll index d45c476..d10d551 100644 --- a/test/CodeGen/AArch64/neon-shift-left-long.ll +++ b/test/CodeGen/AArch64/neon-shift-left-long.ll @@ -191,3 +191,13 @@ define <2 x i64> @test_ushll2_shl0_v4i32(<4 x i32> %a) { %tmp = zext <2 x i32> %1 to <2 x i64> ret <2 x i64> %tmp } + +define <8 x i16> @test_ushll_cmp(<8 x i8> %a, <8 x i8> %b) #0 { +; CHECK: test_ushll_cmp: +; CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK-NEXT: ushll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #0 + %cmp.i = icmp eq <8 x i8> %a, %b + %vcgtz.i.i = sext <8 x i1> %cmp.i to <8 x i8> + %vmovl.i.i.i = zext <8 x i8> %vcgtz.i.i to <8 x i16> + ret <8 x i16> %vmovl.i.i.i +} diff --git a/test/CodeGen/AArch64/neon-shl-ashr-lshr.ll b/test/CodeGen/AArch64/neon-shl-ashr-lshr.ll new file mode 100644 index 0000000..0b520d7 --- /dev/null +++ b/test/CodeGen/AArch64/neon-shl-ashr-lshr.ll @@ -0,0 +1,333 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +define <8 x i8> @shl.v8i8(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: shl.v8i8: +; CHECK: ushl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b + %c = shl <8 x i8> %a, %b + ret <8 x i8> %c +} + +define <4 x i16> @shl.v4i16(<4 x i16> %a, <4 x i16> %b) { +; CHECK-LABEL: shl.v4i16: +; CHECK: ushl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h + %c = shl <4 x i16> %a, %b + ret <4 x i16> %c +} + +define <2 x i32> @shl.v2i32(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: shl.v2i32: +; CHECK: ushl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %c = shl <2 x i32> %a, %b + ret <2 x i32> %c +} + +define <1 x i64> @shl.v1i64(<1 x i64> %a, <1 x i64> %b) { +; CHECK-LABEL: shl.v1i64: +; CHECK: ushl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} + %c = shl <1 x i64> %a, %b + ret <1 x i64> %c +} + +define <16 x i8> @shl.v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: shl.v16i8: +; CHECK: ushl v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b + %c = shl <16 x i8> %a, %b + ret <16 x i8> %c +} + +define <8 x i16> @shl.v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: shl.v8i16: +; CHECK: ushl v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h + %c = shl <8 x i16> %a, %b + ret <8 x i16> %c +} + +define <4 x i32> @shl.v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: shl.v4i32: +; CHECK: ushl v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %c = shl <4 x i32> %a, %b + ret <4 x i32> %c +} + +define <2 x i64> @shl.v2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: shl.v2i64: +; CHECK: ushl v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %c = shl <2 x i64> %a, %b + ret <2 x i64> %c +} + +define <8 x i8> @lshr.v8i8(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: lshr.v8i8: +; CHECK: neg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b +; CHECK: ushl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b + %c 
= lshr <8 x i8> %a, %b + ret <8 x i8> %c +} + +define <4 x i16> @lshr.v4i16(<4 x i16> %a, <4 x i16> %b) { +; CHECK-LABEL: lshr.v4i16: +; CHECK: neg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h +; CHECK: ushl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h + %c = lshr <4 x i16> %a, %b + ret <4 x i16> %c +} + +define <2 x i32> @lshr.v2i32(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: lshr.v2i32: +; CHECK: neg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +; CHECK: ushl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %c = lshr <2 x i32> %a, %b + ret <2 x i32> %c +} + +define <1 x i64> @lshr.v1i64(<1 x i64> %a, <1 x i64> %b) { +; CHECK-LABEL: lshr.v1i64: +; CHECK: neg d{{[0-9]+}}, d{{[0-9]+}} +; CHECK: ushl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} + %c = lshr <1 x i64> %a, %b + ret <1 x i64> %c +} + +define <16 x i8> @lshr.v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: lshr.v16i8: +; CHECK: neg v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +; CHECK: ushl v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b + %c = lshr <16 x i8> %a, %b + ret <16 x i8> %c +} + +define <8 x i16> @lshr.v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: lshr.v8i16: +; CHECK: neg v{{[0-9]+}}.8h, v{{[0-9]+}}.8h +; CHECK: ushl v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h + %c = lshr <8 x i16> %a, %b + ret <8 x i16> %c +} + +define <4 x i32> @lshr.v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: lshr.v4i32: +; CHECK: neg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +; CHECK: ushl v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %c = lshr <4 x i32> %a, %b + ret <4 x i32> %c +} + +define <2 x i64> @lshr.v2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: lshr.v2i64: +; CHECK: neg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +; CHECK: ushl v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %c = lshr <2 x i64> %a, %b + ret <2 x i64> %c +} + +define <8 x i8> @ashr.v8i8(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: ashr.v8i8: +; CHECK: neg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b +; CHECK: sshl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b + %c = ashr <8 x i8> %a, %b + ret <8 x i8> %c +} + +define <4 x i16> @ashr.v4i16(<4 x i16> %a, <4 x i16> %b) { +; CHECK-LABEL: ashr.v4i16: +; CHECK: neg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h +; CHECK: sshl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h + %c = ashr <4 x i16> %a, %b + ret <4 x i16> %c +} + +define <2 x i32> @ashr.v2i32(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: ashr.v2i32: +; CHECK: neg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +; CHECK: sshl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %c = ashr <2 x i32> %a, %b + ret <2 x i32> %c +} + +define <1 x i64> @ashr.v1i64(<1 x i64> %a, <1 x i64> %b) { +; CHECK-LABEL: ashr.v1i64: +; CHECK: neg d{{[0-9]+}}, d{{[0-9]+}} +; CHECK: sshl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} + %c = ashr <1 x i64> %a, %b + ret <1 x i64> %c +} + +define <16 x i8> @ashr.v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: ashr.v16i8: +; CHECK: neg v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +; CHECK: sshl v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b + %c = ashr <16 x i8> %a, %b + ret <16 x i8> %c +} + +define <8 x i16> @ashr.v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: ashr.v8i16: +; CHECK: neg v{{[0-9]+}}.8h, v{{[0-9]+}}.8h +; CHECK: sshl v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h + %c = ashr <8 x i16> %a, %b + ret <8 x i16> %c +} + +define <4 x i32> @ashr.v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: ashr.v4i32: +; CHECK: neg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +; CHECK: sshl v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %c = ashr <4 x i32> %a, %b + ret <4 x i32> %c +} + +define <2 x i64> @ashr.v2i64(<2 x i64> %a, <2 x i64> %b) { +; 
CHECK-LABEL: ashr.v2i64: +; CHECK: neg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +; CHECK: sshl v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %c = ashr <2 x i64> %a, %b + ret <2 x i64> %c +} + +define <1 x i64> @shl.v1i64.0(<1 x i64> %a) { +; CHECK-LABEL: shl.v1i64.0: +; CHECK-NOT: shl d{{[0-9]+}}, d{{[0-9]+}}, #0 + %c = shl <1 x i64> %a, zeroinitializer + ret <1 x i64> %c +} + +define <2 x i32> @shl.v2i32.0(<2 x i32> %a) { +; CHECK-LABEL: shl.v2i32.0: +; CHECK-NOT: shl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #0 + %c = shl <2 x i32> %a, zeroinitializer + ret <2 x i32> %c +} + +; The following test cases test shl/ashr/lshr with v1i8/v1i16/v1i32 types + +define <1 x i8> @shl.v1i8(<1 x i8> %a, <1 x i8> %b) { +; CHECK-LABEL: shl.v1i8: +; CHECK: ushl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b + %c = shl <1 x i8> %a, %b + ret <1 x i8> %c +} + +define <1 x i16> @shl.v1i16(<1 x i16> %a, <1 x i16> %b) { +; CHECK-LABEL: shl.v1i16: +; CHECK: ushl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h + %c = shl <1 x i16> %a, %b + ret <1 x i16> %c +} + +define <1 x i32> @shl.v1i32(<1 x i32> %a, <1 x i32> %b) { +; CHECK-LABEL: shl.v1i32: +; CHECK: ushl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %c = shl <1 x i32> %a, %b + ret <1 x i32> %c +} + +define <1 x i8> @ashr.v1i8(<1 x i8> %a, <1 x i8> %b) { +; CHECK-LABEL: ashr.v1i8: +; CHECK: neg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b +; CHECK: sshl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b + %c = ashr <1 x i8> %a, %b + ret <1 x i8> %c +} + +define <1 x i16> @ashr.v1i16(<1 x i16> %a, <1 x i16> %b) { +; CHECK-LABEL: ashr.v1i16: +; CHECK: neg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h +; CHECK: sshl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h + %c = ashr <1 x i16> %a, %b + ret <1 x i16> %c +} + +define <1 x i32> @ashr.v1i32(<1 x i32> %a, <1 x i32> %b) { +; CHECK-LABEL: ashr.v1i32: +; CHECK: neg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +; CHECK: sshl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %c = ashr <1 x i32> %a, %b + ret <1 x i32> %c +} + +define <1 x i8> @lshr.v1i8(<1 x i8> %a, <1 x i8> %b) { +; CHECK-LABEL: lshr.v1i8: +; CHECK: neg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b +; CHECK: ushl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b + %c = lshr <1 x i8> %a, %b + ret <1 x i8> %c +} + +define <1 x i16> @lshr.v1i16(<1 x i16> %a, <1 x i16> %b) { +; CHECK-LABEL: lshr.v1i16: +; CHECK: neg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h +; CHECK: ushl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h + %c = lshr <1 x i16> %a, %b + ret <1 x i16> %c +} + +define <1 x i32> @lshr.v1i32(<1 x i32> %a, <1 x i32> %b) { +; CHECK-LABEL: lshr.v1i32: +; CHECK: neg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +; CHECK: ushl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %c = lshr <1 x i32> %a, %b + ret <1 x i32> %c +} + +define <1 x i8> @shl.v1i8.imm(<1 x i8> %a) { +; CHECK-LABEL: shl.v1i8.imm: +; CHECK: shl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, #3 + %c = shl <1 x i8> %a, <i8 3> + ret <1 x i8> %c +} + +define <1 x i16> @shl.v1i16.imm(<1 x i16> %a) { +; CHECK-LABEL: shl.v1i16.imm: +; CHECK: shl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, #5 + %c = shl <1 x i16> %a, <i16 5> + ret <1 x i16> %c +} + +define <1 x i32> @shl.v1i32.imm(<1 x i32> %a) { +; CHECK-LABEL: shl.v1i32.imm: +; CHECK-NOT: shl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #0 + %c = shl <1 x i32> %a, zeroinitializer + ret <1 x i32> %c +} + +define <1 x i8> @ashr.v1i8.imm(<1 x i8> %a) { +; CHECK-LABEL: ashr.v1i8.imm: +; CHECK: sshr v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, #3 + %c = ashr <1 x i8> %a, <i8 3> + ret <1 x i8> %c +} + +define <1 x i16> @ashr.v1i16.imm(<1 x i16> %a) { +; CHECK-LABEL: ashr.v1i16.imm: +; CHECK: sshr v{{[0-9]+}}.4h,
v{{[0-9]+}}.4h, #10 + %c = ashr <1 x i16> %a, <i16 10> + ret <1 x i16> %c +} + +define <1 x i32> @ashr.v1i32.imm(<1 x i32> %a) { +; CHECK-LABEL: ashr.v1i32.imm: +; CHECK: sshr v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #31 + %c = ashr <1 x i32> %a, <i32 31> + ret <1 x i32> %c +} + +define <1 x i8> @lshr.v1i8.imm(<1 x i8> %a) { +; CHECK-LABEL: lshr.v1i8.imm: +; CHECK: ushr v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, #3 + %c = lshr <1 x i8> %a, <i8 3> + ret <1 x i8> %c +} + +define <1 x i16> @lshr.v1i16.imm(<1 x i16> %a) { +; CHECK-LABEL: lshr.v1i16.imm: +; CHECK: ushr v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, #10 + %c = lshr <1 x i16> %a, <i16 10> + ret <1 x i16> %c +} + +define <1 x i32> @lshr.v1i32.imm(<1 x i32> %a) { +; CHECK-LABEL: lshr.v1i32.imm: +; CHECK: ushr v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #31 + %c = lshr <1 x i32> %a, <i32 31> + ret <1 x i32> %c +} diff --git a/test/CodeGen/AArch64/neon-simd-ldst-one.ll b/test/CodeGen/AArch64/neon-simd-ldst-one.ll index 3f28320..927c933 100644 --- a/test/CodeGen/AArch64/neon-simd-ldst-one.ll +++ b/test/CodeGen/AArch64/neon-simd-ldst-one.ll @@ -1,5 +1,8 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s +%struct.uint8x16x2_t = type { [2 x <16 x i8>] } +%struct.poly8x16x2_t = type { [2 x <16 x i8>] } +%struct.uint8x16x3_t = type { [3 x <16 x i8>] } %struct.int8x16x2_t = type { [2 x <16 x i8>] } %struct.int16x8x2_t = type { [2 x <8 x i16>] } %struct.int32x4x2_t = type { [2 x <4 x i32>] } @@ -37,6 +40,87 @@ %struct.float32x2x4_t = type { [4 x <2 x float>] } %struct.float64x1x4_t = type { [4 x <1 x double>] } +define <16 x i8> @test_ld_from_poll_v16i8(<16 x i8> %a) { +; CHECK-LABEL: test_ld_from_poll_v16i8 +; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} +; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] +entry: + %b = add <16 x i8> %a, + ret <16 x i8> %b +} + +define <8 x i16> @test_ld_from_poll_v8i16(<8 x i16> %a) { +; CHECK-LABEL: test_ld_from_poll_v8i16 +; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} +; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] +entry: + %b = add <8 x i16> %a, + ret <8 x i16> %b +} + +define <4 x i32> @test_ld_from_poll_v4i32(<4 x i32> %a) { +; CHECK-LABEL: test_ld_from_poll_v4i32 +; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} +; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] +entry: + %b = add <4 x i32> %a, + ret <4 x i32> %b +} + +define <2 x i64> @test_ld_from_poll_v2i64(<2 x i64> %a) { +; CHECK-LABEL: test_ld_from_poll_v2i64 +; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} +; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] +entry: + %b = add <2 x i64> %a, + ret <2 x i64> %b +} + +define <4 x float> @test_ld_from_poll_v4f32(<4 x float> %a) { +; CHECK-LABEL: test_ld_from_poll_v4f32 +; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} +; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] +entry: + %b = fadd <4 x float> %a, + ret <4 x float> %b +} + +define <2 x double> @test_ld_from_poll_v2f64(<2 x double> %a) { +; CHECK-LABEL: test_ld_from_poll_v2f64 +; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} +; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] +entry: + %b = fadd <2 x double> %a, + ret <2 x double> %b +} + +define <8 x i8> @test_ld_from_poll_v8i8(<8 x i8> %a) { +; CHECK-LABEL: test_ld_from_poll_v8i8 +; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} +; CHECK-NEXT: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] +entry: + %b = add <8 x i8> %a, + ret <8 x i8> %b +} + +define <4 x i16> @test_ld_from_poll_v4i16(<4 x i16> %a) { +; CHECK-LABEL: test_ld_from_poll_v4i16 +;
CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} +; CHECK-NEXT: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] +entry: + %b = add <4 x i16> %a, + ret <4 x i16> %b +} + +define <2 x i32> @test_ld_from_poll_v2i32(<2 x i32> %a) { +; CHECK-LABEL: test_ld_from_poll_v2i32 +; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} +; CHECK-NEXT: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] +entry: + %b = add <2 x i32> %a, + ret <2 x i32> %b +} + define <16 x i8> @test_vld1q_dup_s8(i8* %a) { ; CHECK-LABEL: test_vld1q_dup_s8 ; CHECK: ld1r {{{v[0-9]+}}.16b}, [x0] @@ -155,6 +239,31 @@ entry: ret <1 x double> %1 } +define <1 x i64> @testDUP.v1i64(i64* %a, i64* %b) #0 { +; As there is a store operation depending on %1, LD1R pattern can't be selected. +; So LDR and FMOV should be emitted. +; CHECK-LABEL: testDUP.v1i64 +; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}] +; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}] + %1 = load i64* %a, align 8 + store i64 %1, i64* %b, align 8 + %vecinit.i = insertelement <1 x i64> undef, i64 %1, i32 0 + ret <1 x i64> %vecinit.i +} + +define <1 x double> @testDUP.v1f64(double* %a, double* %b) #0 { +; As there is a store operation depending on %1, LD1R pattern can't be selected. +; So LDR and FMOV should be emitted. +; CHECK-LABEL: testDUP.v1f64 +; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}] +; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}] + %1 = load double* %a, align 8 + store double %1, double* %b, align 8 + %vecinit.i = insertelement <1 x double> undef, double %1, i32 0 + ret <1 x double> %vecinit.i +} + define %struct.int8x16x2_t @test_vld2q_dup_s8(i8* %a) { ; CHECK-LABEL: test_vld2q_dup_s8 ; CHECK: ld2r {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, [x0] @@ -2110,4 +2219,81 @@ declare void @llvm.arm.neon.vst4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, declare void @llvm.arm.neon.vst4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) declare void @llvm.arm.neon.vst4lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32) declare void @llvm.arm.neon.vst4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) -declare void @llvm.arm.neon.vst4lane.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, <1 x double>, i32, i32) \ No newline at end of file +declare void @llvm.arm.neon.vst4lane.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, <1 x double>, i32, i32) + +define %struct.int8x16x2_t @test_vld2q_lane_s8(i8* readonly %ptr, [2 x <16 x i8>] %src.coerce) { +; CHECK-LABEL: test_vld2q_lane_s8 +; CHECK: ld2 {{{v[0-9]+}}.b, {{v[0-9]+}}.b}[15], [x0] +entry: + %src.coerce.fca.0.extract = extractvalue [2 x <16 x i8>] %src.coerce, 0 + %src.coerce.fca.1.extract = extractvalue [2 x <16 x i8>] %src.coerce, 1 + %vld2_lane = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8* %ptr, <16 x i8> %src.coerce.fca.0.extract, <16 x i8> %src.coerce.fca.1.extract, i32 15, i32 1) + %vld2_lane.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2_lane, 0 + %vld2_lane.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2_lane, 1 + %.fca.0.0.insert = insertvalue %struct.int8x16x2_t undef, <16 x i8> %vld2_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int8x16x2_t %.fca.0.0.insert, <16 x i8> %vld2_lane.fca.1.extract, 0, 1 + ret %struct.int8x16x2_t %.fca.0.1.insert +} + +define %struct.uint8x16x2_t @test_vld2q_lane_u8(i8* readonly %ptr, [2 x <16 x i8>] %src.coerce) { +; CHECK-LABEL: test_vld2q_lane_u8 +; CHECK: ld2 {{{v[0-9]+}}.b, {{v[0-9]+}}.b}[15], [x0] +entry: + %src.coerce.fca.0.extract = 
extractvalue [2 x <16 x i8>] %src.coerce, 0 + %src.coerce.fca.1.extract = extractvalue [2 x <16 x i8>] %src.coerce, 1 + %vld2_lane = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8* %ptr, <16 x i8> %src.coerce.fca.0.extract, <16 x i8> %src.coerce.fca.1.extract, i32 15, i32 1) + %vld2_lane.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2_lane, 0 + %vld2_lane.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2_lane, 1 + %.fca.0.0.insert = insertvalue %struct.uint8x16x2_t undef, <16 x i8> %vld2_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint8x16x2_t %.fca.0.0.insert, <16 x i8> %vld2_lane.fca.1.extract, 0, 1 + ret %struct.uint8x16x2_t %.fca.0.1.insert +} + +define %struct.poly8x16x2_t @test_vld2q_lane_p8(i8* readonly %ptr, [2 x <16 x i8>] %src.coerce) { +; CHECK-LABEL: test_vld2q_lane_p8 +; CHECK: ld2 {{{v[0-9]+}}.b, {{v[0-9]+}}.b}[15], [x0] +entry: + %src.coerce.fca.0.extract = extractvalue [2 x <16 x i8>] %src.coerce, 0 + %src.coerce.fca.1.extract = extractvalue [2 x <16 x i8>] %src.coerce, 1 + %vld2_lane = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8* %ptr, <16 x i8> %src.coerce.fca.0.extract, <16 x i8> %src.coerce.fca.1.extract, i32 15, i32 1) + %vld2_lane.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2_lane, 0 + %vld2_lane.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2_lane, 1 + %.fca.0.0.insert = insertvalue %struct.poly8x16x2_t undef, <16 x i8> %vld2_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.poly8x16x2_t %.fca.0.0.insert, <16 x i8> %vld2_lane.fca.1.extract, 0, 1 + ret %struct.poly8x16x2_t %.fca.0.1.insert +} + +define %struct.int8x16x3_t @test_vld3q_lane_s8(i8* readonly %ptr, [3 x <16 x i8>] %src.coerce) { +; CHECK-LABEL: test_vld3q_lane_s8 +; CHECK: ld3 {{{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b}[15], [x0] +entry: + %src.coerce.fca.0.extract = extractvalue [3 x <16 x i8>] %src.coerce, 0 + %src.coerce.fca.1.extract = extractvalue [3 x <16 x i8>] %src.coerce, 1 + %src.coerce.fca.2.extract = extractvalue [3 x <16 x i8>] %src.coerce, 2 + %vld3_lane = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3lane.v16i8(i8* %ptr, <16 x i8> %src.coerce.fca.0.extract, <16 x i8> %src.coerce.fca.1.extract, <16 x i8> %src.coerce.fca.2.extract, i32 15, i32 1) + %vld3_lane.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 0 + %vld3_lane.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 1 + %vld3_lane.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 2 + %.fca.0.0.insert = insertvalue %struct.int8x16x3_t undef, <16 x i8> %vld3_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int8x16x3_t %.fca.0.0.insert, <16 x i8> %vld3_lane.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int8x16x3_t %.fca.0.1.insert, <16 x i8> %vld3_lane.fca.2.extract, 0, 2 + ret %struct.int8x16x3_t %.fca.0.2.insert +} + +define %struct.uint8x16x3_t @test_vld3q_lane_u8(i8* readonly %ptr, [3 x <16 x i8>] %src.coerce) { +; CHECK-LABEL: test_vld3q_lane_u8 +; CHECK: ld3 {{{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b}[15], [x0] +entry: + %src.coerce.fca.0.extract = extractvalue [3 x <16 x i8>] %src.coerce, 0 + %src.coerce.fca.1.extract = extractvalue [3 x <16 x i8>] %src.coerce, 1 + %src.coerce.fca.2.extract = extractvalue [3 x <16 x i8>] %src.coerce, 2 + %vld3_lane = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3lane.v16i8(i8* %ptr, <16 x i8> %src.coerce.fca.0.extract, 
<16 x i8> %src.coerce.fca.1.extract, <16 x i8> %src.coerce.fca.2.extract, i32 15, i32 1) + %vld3_lane.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 0 + %vld3_lane.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 1 + %vld3_lane.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 2 + %.fca.0.0.insert = insertvalue %struct.uint8x16x3_t undef, <16 x i8> %vld3_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint8x16x3_t %.fca.0.0.insert, <16 x i8> %vld3_lane.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.uint8x16x3_t %.fca.0.1.insert, <16 x i8> %vld3_lane.fca.2.extract, 0, 2 + ret %struct.uint8x16x3_t %.fca.0.2.insert +} + diff --git a/test/CodeGen/AArch64/neon-simd-tbl.ll b/test/CodeGen/AArch64/neon-simd-tbl.ll index 8eac1e8..7a51c0f 100644 --- a/test/CodeGen/AArch64/neon-simd-tbl.ll +++ b/test/CodeGen/AArch64/neon-simd-tbl.ll @@ -1,45 +1,45 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -declare <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) -declare <16 x i8> @llvm.aarch64.neon.vtbx3.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.aarch64.neon.vtbx3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) -declare <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) -declare <16 x i8> @llvm.aarch64.neon.vtbx1.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.aarch64.neon.vtbx1.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) -declare <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8.v16i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) +declare <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) -declare <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8.v16i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) +declare <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) -declare <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) +declare <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) -declare <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8>, <16 x i8>, <8 x i8>) +declare <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8(<8 x i8>, <16 x i8>, <8 x i8>) -declare <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8>, <16 x i8>, <8 x i8>) +declare <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>) declare <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>) -declare <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8>, <8 x i8>) +declare <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8>, <8 x i8>) -declare <16 x i8> @llvm.aarch64.neon.vtbl4.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.aarch64.neon.vtbl4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) -declare <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) -declare <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x 
i8>) +declare <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) -declare <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8.v16i8(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8(<16 x i8>, <16 x i8>) -declare <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) +declare <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) -declare <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) +declare <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) define <8 x i8> @test_vtbl1_s8(<8 x i8> %a, <8 x i8> %b) { ; CHECK: test_vtbl1_s8: ; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b entry: %vtbl1.i = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> - %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %b) + %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %b) ret <8 x i8> %vtbl11.i } @@ -47,7 +47,7 @@ define <8 x i8> @test_vqtbl1_s8(<16 x i8> %a, <8 x i8> %b) { ; CHECK: test_vqtbl1_s8: ; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b entry: - %vtbl1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %a, <8 x i8> %b) + %vtbl1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %a, <8 x i8> %b) ret <8 x i8> %vtbl1.i } @@ -58,7 +58,7 @@ entry: %__a.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 0 %__a.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 1 %vtbl1.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> - %vtbl17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %b) + %vtbl17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %b) ret <8 x i8> %vtbl17.i } @@ -68,7 +68,7 @@ define <8 x i8> @test_vqtbl2_s8([2 x <16 x i8>] %a.coerce, <8 x i8> %b) { entry: %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0 %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1 - %vtbl2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <8 x i8> %b) + %vtbl2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <8 x i8> %b) ret <8 x i8> %vtbl2.i } @@ -81,7 +81,7 @@ entry: %__a.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 2 %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> %vtbl211.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> - %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %b) + %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %b) ret <8 x i8> %vtbl212.i } @@ -92,7 +92,7 @@ entry: %__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0 %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1 %__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2 - %vtbl3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> 
%__a.coerce.fca.2.extract.i, <8 x i8> %b) + %vtbl3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %b) ret <8 x i8> %vtbl3.i } @@ -106,7 +106,7 @@ entry: %__a.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 3 %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> %vtbl215.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %__a.coerce.fca.3.extract.i, <16 x i32> - %vtbl216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl215.i, <8 x i8> %b) + %vtbl216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl215.i, <8 x i8> %b) ret <8 x i8> %vtbl216.i } @@ -118,7 +118,7 @@ entry: %__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1 %__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2 %__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3 - %vtbl4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <8 x i8> %b) + %vtbl4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <8 x i8> %b) ret <8 x i8> %vtbl4.i } @@ -126,7 +126,7 @@ define <16 x i8> @test_vqtbl1q_s8(<16 x i8> %a, <16 x i8> %b) { ; CHECK: test_vqtbl1q_s8: ; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b entry: - %vtbl1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b) + %vtbl1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8(<16 x i8> %a, <16 x i8> %b) ret <16 x i8> %vtbl1.i } @@ -136,7 +136,7 @@ define <16 x i8> @test_vqtbl2q_s8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) { entry: %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0 %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1 - %vtbl2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %b) + %vtbl2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %b) ret <16 x i8> %vtbl2.i } @@ -147,7 +147,7 @@ entry: %__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0 %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1 %__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2 - %vtbl3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %b) + %vtbl3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %b) ret <16 x i8> %vtbl3.i } @@ -159,7 +159,7 @@ entry: %__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1 %__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2 %__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3 - %vtbl4.i = tail call <16 x i8> 
@llvm.aarch64.neon.vtbl4.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <16 x i8> %b) + %vtbl4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl4.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <16 x i8> %b) ret <16 x i8> %vtbl4.i } @@ -168,7 +168,7 @@ define <8 x i8> @test_vtbx1_s8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { ; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b entry: %vtbl1.i = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> - %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %c) + %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %c) %0 = icmp uge <8 x i8> %c, %1 = sext <8 x i1> %0 to <8 x i8> %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl11.i) @@ -182,7 +182,7 @@ entry: %__b.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 0 %__b.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 1 %vtbx1.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> - %vtbx17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8> %a, <16 x i8> %vtbx1.i, <8 x i8> %c) + %vtbx17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8(<8 x i8> %a, <16 x i8> %vtbx1.i, <8 x i8> %c) ret <8 x i8> %vtbx17.i } @@ -195,7 +195,7 @@ entry: %__b.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 2 %vtbl2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> %vtbl211.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> - %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %c) + %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %c) %0 = icmp uge <8 x i8> %c, %1 = sext <8 x i1> %0 to <8 x i8> %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl212.i) @@ -212,7 +212,7 @@ entry: %__b.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 3 %vtbx2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> %vtbx215.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %__b.coerce.fca.3.extract.i, <16 x i32> - %vtbx216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8> %a, <16 x i8> %vtbx2.i, <16 x i8> %vtbx215.i, <8 x i8> %c) + %vtbx216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8(<8 x i8> %a, <16 x i8> %vtbx2.i, <16 x i8> %vtbx215.i, <8 x i8> %c) ret <8 x i8> %vtbx216.i } @@ -220,7 +220,7 @@ define <8 x i8> @test_vqtbx1_s8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) { ; CHECK: test_vqtbx1_s8: ; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b entry: - %vtbx1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) + %vtbx1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) ret <8 x i8> %vtbx1.i } @@ -230,7 +230,7 @@ define <8 x i8> @test_vqtbx2_s8(<8 x i8> %a, [2 x <16 x i8>] %b.coerce, <8 x i8> entry: %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0 
%__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1 - %vtbx2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <8 x i8> %c) + %vtbx2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <8 x i8> %c) ret <8 x i8> %vtbx2.i } @@ -241,7 +241,7 @@ entry: %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0 %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1 %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2 - %vtbx3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %c) + %vtbx3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %c) ret <8 x i8> %vtbx3.i } @@ -253,7 +253,7 @@ entry: %__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1 %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2 %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3 - %vtbx4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <8 x i8> %c) + %vtbx4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <8 x i8> %c) ret <8 x i8> %vtbx4.i } @@ -261,7 +261,7 @@ define <16 x i8> @test_vqtbx1q_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { ; CHECK: test_vqtbx1q_s8: ; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b entry: - %vtbx1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx1.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) + %vtbx1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) ret <16 x i8> %vtbx1.i } @@ -271,7 +271,7 @@ define <16 x i8> @test_vqtbx2q_s8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x entry: %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0 %__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1 - %vtbx2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %c) + %vtbx2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %c) ret <16 x i8> %vtbx2.i } @@ -282,7 +282,7 @@ entry: %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0 %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1 %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2 - %vtbx3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx3.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %c) + %vtbx3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx3.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, 
<16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %c) ret <16 x i8> %vtbx3.i } @@ -294,7 +294,7 @@ entry: %__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1 %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2 %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3 - %vtbx4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <16 x i8> %c) + %vtbx4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <16 x i8> %c) ret <16 x i8> %vtbx4.i } @@ -303,7 +303,7 @@ define <8 x i8> @test_vtbl1_u8(<8 x i8> %a, <8 x i8> %b) { ; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b entry: %vtbl1.i = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> - %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %b) + %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %b) ret <8 x i8> %vtbl11.i } @@ -311,7 +311,7 @@ define <8 x i8> @test_vqtbl1_u8(<16 x i8> %a, <8 x i8> %b) { ; CHECK: test_vqtbl1_u8: ; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b entry: - %vtbl1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %a, <8 x i8> %b) + %vtbl1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %a, <8 x i8> %b) ret <8 x i8> %vtbl1.i } @@ -322,7 +322,7 @@ entry: %__a.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 0 %__a.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 1 %vtbl1.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> - %vtbl17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %b) + %vtbl17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %b) ret <8 x i8> %vtbl17.i } @@ -332,7 +332,7 @@ define <8 x i8> @test_vqtbl2_u8([2 x <16 x i8>] %a.coerce, <8 x i8> %b) { entry: %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0 %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1 - %vtbl2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <8 x i8> %b) + %vtbl2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <8 x i8> %b) ret <8 x i8> %vtbl2.i } @@ -345,7 +345,7 @@ entry: %__a.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 2 %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> %vtbl211.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> - %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %b) + %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %b) ret <8 x i8> %vtbl212.i } @@ -356,7 +356,7 @@ entry: %__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0 %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1 %__a.coerce.fca.2.extract.i = 
extractvalue [3 x <16 x i8>] %a.coerce, 2 - %vtbl3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %b) + %vtbl3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %b) ret <8 x i8> %vtbl3.i } @@ -370,7 +370,7 @@ entry: %__a.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 3 %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> %vtbl215.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %__a.coerce.fca.3.extract.i, <16 x i32> - %vtbl216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl215.i, <8 x i8> %b) + %vtbl216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl215.i, <8 x i8> %b) ret <8 x i8> %vtbl216.i } @@ -382,7 +382,7 @@ entry: %__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1 %__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2 %__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3 - %vtbl4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <8 x i8> %b) + %vtbl4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <8 x i8> %b) ret <8 x i8> %vtbl4.i } @@ -390,7 +390,7 @@ define <16 x i8> @test_vqtbl1q_u8(<16 x i8> %a, <16 x i8> %b) { ; CHECK: test_vqtbl1q_u8: ; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b entry: - %vtbl1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b) + %vtbl1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8(<16 x i8> %a, <16 x i8> %b) ret <16 x i8> %vtbl1.i } @@ -400,7 +400,7 @@ define <16 x i8> @test_vqtbl2q_u8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) { entry: %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0 %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1 - %vtbl2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %b) + %vtbl2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %b) ret <16 x i8> %vtbl2.i } @@ -411,7 +411,7 @@ entry: %__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0 %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1 %__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2 - %vtbl3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %b) + %vtbl3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %b) ret <16 x i8> %vtbl3.i } @@ -423,7 +423,7 @@ entry: %__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1 
%__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2 %__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3 - %vtbl4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl4.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <16 x i8> %b) + %vtbl4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl4.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <16 x i8> %b) ret <16 x i8> %vtbl4.i } @@ -432,7 +432,7 @@ define <8 x i8> @test_vtbx1_u8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { ; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b entry: %vtbl1.i = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> - %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %c) + %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %c) %0 = icmp uge <8 x i8> %c, %1 = sext <8 x i1> %0 to <8 x i8> %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl11.i) @@ -446,7 +446,7 @@ entry: %__b.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 0 %__b.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 1 %vtbx1.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> - %vtbx17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8> %a, <16 x i8> %vtbx1.i, <8 x i8> %c) + %vtbx17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8(<8 x i8> %a, <16 x i8> %vtbx1.i, <8 x i8> %c) ret <8 x i8> %vtbx17.i } @@ -459,7 +459,7 @@ entry: %__b.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 2 %vtbl2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> %vtbl211.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> - %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %c) + %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %c) %0 = icmp uge <8 x i8> %c, %1 = sext <8 x i1> %0 to <8 x i8> %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl212.i) @@ -476,7 +476,7 @@ entry: %__b.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 3 %vtbx2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> %vtbx215.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %__b.coerce.fca.3.extract.i, <16 x i32> - %vtbx216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8> %a, <16 x i8> %vtbx2.i, <16 x i8> %vtbx215.i, <8 x i8> %c) + %vtbx216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8(<8 x i8> %a, <16 x i8> %vtbx2.i, <16 x i8> %vtbx215.i, <8 x i8> %c) ret <8 x i8> %vtbx216.i } @@ -484,7 +484,7 @@ define <8 x i8> @test_vqtbx1_u8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) { ; CHECK: test_vqtbx1_u8: ; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b entry: - %vtbx1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) + %vtbx1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) ret <8 x i8> %vtbx1.i } @@ -494,7 +494,7 @@ 
define <8 x i8> @test_vqtbx2_u8(<8 x i8> %a, [2 x <16 x i8>] %b.coerce, <8 x i8> entry: %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0 %__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1 - %vtbx2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <8 x i8> %c) + %vtbx2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <8 x i8> %c) ret <8 x i8> %vtbx2.i } @@ -505,7 +505,7 @@ entry: %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0 %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1 %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2 - %vtbx3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %c) + %vtbx3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %c) ret <8 x i8> %vtbx3.i } @@ -517,7 +517,7 @@ entry: %__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1 %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2 %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3 - %vtbx4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <8 x i8> %c) + %vtbx4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <8 x i8> %c) ret <8 x i8> %vtbx4.i } @@ -525,7 +525,7 @@ define <16 x i8> @test_vqtbx1q_u8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { ; CHECK: test_vqtbx1q_u8: ; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b entry: - %vtbx1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx1.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) + %vtbx1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) ret <16 x i8> %vtbx1.i } @@ -535,7 +535,7 @@ define <16 x i8> @test_vqtbx2q_u8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x entry: %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0 %__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1 - %vtbx2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %c) + %vtbx2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %c) ret <16 x i8> %vtbx2.i } @@ -546,7 +546,7 @@ entry: %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0 %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1 %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2 - %vtbx3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx3.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> 
%c) + %vtbx3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx3.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %c) ret <16 x i8> %vtbx3.i } @@ -558,7 +558,7 @@ entry: %__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1 %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2 %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3 - %vtbx4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <16 x i8> %c) + %vtbx4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <16 x i8> %c) ret <16 x i8> %vtbx4.i } @@ -567,7 +567,7 @@ define <8 x i8> @test_vtbl1_p8(<8 x i8> %a, <8 x i8> %b) { ; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b entry: %vtbl1.i = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> - %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %b) + %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %b) ret <8 x i8> %vtbl11.i } @@ -575,7 +575,7 @@ define <8 x i8> @test_vqtbl1_p8(<16 x i8> %a, <8 x i8> %b) { ; CHECK: test_vqtbl1_p8: ; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b entry: - %vtbl1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %a, <8 x i8> %b) + %vtbl1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %a, <8 x i8> %b) ret <8 x i8> %vtbl1.i } @@ -586,7 +586,7 @@ entry: %__a.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 0 %__a.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 1 %vtbl1.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> - %vtbl17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %b) + %vtbl17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %b) ret <8 x i8> %vtbl17.i } @@ -596,7 +596,7 @@ define <8 x i8> @test_vqtbl2_p8([2 x <16 x i8>] %a.coerce, <8 x i8> %b) { entry: %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0 %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1 - %vtbl2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <8 x i8> %b) + %vtbl2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <8 x i8> %b) ret <8 x i8> %vtbl2.i } @@ -609,7 +609,7 @@ entry: %__a.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 2 %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> %vtbl211.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> - %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %b) + %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %b) ret <8 x i8> %vtbl212.i } @@ -620,7 +620,7 @@ entry: 
%__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0 %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1 %__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2 - %vtbl3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %b) + %vtbl3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %b) ret <8 x i8> %vtbl3.i } @@ -634,7 +634,7 @@ entry: %__a.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 3 %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> %vtbl215.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %__a.coerce.fca.3.extract.i, <16 x i32> - %vtbl216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl215.i, <8 x i8> %b) + %vtbl216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl215.i, <8 x i8> %b) ret <8 x i8> %vtbl216.i } @@ -646,7 +646,7 @@ entry: %__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1 %__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2 %__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3 - %vtbl4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <8 x i8> %b) + %vtbl4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <8 x i8> %b) ret <8 x i8> %vtbl4.i } @@ -654,7 +654,7 @@ define <16 x i8> @test_vqtbl1q_p8(<16 x i8> %a, <16 x i8> %b) { ; CHECK: test_vqtbl1q_p8: ; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b entry: - %vtbl1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b) + %vtbl1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8(<16 x i8> %a, <16 x i8> %b) ret <16 x i8> %vtbl1.i } @@ -664,7 +664,7 @@ define <16 x i8> @test_vqtbl2q_p8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) { entry: %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0 %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1 - %vtbl2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %b) + %vtbl2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %b) ret <16 x i8> %vtbl2.i } @@ -675,7 +675,7 @@ entry: %__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0 %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1 %__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2 - %vtbl3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %b) + %vtbl3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> 
%__a.coerce.fca.2.extract.i, <16 x i8> %b) ret <16 x i8> %vtbl3.i } @@ -687,7 +687,7 @@ entry: %__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1 %__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2 %__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3 - %vtbl4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl4.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <16 x i8> %b) + %vtbl4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl4.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <16 x i8> %b) ret <16 x i8> %vtbl4.i } @@ -696,7 +696,7 @@ define <8 x i8> @test_vtbx1_p8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { ; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b entry: %vtbl1.i = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> - %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %c) + %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %c) %0 = icmp uge <8 x i8> %c, %1 = sext <8 x i1> %0 to <8 x i8> %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl11.i) @@ -710,7 +710,7 @@ entry: %__b.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 0 %__b.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 1 %vtbx1.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> - %vtbx17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8> %a, <16 x i8> %vtbx1.i, <8 x i8> %c) + %vtbx17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8(<8 x i8> %a, <16 x i8> %vtbx1.i, <8 x i8> %c) ret <8 x i8> %vtbx17.i } @@ -723,7 +723,7 @@ entry: %__b.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 2 %vtbl2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> %vtbl211.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> - %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %c) + %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %c) %0 = icmp uge <8 x i8> %c, %1 = sext <8 x i1> %0 to <8 x i8> %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl212.i) @@ -740,7 +740,7 @@ entry: %__b.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 3 %vtbx2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> %vtbx215.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %__b.coerce.fca.3.extract.i, <16 x i32> - %vtbx216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8> %a, <16 x i8> %vtbx2.i, <16 x i8> %vtbx215.i, <8 x i8> %c) + %vtbx216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8(<8 x i8> %a, <16 x i8> %vtbx2.i, <16 x i8> %vtbx215.i, <8 x i8> %c) ret <8 x i8> %vtbx216.i } @@ -748,7 +748,7 @@ define <8 x i8> @test_vqtbx1_p8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) { ; CHECK: test_vqtbx1_p8: ; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b entry: - %vtbx1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8> %a, <16 x i8> 
%b, <8 x i8> %c) + %vtbx1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) ret <8 x i8> %vtbx1.i } @@ -758,7 +758,7 @@ define <8 x i8> @test_vqtbx2_p8(<8 x i8> %a, [2 x <16 x i8>] %b.coerce, <8 x i8> entry: %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0 %__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1 - %vtbx2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <8 x i8> %c) + %vtbx2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <8 x i8> %c) ret <8 x i8> %vtbx2.i } @@ -769,7 +769,7 @@ entry: %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0 %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1 %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2 - %vtbx3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %c) + %vtbx3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %c) ret <8 x i8> %vtbx3.i } @@ -781,7 +781,7 @@ entry: %__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1 %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2 %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3 - %vtbx4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <8 x i8> %c) + %vtbx4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <8 x i8> %c) ret <8 x i8> %vtbx4.i } @@ -789,7 +789,7 @@ define <16 x i8> @test_vqtbx1q_p8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { ; CHECK: test_vqtbx1q_p8: ; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b entry: - %vtbx1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx1.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) + %vtbx1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) ret <16 x i8> %vtbx1.i } @@ -799,7 +799,7 @@ define <16 x i8> @test_vqtbx2q_p8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x entry: %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0 %__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1 - %vtbx2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %c) + %vtbx2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %c) ret <16 x i8> %vtbx2.i } @@ -810,7 +810,7 @@ entry: %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0 %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1 %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2 - %vtbx3.i = tail call <16 x i8> 
@llvm.aarch64.neon.vtbx3.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %c) + %vtbx3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx3.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %c) ret <16 x i8> %vtbx3.i } @@ -822,7 +822,7 @@ entry: %__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1 %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2 %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3 - %vtbx4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <16 x i8> %c) + %vtbx4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <16 x i8> %c) ret <16 x i8> %vtbx4.i } diff --git a/test/CodeGen/AArch64/neon-spill-fpr8-fpr16.ll b/test/CodeGen/AArch64/neon-spill-fpr8-fpr16.ll new file mode 100644 index 0000000..bb3300e --- /dev/null +++ b/test/CodeGen/AArch64/neon-spill-fpr8-fpr16.ll @@ -0,0 +1,30 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s + +; This file tests the spill of FPR8/FPR16. The volatile loads/stores force the +; allocator to keep the value live until it's needed. + +%bigtype_v1i8 = type [20 x <1 x i8>] + +define void @spill_fpr8(%bigtype_v1i8* %addr) { +; CHECK-LABEL: spill_fpr8: +; CHECK: 1-byte Folded Spill +; CHECK: 1-byte Folded Reload + %val1 = load volatile %bigtype_v1i8* %addr + %val2 = load volatile %bigtype_v1i8* %addr + store volatile %bigtype_v1i8 %val1, %bigtype_v1i8* %addr + store volatile %bigtype_v1i8 %val2, %bigtype_v1i8* %addr + ret void +} + +%bigtype_v1i16 = type [20 x <1 x i16>] + +define void @spill_fpr16(%bigtype_v1i16* %addr) { +; CHECK-LABEL: spill_fpr16: +; CHECK: 2-byte Folded Spill +; CHECK: 2-byte Folded Reload + %val1 = load volatile %bigtype_v1i16* %addr + %val2 = load volatile %bigtype_v1i16* %addr + store volatile %bigtype_v1i16 %val1, %bigtype_v1i16* %addr + store volatile %bigtype_v1i16 %val2, %bigtype_v1i16* %addr + ret void +} \ No newline at end of file diff --git a/test/CodeGen/AArch64/neon-truncStore-extLoad.ll b/test/CodeGen/AArch64/neon-truncStore-extLoad.ll new file mode 100644 index 0000000..e5b7694 --- /dev/null +++ b/test/CodeGen/AArch64/neon-truncStore-extLoad.ll @@ -0,0 +1,57 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +; A vector TruncStore cannot be selected. +; Test that a trunc IR and a vector store IR can be selected correctly.
+define void @truncStore.v2i64(<2 x i64> %a, <2 x i32>* %result) { +; CHECK-LABEL: truncStore.v2i64: +; CHECK: xtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d +; CHECK: st1 {v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}] + %b = trunc <2 x i64> %a to <2 x i32> + store <2 x i32> %b, <2 x i32>* %result + ret void +} + +define void @truncStore.v4i32(<4 x i32> %a, <4 x i16>* %result) { +; CHECK-LABEL: truncStore.v4i32: +; CHECK: xtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s +; CHECK: st1 {v{{[0-9]+}}.4h}, [x{{[0-9]+|sp}}] + %b = trunc <4 x i32> %a to <4 x i16> + store <4 x i16> %b, <4 x i16>* %result + ret void +} + +define void @truncStore.v8i16(<8 x i16> %a, <8 x i8>* %result) { +; CHECK-LABEL: truncStore.v8i16: +; CHECK: xtn v{{[0-9]+}}.8b, v{{[0-9]+}}.8h +; CHECK: st1 {v{{[0-9]+}}.8b}, [x{{[0-9]+|sp}}] + %b = trunc <8 x i16> %a to <8 x i8> + store <8 x i8> %b, <8 x i8>* %result + ret void +} + +; A vector LoadExt cannot be selected. +; Test that a vector load IR and a sext/zext IR can be selected correctly. +define <4 x i32> @loadSExt.v4i8(<4 x i8>* %ref) { +; CHECK-LABEL: loadSExt.v4i8: +; CHECK: ldrsb + %a = load <4 x i8>* %ref + %conv = sext <4 x i8> %a to <4 x i32> + ret <4 x i32> %conv +} + +define <4 x i32> @loadZExt.v4i8(<4 x i8>* %ref) { +; CHECK-LABEL: loadZExt.v4i8: +; CHECK: ldrb + %a = load <4 x i8>* %ref + %conv = zext <4 x i8> %a to <4 x i32> + ret <4 x i32> %conv +} + +define i32 @loadExt.i32(<4 x i8>* %ref) { +; CHECK-LABEL: loadExt.i32: +; CHECK: ldrb + %a = load <4 x i8>* %ref + %vecext = extractelement <4 x i8> %a, i32 0 + %conv = zext i8 %vecext to i32 + ret i32 %conv +} \ No newline at end of file diff --git a/test/CodeGen/AArch64/neon-v1i1-setcc.ll b/test/CodeGen/AArch64/neon-v1i1-setcc.ll new file mode 100644 index 0000000..6c7d009 --- /dev/null +++ b/test/CodeGen/AArch64/neon-v1i1-setcc.ll @@ -0,0 +1,68 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s + +; This file tests the DAG node "v1i1 SETCC v1i64, v1i64". As the v1i1 type +; is illegal in the AArch64 backend, the legalizer tries to scalarize this node. +; As the v1i64 operands of SETCC are legal types, they will not be scalarized. +; Currently the type legalizer will have an assertion failure as it assumes all +; operands of SETCC have been legalized. +; FIXME: If the algorithm of type scalarization is improved and can legalize +; "v1i1 SETCC" correctly, these test cases will no longer be needed.
+ +define i64 @test_sext_extr_cmp_0(<1 x i64> %v1, <1 x i64> %v2) { +; CHECK-LABEL: test_sext_extr_cmp_0: +; CHECK: cmge d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} + %1 = icmp sge <1 x i64> %v1, %v2 + %2 = extractelement <1 x i1> %1, i32 0 + %vget_lane = sext i1 %2 to i64 + ret i64 %vget_lane +} + +define i64 @test_sext_extr_cmp_1(<1 x double> %v1, <1 x double> %v2) { +; CHECK-LABEL: test_sext_extr_cmp_1: +; CHECK: fcmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} + %1 = fcmp oeq <1 x double> %v1, %v2 + %2 = extractelement <1 x i1> %1, i32 0 + %vget_lane = sext i1 %2 to i64 + ret i64 %vget_lane +} + +define <1 x i64> @test_select_v1i1_0(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) { +; CHECK-LABEL: test_select_v1i1_0: +; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +; CHECK: bsl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b + %1 = icmp eq <1 x i64> %v1, %v2 + %res = select <1 x i1> %1, <1 x i64> zeroinitializer, <1 x i64> %v3 + ret <1 x i64> %res +} + +define <1 x i64> @test_select_v1i1_1(<1 x double> %v1, <1 x double> %v2, <1 x i64> %v3) { +; CHECK-LABEL: test_select_v1i1_1: +; CHECK: fcmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +; CHECK: bsl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b + %1 = fcmp oeq <1 x double> %v1, %v2 + %res = select <1 x i1> %1, <1 x i64> zeroinitializer, <1 x i64> %v3 + ret <1 x i64> %res +} + +define <1 x double> @test_select_v1i1_2(<1 x i64> %v1, <1 x i64> %v2, <1 x double> %v3) { +; CHECK-LABEL: test_select_v1i1_2: +; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +; CHECK: bsl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b + %1 = icmp eq <1 x i64> %v1, %v2 + %res = select <1 x i1> %1, <1 x double> zeroinitializer, <1 x double> %v3 + ret <1 x double> %res +} + +define i32 @test_br_extr_cmp(<1 x i64> %v1, <1 x i64> %v2) { +; CHECK-LABEL: test_br_extr_cmp: +; CHECK: cmp x{{[0-9]+}}, x{{[0-9]+}} + %1 = icmp eq <1 x i64> %v1, %v2 + %2 = extractelement <1 x i1> %1, i32 0 + br i1 %2, label %if.end, label %if.then + +if.then: + ret i32 0; + +if.end: + ret i32 1; +} diff --git a/test/CodeGen/AArch64/neon-vector-list-spill.ll b/test/CodeGen/AArch64/neon-vector-list-spill.ll new file mode 100644 index 0000000..3ab69c4 --- /dev/null +++ b/test/CodeGen/AArch64/neon-vector-list-spill.ll @@ -0,0 +1,175 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast + +; FIXME: We should not generate ld/st for such register spill/fill, because the +; test case seems very simple and the register pressure is not high. If the +; spill/fill algorithm is optimized, this test case may not be triggered. And +; then we can delete it. 
+define i32 @spill.DPairReg(i8* %arg1, i32 %arg2) { +; CHECK-LABEL: spill.DPairReg: +; CHECK: ld2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +entry: + %vld = tail call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32(i8* %arg1, i32 4) + %cmp = icmp eq i32 %arg2, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @foo() + br label %if.end + +if.end: + %vld.extract = extractvalue { <2 x i32>, <2 x i32> } %vld, 0 + %res = extractelement <2 x i32> %vld.extract, i32 1 + ret i32 %res +} + +define i16 @spill.DTripleReg(i8* %arg1, i32 %arg2) { +; CHECK-LABEL: spill.DTripleReg: +; CHECK: ld3 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +entry: + %vld = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8* %arg1, i32 4) + %cmp = icmp eq i32 %arg2, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @foo() + br label %if.end + +if.end: + %vld.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld, 0 + %res = extractelement <4 x i16> %vld.extract, i32 1 + ret i16 %res +} + +define i16 @spill.DQuadReg(i8* %arg1, i32 %arg2) { +; CHECK-LABEL: spill.DQuadReg: +; CHECK: ld4 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +entry: + %vld = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16(i8* %arg1, i32 4) + %cmp = icmp eq i32 %arg2, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @foo() + br label %if.end + +if.end: + %vld.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld, 0 + %res = extractelement <4 x i16> %vld.extract, i32 0 + ret i16 %res +} + +define i32 @spill.QPairReg(i8* %arg1, i32 %arg2) { +; CHECK-LABEL: spill.QPairReg: +; CHECK: ld3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +entry: + %vld = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32(i8* %arg1, i32 4) + %cmp = icmp eq i32 %arg2, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @foo() + br label %if.end + +if.end: + %vld.extract = extractvalue { <4 x i32>, <4 x i32> } %vld, 0 + %res = extractelement <4 x i32> %vld.extract, i32 1 + ret i32 %res +} + +define float @spill.QTripleReg(i8* %arg1, i32 %arg2) { +; CHECK-LABEL: spill.QTripleReg: +; CHECK: ld3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +entry: + %vld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32(i8* %arg1, i32 4) + %cmp = icmp eq i32 %arg2, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @foo() + br label %if.end + +if.end: + %vld3.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3, 0 + %res = extractelement <4 x float> 
%vld3.extract, i32 1 + ret float %res +} + +define i8 @spill.QQuadReg(i8* %arg1, i32 %arg2) { +; CHECK-LABEL: spill.QQuadReg: +; CHECK: ld4 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +entry: + %vld = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8* %arg1, i32 4) + %cmp = icmp eq i32 %arg2, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @foo() + br label %if.end + +if.end: + %vld.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld, 0 + %res = extractelement <16 x i8> %vld.extract, i32 1 + ret i8 %res +} + +declare { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32(i8*, i32) +declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8*, i32) +declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16(i8*, i32) +declare { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32(i8*, i32) +declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32(i8*, i32) +declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8*, i32) + +declare void @foo() + +; FIXME: We should not generate ld/st for such register spill/fill, because the +; test case seems very simple and the register pressure is not high. If the +; spill/fill algorithm is optimized, this test case may not be triggered. And +; then we can delete it. +; check the spill for Register Class QPair_with_qsub_0_in_FPR128Lo +define <8 x i16> @test_2xFPR128Lo(i64 %got, i8* %ptr, <1 x i64> %a) { + tail call void @llvm.arm.neon.vst2lane.v1i64(i8* %ptr, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i32 0, i32 8) + tail call void @foo() + %sv = shufflevector <1 x i64> zeroinitializer, <1 x i64> %a, <2 x i32> + %1 = bitcast <2 x i64> %sv to <8 x i16> + %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> + %3 = mul <8 x i16> %2, %2 + ret <8 x i16> %3 +} + +; check the spill for Register Class QTriple_with_qsub_0_in_FPR128Lo +define <8 x i16> @test_3xFPR128Lo(i64 %got, i8* %ptr, <1 x i64> %a) { + tail call void @llvm.arm.neon.vst3lane.v1i64(i8* %ptr, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i32 0, i32 8) + tail call void @foo() + %sv = shufflevector <1 x i64> zeroinitializer, <1 x i64> %a, <2 x i32> + %1 = bitcast <2 x i64> %sv to <8 x i16> + %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> + %3 = mul <8 x i16> %2, %2 + ret <8 x i16> %3 +} + +; check the spill for Register Class QQuad_with_qsub_0_in_FPR128Lo +define <8 x i16> @test_4xFPR128Lo(i64 %got, i8* %ptr, <1 x i64> %a) { + tail call void @llvm.arm.neon.vst4lane.v1i64(i8* %ptr, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i32 0, i32 8) + tail call void @foo() + %sv = shufflevector <1 x i64> zeroinitializer, <1 x i64> %a, <2 x i32> + %1 = bitcast <2 x i64> %sv to <8 x i16> + %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> + %3 = mul <8 x i16> %2, %2 + ret <8 x i16> %3 +} + +declare void @llvm.arm.neon.vst2lane.v1i64(i8*, <1 x i64>, <1 x i64>, i32, i32) +declare void @llvm.arm.neon.vst3lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32) +declare void @llvm.arm.neon.vst4lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32) \ No newline at end of 
file diff --git a/test/CodeGen/AArch64/pic-eh-stubs.ll b/test/CodeGen/AArch64/pic-eh-stubs.ll index 6ec4b19..3404d3f 100644 --- a/test/CodeGen/AArch64/pic-eh-stubs.ll +++ b/test/CodeGen/AArch64/pic-eh-stubs.ll @@ -1,4 +1,5 @@ ; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64_be-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s ; Make sure exception-handling PIC code can be linked correctly. An alternative ; to the sequence described below would have .gcc_except_table itself writable @@ -10,8 +11,8 @@ ; ... referring indirectly to stubs for its typeinfo ... ; CHECK: // @TType Encoding = indirect pcrel sdata8 ; ... one of which is "int"'s typeinfo -; CHECK: .Ltmp9: -; CHECK-NEXT: .xword .L_ZTIi.DW.stub-.Ltmp9 +; CHECK: .Ltmp7: +; CHECK-NEXT: .xword .L_ZTIi.DW.stub-.Ltmp7 ; .. and which is properly defined (in a writable section for the dynamic loader) later. ; CHECK: .section .data.rel,"aw" diff --git a/test/CodeGen/AArch64/ragreedy-csr.ll b/test/CodeGen/AArch64/ragreedy-csr.ll new file mode 100644 index 0000000..18a948b --- /dev/null +++ b/test/CodeGen/AArch64/ragreedy-csr.ll @@ -0,0 +1,297 @@ +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -regalloc=greedy -regalloc-csr-first-time-cost=15 | FileCheck %s + +; This testing case is reduced from 197.parser prune_match function. +; We make sure that we do not use callee-saved registers (x19 to x25). +; rdar://16162005 + +; CHECK-LABEL: prune_match: +; CHECK: entry +; CHECK: str x30, [sp +; CHECK-NOT: stp x25, +; CHECK-NOT: stp x23, x24 +; CHECK-NOT: stp x21, x22 +; CHECK-NOT: stp x19, x20 +; CHECK: if.end +; CHECK: return +; CHECK: ldr x30, [sp +; CHECK-NOT: ldp x19, x20 +; CHECK-NOT: ldp x21, x22 +; CHECK-NOT: ldp x23, x24 +; CHECK-NOT: ldp x25, + +%struct.List_o_links_struct = type { i32, i32, i32, %struct.List_o_links_struct* } +%struct.Connector_struct = type { i16, i16, i8, i8, %struct.Connector_struct*, i8* } +%struct._RuneLocale = type { [8 x i8], [32 x i8], i32 (i8*, i64, i8**)*, i32 (i32, i8*, i64, i8**)*, i32, [256 x i32], [256 x i32], [256 x i32], %struct._RuneRange, %struct._RuneRange, %struct._RuneRange, i8*, i32, i32, %struct._RuneCharClass* } +%struct._RuneRange = type { i32, %struct._RuneEntry* } +%struct._RuneEntry = type { i32, i32, i32, i32* } +%struct._RuneCharClass = type { [14 x i8], i32 } +%struct.Exp_struct = type { i8, i8, i8, i8, %union.anon } +%union.anon = type { %struct.E_list_struct* } +%struct.E_list_struct = type { %struct.E_list_struct*, %struct.Exp_struct* } +%struct.domain_struct = type { i8*, i32, %struct.List_o_links_struct*, i32, i32, %struct.d_tree_leaf_struct*, %struct.domain_struct* } +%struct.d_tree_leaf_struct = type { %struct.domain_struct*, i32, %struct.d_tree_leaf_struct* } +@_DefaultRuneLocale = external global %struct._RuneLocale +declare i32 @__maskrune(i32, i64) #7 +define fastcc i32 @prune_match(%struct.Connector_struct* nocapture readonly %a, %struct.Connector_struct* nocapture readonly %b) #9 { +entry: + %label56 = bitcast %struct.Connector_struct* %a to i16* + %0 = load i16* %label56, align 2 + %label157 = bitcast %struct.Connector_struct* %b to i16* + %1 = load i16* %label157, align 2 + %cmp = icmp eq i16 %0, %1 + br i1 %cmp, label %if.end, label %return, !prof !988 +if.end: + %priority = getelementptr inbounds %struct.Connector_struct* %a, i64 0, i32 2 + %2 = load i8* %priority, align 1 + %priority5 = getelementptr inbounds %struct.Connector_struct* %b, i64 0, i32 2 + %3 = load i8* %priority5, align 1 + %string = 
getelementptr inbounds %struct.Connector_struct* %a, i64 0, i32 5 + %4 = load i8** %string, align 8 + %string7 = getelementptr inbounds %struct.Connector_struct* %b, i64 0, i32 5 + %5 = load i8** %string7, align 8 + br label %while.cond +while.cond: + %lsr.iv27 = phi i64 [ %lsr.iv.next28, %if.end17 ], [ 0, %if.end ] + %scevgep55 = getelementptr i8* %4, i64 %lsr.iv27 + %6 = load i8* %scevgep55, align 1 + %idxprom.i.i = sext i8 %6 to i64 + %isascii.i.i224 = icmp sgt i8 %6, -1 + br i1 %isascii.i.i224, label %cond.true.i.i, label %cond.false.i.i, !prof !181 +cond.true.i.i: + %arrayidx.i.i = getelementptr inbounds %struct._RuneLocale* @_DefaultRuneLocale, i64 0, i32 5, i64 %idxprom.i.i + %7 = load i32* %arrayidx.i.i, align 4 + %and.i.i = and i32 %7, 32768 + br label %isupper.exit +cond.false.i.i: + %8 = trunc i64 %idxprom.i.i to i8 + %conv8 = sext i8 %8 to i32 + %call3.i.i = tail call i32 @__maskrune(i32 %conv8, i64 32768) #3 + br label %isupper.exit +isupper.exit: + %tobool1.sink.i.in.i = phi i32 [ %and.i.i, %cond.true.i.i ], [ %call3.i.i, %cond.false.i.i ] + %tobool1.sink.i.i = icmp eq i32 %tobool1.sink.i.in.i, 0 + br i1 %tobool1.sink.i.i, label %lor.rhs, label %while.body, !prof !989 +lor.rhs: + %sunkaddr = ptrtoint i8* %5 to i64 + %sunkaddr58 = add i64 %sunkaddr, %lsr.iv27 + %sunkaddr59 = inttoptr i64 %sunkaddr58 to i8* + %9 = load i8* %sunkaddr59, align 1 + %idxprom.i.i214 = sext i8 %9 to i64 + %isascii.i.i213225 = icmp sgt i8 %9, -1 + br i1 %isascii.i.i213225, label %cond.true.i.i217, label %cond.false.i.i219, !prof !181 +cond.true.i.i217: + %arrayidx.i.i215 = getelementptr inbounds %struct._RuneLocale* @_DefaultRuneLocale, i64 0, i32 5, i64 %idxprom.i.i214 + %10 = load i32* %arrayidx.i.i215, align 4 + %and.i.i216 = and i32 %10, 32768 + br label %isupper.exit223 +cond.false.i.i219: + %11 = trunc i64 %idxprom.i.i214 to i8 + %conv9 = sext i8 %11 to i32 + %call3.i.i218 = tail call i32 @__maskrune(i32 %conv9, i64 32768) #3 + br label %isupper.exit223 +isupper.exit223: + %tobool1.sink.i.in.i220 = phi i32 [ %and.i.i216, %cond.true.i.i217 ], [ %call3.i.i218, %cond.false.i.i219 ] + %tobool1.sink.i.i221 = icmp eq i32 %tobool1.sink.i.in.i220, 0 + br i1 %tobool1.sink.i.i221, label %while.end, label %while.body, !prof !990 +while.body: + %sunkaddr60 = ptrtoint i8* %4 to i64 + %sunkaddr61 = add i64 %sunkaddr60, %lsr.iv27 + %sunkaddr62 = inttoptr i64 %sunkaddr61 to i8* + %12 = load i8* %sunkaddr62, align 1 + %sunkaddr63 = ptrtoint i8* %5 to i64 + %sunkaddr64 = add i64 %sunkaddr63, %lsr.iv27 + %sunkaddr65 = inttoptr i64 %sunkaddr64 to i8* + %13 = load i8* %sunkaddr65, align 1 + %cmp14 = icmp eq i8 %12, %13 + br i1 %cmp14, label %if.end17, label %return, !prof !991 +if.end17: + %lsr.iv.next28 = add i64 %lsr.iv27, 1 + br label %while.cond +while.end: + %14 = or i8 %3, %2 + %15 = icmp eq i8 %14, 0 + br i1 %15, label %if.then23, label %if.else88, !prof !992 +if.then23: + %sunkaddr66 = ptrtoint %struct.Connector_struct* %a to i64 + %sunkaddr67 = add i64 %sunkaddr66, 16 + %sunkaddr68 = inttoptr i64 %sunkaddr67 to i8** + %16 = load i8** %sunkaddr68, align 8 + %17 = load i8* %16, align 1 + %cmp26 = icmp eq i8 %17, 83 + %sunkaddr69 = ptrtoint i8* %4 to i64 + %sunkaddr70 = add i64 %sunkaddr69, %lsr.iv27 + %sunkaddr71 = inttoptr i64 %sunkaddr70 to i8* + %18 = load i8* %sunkaddr71, align 1 + br i1 %cmp26, label %land.lhs.true28, label %while.cond59.preheader, !prof !993 +land.lhs.true28: + switch i8 %18, label %land.rhs.preheader [ + i8 112, label %land.lhs.true35 + i8 0, label %return + ], !prof !994 
+land.lhs.true35: + %sunkaddr72 = ptrtoint i8* %5 to i64 + %sunkaddr73 = add i64 %sunkaddr72, %lsr.iv27 + %sunkaddr74 = inttoptr i64 %sunkaddr73 to i8* + %19 = load i8* %sunkaddr74, align 1 + switch i8 %19, label %land.rhs.preheader [ + i8 112, label %land.lhs.true43 + ], !prof !995 +land.lhs.true43: + %20 = ptrtoint i8* %16 to i64 + %21 = sub i64 0, %20 + %scevgep52 = getelementptr i8* %4, i64 %21 + %scevgep53 = getelementptr i8* %scevgep52, i64 %lsr.iv27 + %scevgep54 = getelementptr i8* %scevgep53, i64 -1 + %cmp45 = icmp eq i8* %scevgep54, null + br i1 %cmp45, label %return, label %lor.lhs.false47, !prof !996 +lor.lhs.false47: + %22 = ptrtoint i8* %16 to i64 + %23 = sub i64 0, %22 + %scevgep47 = getelementptr i8* %4, i64 %23 + %scevgep48 = getelementptr i8* %scevgep47, i64 %lsr.iv27 + %scevgep49 = getelementptr i8* %scevgep48, i64 -2 + %cmp50 = icmp eq i8* %scevgep49, null + br i1 %cmp50, label %land.lhs.true52, label %while.cond59.preheader, !prof !997 +land.lhs.true52: + %sunkaddr75 = ptrtoint i8* %4 to i64 + %sunkaddr76 = add i64 %sunkaddr75, %lsr.iv27 + %sunkaddr77 = add i64 %sunkaddr76, -1 + %sunkaddr78 = inttoptr i64 %sunkaddr77 to i8* + %24 = load i8* %sunkaddr78, align 1 + %cmp55 = icmp eq i8 %24, 73 + %cmp61233 = icmp eq i8 %18, 0 + %or.cond265 = or i1 %cmp55, %cmp61233 + br i1 %or.cond265, label %return, label %land.rhs.preheader, !prof !998 +while.cond59.preheader: + %cmp61233.old = icmp eq i8 %18, 0 + br i1 %cmp61233.old, label %return, label %land.rhs.preheader, !prof !999 +land.rhs.preheader: + %scevgep33 = getelementptr i8* %5, i64 %lsr.iv27 + %scevgep43 = getelementptr i8* %4, i64 %lsr.iv27 + br label %land.rhs +land.rhs: + %lsr.iv = phi i64 [ 0, %land.rhs.preheader ], [ %lsr.iv.next, %if.then83 ] + %25 = phi i8 [ %27, %if.then83 ], [ %18, %land.rhs.preheader ] + %scevgep34 = getelementptr i8* %scevgep33, i64 %lsr.iv + %26 = load i8* %scevgep34, align 1 + %cmp64 = icmp eq i8 %26, 0 + br i1 %cmp64, label %return, label %while.body66, !prof !1000 +while.body66: + %cmp68 = icmp eq i8 %25, 42 + %cmp72 = icmp eq i8 %26, 42 + %or.cond = or i1 %cmp68, %cmp72 + br i1 %or.cond, label %if.then83, label %lor.lhs.false74, !prof !1001 +lor.lhs.false74: + %cmp77 = icmp ne i8 %25, %26 + %cmp81 = icmp eq i8 %25, 94 + %or.cond208 = or i1 %cmp77, %cmp81 + br i1 %or.cond208, label %return, label %if.then83, !prof !1002 +if.then83: + %scevgep44 = getelementptr i8* %scevgep43, i64 %lsr.iv + %scevgep45 = getelementptr i8* %scevgep44, i64 1 + %27 = load i8* %scevgep45, align 1 + %cmp61 = icmp eq i8 %27, 0 + %lsr.iv.next = add i64 %lsr.iv, 1 + br i1 %cmp61, label %return, label %land.rhs, !prof !999 +if.else88: + %cmp89 = icmp eq i8 %2, 1 + %cmp92 = icmp eq i8 %3, 2 + %or.cond159 = and i1 %cmp89, %cmp92 + br i1 %or.cond159, label %while.cond95.preheader, label %if.else123, !prof !1003 +while.cond95.preheader: + %sunkaddr79 = ptrtoint i8* %4 to i64 + %sunkaddr80 = add i64 %sunkaddr79, %lsr.iv27 + %sunkaddr81 = inttoptr i64 %sunkaddr80 to i8* + %28 = load i8* %sunkaddr81, align 1 + %cmp97238 = icmp eq i8 %28, 0 + br i1 %cmp97238, label %return, label %land.rhs99.preheader, !prof !1004 +land.rhs99.preheader: + %scevgep31 = getelementptr i8* %5, i64 %lsr.iv27 + %scevgep40 = getelementptr i8* %4, i64 %lsr.iv27 + br label %land.rhs99 +land.rhs99: + %lsr.iv17 = phi i64 [ 0, %land.rhs99.preheader ], [ %lsr.iv.next18, %if.then117 ] + %29 = phi i8 [ %31, %if.then117 ], [ %28, %land.rhs99.preheader ] + %scevgep32 = getelementptr i8* %scevgep31, i64 %lsr.iv17 + %30 = load i8* %scevgep32, align 1 + %cmp101 = 
icmp eq i8 %30, 0 + br i1 %cmp101, label %return, label %while.body104, !prof !1005 +while.body104: + %cmp107 = icmp eq i8 %29, %30 + %cmp111 = icmp eq i8 %29, 42 + %or.cond209 = or i1 %cmp107, %cmp111 + %cmp115 = icmp eq i8 %30, 94 + %or.cond210 = or i1 %or.cond209, %cmp115 + br i1 %or.cond210, label %if.then117, label %return, !prof !1006 +if.then117: + %scevgep41 = getelementptr i8* %scevgep40, i64 %lsr.iv17 + %scevgep42 = getelementptr i8* %scevgep41, i64 1 + %31 = load i8* %scevgep42, align 1 + %cmp97 = icmp eq i8 %31, 0 + %lsr.iv.next18 = add i64 %lsr.iv17, 1 + br i1 %cmp97, label %return, label %land.rhs99, !prof !1004 +if.else123: + %cmp124 = icmp eq i8 %3, 1 + %cmp127 = icmp eq i8 %2, 2 + %or.cond160 = and i1 %cmp124, %cmp127 + br i1 %or.cond160, label %while.cond130.preheader, label %return, !prof !1007 +while.cond130.preheader: + %sunkaddr82 = ptrtoint i8* %4 to i64 + %sunkaddr83 = add i64 %sunkaddr82, %lsr.iv27 + %sunkaddr84 = inttoptr i64 %sunkaddr83 to i8* + %32 = load i8* %sunkaddr84, align 1 + %cmp132244 = icmp eq i8 %32, 0 + br i1 %cmp132244, label %return, label %land.rhs134.preheader, !prof !1008 +land.rhs134.preheader: + %scevgep29 = getelementptr i8* %5, i64 %lsr.iv27 + %scevgep37 = getelementptr i8* %4, i64 %lsr.iv27 + br label %land.rhs134 +land.rhs134: + %lsr.iv22 = phi i64 [ 0, %land.rhs134.preheader ], [ %lsr.iv.next23, %if.then152 ] + %33 = phi i8 [ %35, %if.then152 ], [ %32, %land.rhs134.preheader ] + %scevgep30 = getelementptr i8* %scevgep29, i64 %lsr.iv22 + %34 = load i8* %scevgep30, align 1 + %cmp136 = icmp eq i8 %34, 0 + br i1 %cmp136, label %return, label %while.body139, !prof !1009 +while.body139: + %cmp142 = icmp eq i8 %33, %34 + %cmp146 = icmp eq i8 %34, 42 + %or.cond211 = or i1 %cmp142, %cmp146 + %cmp150 = icmp eq i8 %33, 94 + %or.cond212 = or i1 %or.cond211, %cmp150 + br i1 %or.cond212, label %if.then152, label %return, !prof !1010 +if.then152: + %scevgep38 = getelementptr i8* %scevgep37, i64 %lsr.iv22 + %scevgep39 = getelementptr i8* %scevgep38, i64 1 + %35 = load i8* %scevgep39, align 1 + %cmp132 = icmp eq i8 %35, 0 + %lsr.iv.next23 = add i64 %lsr.iv22, 1 + br i1 %cmp132, label %return, label %land.rhs134, !prof !1008 +return: + %retval.0 = phi i32 [ 0, %entry ], [ 1, %land.lhs.true52 ], [ 1, %land.lhs.true43 ], [ 0, %if.else123 ], [ 1, %while.cond59.preheader ], [ 1, %while.cond95.preheader ], [ 1, %while.cond130.preheader ], [ 1, %land.lhs.true28 ], [ 1, %if.then83 ], [ 0, %lor.lhs.false74 ], [ 1, %land.rhs ], [ 1, %if.then117 ], [ 0, %while.body104 ], [ 1, %land.rhs99 ], [ 1, %if.then152 ], [ 0, %while.body139 ], [ 1, %land.rhs134 ], [ 0, %while.body ] + ret i32 %retval.0 +} +!181 = metadata !{metadata !"branch_weights", i32 662038, i32 1} +!988 = metadata !{metadata !"branch_weights", i32 12091450, i32 1916} +!989 = metadata !{metadata !"branch_weights", i32 7564670, i32 4526781} +!990 = metadata !{metadata !"branch_weights", i32 7484958, i32 13283499} +!991 = metadata !{metadata !"branch_weights", i32 8677007, i32 4606493} +!992 = metadata !{metadata !"branch_weights", i32 -1172426948, i32 145094705} +!993 = metadata !{metadata !"branch_weights", i32 1468914, i32 5683688} +!994 = metadata !{metadata !"branch_weights", i32 114025221, i32 -1217548794, i32 -1199521551, i32 87712616} +!995 = metadata !{metadata !"branch_weights", i32 1853716452, i32 -444717951, i32 932776759} +!996 = metadata !{metadata !"branch_weights", i32 1004870, i32 20259} +!997 = metadata !{metadata !"branch_weights", i32 20071, i32 189} +!998 = metadata !{metadata 
!"branch_weights", i32 -1020255939, i32 572177766} +!999 = metadata !{metadata !"branch_weights", i32 2666513, i32 3466431} +!1000 = metadata !{metadata !"branch_weights", i32 5117635, i32 1859780} +!1001 = metadata !{metadata !"branch_weights", i32 354902465, i32 -1444604407} +!1002 = metadata !{metadata !"branch_weights", i32 -1762419279, i32 1592770684} +!1003 = metadata !{metadata !"branch_weights", i32 1435905930, i32 -1951930624} +!1004 = metadata !{metadata !"branch_weights", i32 1, i32 504888} +!1005 = metadata !{metadata !"branch_weights", i32 94662, i32 504888} +!1006 = metadata !{metadata !"branch_weights", i32 -1897793104, i32 160196332} +!1007 = metadata !{metadata !"branch_weights", i32 2074643678, i32 -29579071} +!1008 = metadata !{metadata !"branch_weights", i32 1, i32 226163} +!1009 = metadata !{metadata !"branch_weights", i32 58357, i32 226163} +!1010 = metadata !{metadata !"branch_weights", i32 -2072848646, i32 92907517} diff --git a/test/CodeGen/AArch64/sext_inreg.ll b/test/CodeGen/AArch64/sext_inreg.ll new file mode 100644 index 0000000..2f76081 --- /dev/null +++ b/test/CodeGen/AArch64/sext_inreg.ll @@ -0,0 +1,198 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +; For formal arguments, we have the following vector type promotion, +; v2i8 is promoted to v2i32(f64) +; v2i16 is promoted to v2i32(f64) +; v4i8 is promoted to v4i16(f64) +; v8i1 is promoted to v8i16(f128) + +define <2 x i8> @test_sext_inreg_v2i8i16(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone { +; CHECK-LABEL: test_sext_inreg_v2i8i16 +; CHECK: sshll v0.8h, v0.8b, #0 +; CHECK-NEXT: uzp1 v0.8h, v0.8h, v0.8h +; CHECK-NEXT: sshll v1.8h, v1.8b, #0 +; CHECK-NEXT: uzp1 v1.8h, v1.8h, v1.8h + %1 = sext <2 x i8> %v1 to <2 x i16> + %2 = sext <2 x i8> %v2 to <2 x i16> + %3 = shufflevector <2 x i16> %1, <2 x i16> %2, <2 x i32> + %4 = trunc <2 x i16> %3 to <2 x i8> + ret <2 x i8> %4 +} + +define <2 x i8> @test_sext_inreg_v2i8i16_2(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone { +; CHECK-LABEL: test_sext_inreg_v2i8i16_2 +; CHECK: sshll v0.8h, v0.8b, #0 +; CHECK-NEXT: uzp1 v0.8h, v0.8h, v0.8h +; CHECK-NEXT: sshll v1.8h, v1.8b, #0 +; CHECK-NEXT: uzp1 v1.8h, v1.8h, v1.8h + %a1 = shl <2 x i32> %v1, + %a2 = ashr <2 x i32> %a1, + %b1 = shl <2 x i32> %v2, + %b2 = ashr <2 x i32> %b1, + %c = shufflevector <2 x i32> %a2, <2 x i32> %b2, <2 x i32> + %d = trunc <2 x i32> %c to <2 x i8> + ret <2 x i8> %d +} + +define <2 x i8> @test_sext_inreg_v2i8i32(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone { +; CHECK-LABEL: test_sext_inreg_v2i8i32 +; CHECK: sshll v0.8h, v0.8b, #0 +; CHECK-NEXT: uzp1 v0.8h, v0.8h, v0.8h +; CHECK-NEXT: sshll v1.8h, v1.8b, #0 +; CHECK-NEXT: uzp1 v1.8h, v1.8h, v1.8h + %1 = sext <2 x i8> %v1 to <2 x i32> + %2 = sext <2 x i8> %v2 to <2 x i32> + %3 = shufflevector <2 x i32> %1, <2 x i32> %2, <2 x i32> + %4 = trunc <2 x i32> %3 to <2 x i8> + ret <2 x i8> %4 +} + +define <2 x i8> @test_sext_inreg_v2i8i64(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone { +; CHECK-LABEL: test_sext_inreg_v2i8i64 +; CHECK: ushll v1.2d, v1.2s, #0 +; CHECK: ushll v0.2d, v0.2s, #0 +; CHECK: shl v0.2d, v0.2d, #56 +; CHECK: sshr v0.2d, v0.2d, #56 +; CHECK: shl v1.2d, v1.2d, #56 +; CHECK: sshr v1.2d, v1.2d, #56 + %1 = sext <2 x i8> %v1 to <2 x i64> + %2 = sext <2 x i8> %v2 to <2 x i64> + %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> + %4 = trunc <2 x i64> %3 to <2 x i8> + ret <2 x i8> %4 +} + +define <4 x i8> @test_sext_inreg_v4i8i16(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone { +; 
CHECK-LABEL: test_sext_inreg_v4i8i16 +; CHECK: sshll v0.8h, v0.8b, #0 +; CHECK-NEXT: uzp1 v0.8h, v0.8h, v0.8h +; CHECK-NEXT: sshll v1.8h, v1.8b, #0 +; CHECK-NEXT: uzp1 v1.8h, v1.8h, v1.8h + %1 = sext <4 x i8> %v1 to <4 x i16> + %2 = sext <4 x i8> %v2 to <4 x i16> + %3 = shufflevector <4 x i16> %1, <4 x i16> %2, <4 x i32> + %4 = trunc <4 x i16> %3 to <4 x i8> + ret <4 x i8> %4 +} + +define <4 x i8> @test_sext_inreg_v4i8i16_2(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone { +; CHECK-LABEL: test_sext_inreg_v4i8i16_2 +; CHECK: sshll v0.8h, v0.8b, #0 +; CHECK-NEXT: uzp1 v0.8h, v0.8h, v0.8h +; CHECK-NEXT: sshll v1.8h, v1.8b, #0 +; CHECK-NEXT: uzp1 v1.8h, v1.8h, v1.8h + %a1 = shl <4 x i16> %v1, + %a2 = ashr <4 x i16> %a1, + %b1 = shl <4 x i16> %v2, + %b2 = ashr <4 x i16> %b1, + %c = shufflevector <4 x i16> %a2, <4 x i16> %b2, <4 x i32> + %d = trunc <4 x i16> %c to <4 x i8> + ret <4 x i8> %d +} + +define <4 x i8> @test_sext_inreg_v4i8i32(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone { +; CHECK-LABEL: test_sext_inreg_v4i8i32 +; CHECK: ushll v1.4s, v1.4h, #0 +; CHECK: ushll v0.4s, v0.4h, #0 +; CHECK: shl v0.4s, v0.4s, #24 +; CHECK: sshr v0.4s, v0.4s, #24 +; CHECK: shl v1.4s, v1.4s, #24 +; CHECK: sshr v1.4s, v1.4s, #24 + %1 = sext <4 x i8> %v1 to <4 x i32> + %2 = sext <4 x i8> %v2 to <4 x i32> + %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> + %4 = trunc <4 x i32> %3 to <4 x i8> + ret <4 x i8> %4 +} + +define <8 x i8> @test_sext_inreg_v8i8i16(<8 x i8> %v1, <8 x i8> %v2) nounwind readnone { +; CHECK-LABEL: test_sext_inreg_v8i8i16 +; CHECK: sshll v0.8h, v0.8b, #0 +; CHECK: sshll v1.8h, v1.8b, #0 + %1 = sext <8 x i8> %v1 to <8 x i16> + %2 = sext <8 x i8> %v2 to <8 x i16> + %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> + %4 = trunc <8 x i16> %3 to <8 x i8> + ret <8 x i8> %4 +} + +define <8 x i1> @test_sext_inreg_v8i1i16(<8 x i1> %v1, <8 x i1> %v2) nounwind readnone { +; CHECK-LABEL: test_sext_inreg_v8i1i16 +; CHECK: ushll v1.8h, v1.8b, #0 +; CHECK: ushll v0.8h, v0.8b, #0 +; CHECK: shl v0.8h, v0.8h, #15 +; CHECK: sshr v0.8h, v0.8h, #15 +; CHECK: shl v1.8h, v1.8h, #15 +; CHECK: sshr v1.8h, v1.8h, #15 + %1 = sext <8 x i1> %v1 to <8 x i16> + %2 = sext <8 x i1> %v2 to <8 x i16> + %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> + %4 = trunc <8 x i16> %3 to <8 x i1> + ret <8 x i1> %4 +} + +define <2 x i16> @test_sext_inreg_v2i16i32(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone { +; CHECK-LABEL: test_sext_inreg_v2i16i32 +; CHECK: sshll v0.4s, v0.4h, #0 +; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s +; CHECK-NEXT: sshll v1.4s, v1.4h, #0 +; CHECK-NEXT: uzp1 v1.4s, v1.4s, v1.4s + %1 = sext <2 x i16> %v1 to <2 x i32> + %2 = sext <2 x i16> %v2 to <2 x i32> + %3 = shufflevector <2 x i32> %1, <2 x i32> %2, <2 x i32> + %4 = trunc <2 x i32> %3 to <2 x i16> + ret <2 x i16> %4 +} + +define <2 x i16> @test_sext_inreg_v2i16i32_2(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone { +; CHECK-LABEL: test_sext_inreg_v2i16i32_2 +; CHECK: sshll v0.4s, v0.4h, #0 +; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s +; CHECK-NEXT: sshll v1.4s, v1.4h, #0 +; CHECK-NEXT: uzp1 v1.4s, v1.4s, v1.4s + %a1 = shl <2 x i32> %v1, + %a2 = ashr <2 x i32> %a1, + %b1 = shl <2 x i32> %v2, + %b2 = ashr <2 x i32> %b1, + %c = shufflevector <2 x i32> %a2, <2 x i32> %b2, <2 x i32> + %d = trunc <2 x i32> %c to <2 x i16> + ret <2 x i16> %d +} + +define <2 x i16> @test_sext_inreg_v2i16i64(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone { +; CHECK-LABEL: test_sext_inreg_v2i16i64 +; CHECK: ushll v1.2d, v1.2s, #0 +; CHECK: ushll v0.2d, v0.2s, #0 +; 
CHECK: shl v0.2d, v0.2d, #48 +; CHECK: sshr v0.2d, v0.2d, #48 +; CHECK: shl v1.2d, v1.2d, #48 +; CHECK: sshr v1.2d, v1.2d, #48 + %1 = sext <2 x i16> %v1 to <2 x i64> + %2 = sext <2 x i16> %v2 to <2 x i64> + %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> + %4 = trunc <2 x i64> %3 to <2 x i16> + ret <2 x i16> %4 +} + +define <4 x i16> @test_sext_inreg_v4i16i32(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone { +; CHECK-LABEL: test_sext_inreg_v4i16i32 +; CHECK: sshll v0.4s, v0.4h, #0 +; CHECK: sshll v1.4s, v1.4h, #0 + %1 = sext <4 x i16> %v1 to <4 x i32> + %2 = sext <4 x i16> %v2 to <4 x i32> + %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> + %4 = trunc <4 x i32> %3 to <4 x i16> + ret <4 x i16> %4 +} + +define <2 x i32> @test_sext_inreg_v2i32i64(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone { +; CHECK-LABEL: test_sext_inreg_v2i32i64 +; CHECK: sshll v0.2d, v0.2s, #0 +; CHECK: sshll v1.2d, v1.2s, #0 + %1 = sext <2 x i32> %v1 to <2 x i64> + %2 = sext <2 x i32> %v2 to <2 x i64> + %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> + %4 = trunc <2 x i64> %3 to <2 x i32> + ret <2 x i32> %4 +} + diff --git a/test/CodeGen/AArch64/sincospow-vector-expansion.ll b/test/CodeGen/AArch64/sincospow-vector-expansion.ll new file mode 100644 index 0000000..259a55e --- /dev/null +++ b/test/CodeGen/AArch64/sincospow-vector-expansion.ll @@ -0,0 +1,96 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + + +define <2 x float> @test_cos_v2f64(<2 x double> %v1) { +; CHECK-LABEL: test_cos_v2f64: +; CHECK: bl cos +; CHECK: bl cos + %1 = call <2 x double> @llvm.cos.v2f64(<2 x double> %v1) + %2 = fptrunc <2 x double> %1 to <2 x float> + ret <2 x float> %2 +} + +define <2 x float> @test_sin_v2f64(<2 x double> %v1) { +; CHECK-LABEL: test_sin_v2f64: +; CHECK: bl sin +; CHECK: bl sin + %1 = call <2 x double> @llvm.sin.v2f64(<2 x double> %v1) + %2 = fptrunc <2 x double> %1 to <2 x float> + ret <2 x float> %2 +} + +define <2 x float> @test_pow_v2f64(<2 x double> %v1, <2 x double> %v2) { +; CHECK-LABEL: test_pow_v2f64: +; CHECK: bl pow +; CHECK: bl pow + %1 = call <2 x double> @llvm.pow.v2f64(<2 x double> %v1, <2 x double> %v2) + %2 = fptrunc <2 x double> %1 to <2 x float> + ret <2 x float> %2 +} + +declare <2 x double> @llvm.cos.v2f64(<2 x double>) +declare <2 x double> @llvm.sin.v2f64(<2 x double>) +declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>) + +define <2 x float> @test_cos_v2f32(<2 x float> %v1) { +; CHECK-LABEL: test_cos_v2f32: +; CHECK: bl cos +; CHECK: bl cos + %1 = call <2 x float> @llvm.cos.v2f32(<2 x float> %v1) + ret <2 x float> %1 +} + +define <2 x float> @test_sin_v2f32(<2 x float> %v1) { +; CHECK-LABEL: test_sin_v2f32: +; CHECK: bl sin +; CHECK: bl sin + %1 = call <2 x float> @llvm.sin.v2f32(<2 x float> %v1) + ret <2 x float> %1 +} + +define <2 x float> @test_pow_v2f32(<2 x float> %v1, <2 x float> %v2) { +; CHECK-LABEL: test_pow_v2f32: +; CHECK: bl pow +; CHECK: bl pow + %1 = call <2 x float> @llvm.pow.v2f32(<2 x float> %v1, <2 x float> %v2) + ret <2 x float> %1 +} + +declare <2 x float> @llvm.cos.v2f32(<2 x float>) +declare <2 x float> @llvm.sin.v2f32(<2 x float>) +declare <2 x float> @llvm.pow.v2f32(<2 x float>, <2 x float>) + +define <4 x float> @test_cos_v4f32(<4 x float> %v1) { +; CHECK-LABEL: test_cos_v4f32: +; CHECK: bl cos +; CHECK: bl cos +; CHECK: bl cos +; CHECK: bl cos + %1 = call <4 x float> @llvm.cos.v4f32(<4 x float> %v1) + ret <4 x float> %1 +} + +define <4 x float> @test_sin_v4f32(<4 x float> %v1) { +; 
CHECK-LABEL: test_sin_v4f32: +; CHECK: bl sin +; CHECK: bl sin +; CHECK: bl sin +; CHECK: bl sin + %1 = call <4 x float> @llvm.sin.v4f32(<4 x float> %v1) + ret <4 x float> %1 +} + +define <4 x float> @test_pow_v4f32(<4 x float> %v1, <4 x float> %v2) { +; CHECK-LABEL: test_pow_v4f32: +; CHECK: bl pow +; CHECK: bl pow +; CHECK: bl pow +; CHECK: bl pow + %1 = call <4 x float> @llvm.pow.v4f32(<4 x float> %v1, <4 x float> %v2) + ret <4 x float> %1 +} + +declare <4 x float> @llvm.cos.v4f32(<4 x float>) +declare <4 x float> @llvm.sin.v4f32(<4 x float>) +declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>) + diff --git a/test/CodeGen/AArch64/variadic.ll b/test/CodeGen/AArch64/variadic.ll index f3d376b..1c7f1e0 100644 --- a/test/CodeGen/AArch64/variadic.ll +++ b/test/CodeGen/AArch64/variadic.ll @@ -10,14 +10,12 @@ declare void @llvm.va_start(i8*) define void @test_simple(i32 %n, ...) { ; CHECK-LABEL: test_simple: ; CHECK: sub sp, sp, #[[STACKSIZE:[0-9]+]] -; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var ; CHECK: mov x[[FPRBASE:[0-9]+]], sp ; CHECK: str q7, [x[[FPRBASE]], #112] ; CHECK: add x[[GPRBASE:[0-9]+]], sp, #[[GPRFROMSP:[0-9]+]] ; CHECK: str x7, [x[[GPRBASE]], #48] ; CHECK-NOFP: sub sp, sp, #[[STACKSIZE:[0-9]+]] -; CHECK-NOFP: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var ; CHECK-NOFP: add x[[GPRBASE:[0-9]+]], sp, #[[GPRFROMSP:[0-9]+]] ; CHECK-NOFP: str x7, [x[[GPRBASE]], #48] ; CHECK-NOFP-NOT: str q7, @@ -27,8 +25,10 @@ define void @test_simple(i32 %n, ...) { ; CHECK: str q0, [sp] ; CHECK: str x1, [sp, #[[GPRFROMSP]]] +; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var ; CHECK-NOFP-NOT: str q0, [sp] +; CHECK-NOFP: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var %addr = bitcast %va_list* @var to i8* call void @llvm.va_start(i8* %addr) @@ -179,26 +179,63 @@ define void @test_va_copy() { ; Check beginning and end again: -; CHECK: ldr [[BLOCK:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var] ; CHECK: add x[[SRC_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var -; CHECK-NOFP: ldr [[BLOCK:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var] +; CHECK: add x[[DEST_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:second_list +; CHECK: ldr [[BLOCK1:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var] +; CHECK: ldr [[BLOCK2:x[0-9]+]], [x[[SRC_LIST]], #24] +; CHECK: str [[BLOCK1]], [{{x[0-9]+}}, #:lo12:second_list] +; CHECK: str [[BLOCK2]], [x[[DEST_LIST]], #24] + ; CHECK-NOFP: add x[[SRC_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var +; CHECK-NOFP: add x[[DEST_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:second_list +; CHECK-NOFP: ldr [[BLOCK1:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var] +; CHECK-NOFP: ldr [[BLOCK2:x[0-9]+]], [x[[SRC_LIST]], #24] +; CHECK-NOFP: str [[BLOCK1]], [{{x[0-9]+}}, #:lo12:second_list] +; CHECK-NOFP: str [[BLOCK2]], [x[[DEST_LIST]], #24] -; CHECK: str [[BLOCK]], [{{x[0-9]+}}, #:lo12:second_list] + ret void +; CHECK: ret +; CHECK-NOFP: ret +} -; CHECK: ldr [[BLOCK:x[0-9]+]], [x[[SRC_LIST]], #24] -; CHECK: add x[[DEST_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:second_list +%struct.s_3i = type { i32, i32, i32 } -; CHECK: str [[BLOCK]], [x[[DEST_LIST]], #24] +; This checks that, if the last named argument is not a multiple of 8 bytes, +; and is allocated on the stack, that __va_list.__stack is initialised to the +; first 8-byte aligned location above it. +define void @test_va_odd_struct_on_stack(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, [1 x i64], %struct.s_3i* byval nocapture readnone align 4 %h, ...) 
{ +; CHECK-LABEL: test_va_odd_struct_on_stack: -; CHECK-NOFP: str [[BLOCK]], [{{x[0-9]+}}, #:lo12:second_list] +; CHECK: sub sp, sp, #128 +; CHECK: mov x[[FPRBASE:[0-9]+]], sp +; CHECK: str q7, [x[[FPRBASE]], #112] -; CHECK-NOFP: ldr [[BLOCK:x[0-9]+]], [x[[SRC_LIST]], #24] -; CHECK-NOFP: add x[[DEST_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:second_list +; CHECK-NOT: str x{{[0-9]+}}, + +; CHECK-NOFP-NOT: str q7, +; CHECK-NOT: str x7, -; CHECK-NOFP: str [[BLOCK]], [x[[DEST_LIST]], #24] +; Omit the middle ones + +; CHECK: str q0, [sp] + %addr = bitcast %va_list* @var to i8* + call void @llvm.va_start(i8* %addr) +; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var +; CHECK: movn [[VR_OFFS:w[0-9]+]], #127 +; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28] +; CHECK: str wzr, [x[[VA_LIST]], #24] +; CHECK: add [[VR_TOP:x[0-9]+]], x[[FPRBASE]], #128 +; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16] +; This constant would be #140 if it was not 8-byte aligned +; CHECK: add [[STACK:x[0-9]+]], sp, #144 +; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var] + +; CHECK-NOFP: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var +; This constant would be #12 if it was not 8-byte aligned +; CHECK-NOFP: add [[STACK:x[0-9]+]], sp, #16 +; CHECK-NOFP: str [[STACK]], [{{x[0-9]+}}, #:lo12:var] +; CHECK-NOFP: str wzr, [x[[VA_LIST]], #28] +; CHECK-NOFP: str wzr, [x[[VA_LIST]], #24] ret void -; CHECK: ret -; CHECK-NOFP: ret } diff --git a/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll b/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll index a0235f7..f8bd886 100644 --- a/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll +++ b/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+v6 +; RUN: llc -mtriple=arm-eabi -mattr=+v6 %s -o /dev/null %struct.layer_data = type { i32, [2048 x i8], i8*, [16 x i8], i32, i8*, i32, i32, [64 x i32], [64 x i32], [64 x i32], [64 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [12 x [64 x i16]] } @ld = external global %struct.layer_data* ; <%struct.layer_data**> [#uses=1] diff --git a/test/CodeGen/ARM/2007-04-03-PEIBug.ll b/test/CodeGen/ARM/2007-04-03-PEIBug.ll index 8d3337c..cf5094f 100644 --- a/test/CodeGen/ARM/2007-04-03-PEIBug.ll +++ b/test/CodeGen/ARM/2007-04-03-PEIBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm | not grep "add.*#0" +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s define i32 @foo() { entry: @@ -10,3 +10,6 @@ entry: } declare i32 @bar(...) + +; CHECK-NOT: add{{.*}}#0 + diff --git a/test/CodeGen/ARM/2007-05-14-InlineAsmCstCrash.ll b/test/CodeGen/ARM/2007-05-14-InlineAsmCstCrash.ll index b3b0769..99e67d5 100644 --- a/test/CodeGen/ARM/2007-05-14-InlineAsmCstCrash.ll +++ b/test/CodeGen/ARM/2007-05-14-InlineAsmCstCrash.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+v6 +; RUN: llc -mtriple=arm-eabi -mattr=+v6 %s -o /dev/null define i32 @test3() { tail call void asm sideeffect "/* number: ${0:c} */", "i"( i32 1 ) diff --git a/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll b/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll index 670048b..5988c65 100644 --- a/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll +++ b/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm | not grep "str.*\!" 
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s %struct.shape_edge_t = type { %struct.shape_edge_t*, %struct.shape_edge_t*, i32, i32, i32, i32 } %struct.shape_path_t = type { %struct.shape_edge_t*, %struct.shape_edge_t*, i32, i32, i32, i32, i32, i32 } @@ -32,3 +32,6 @@ bb140: ; preds = %bb140, %cond_false bb174: ; preds = %bb140, %cond_false ret %struct.shape_path_t* null } + +; CHECK-NOT: str{{.*}}! + diff --git a/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll b/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll index a604c5c..95aa595 100644 --- a/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll +++ b/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+v6 | not grep 255 +; RUN: llc -mtriple=arm-eabi -mattr=+v6 %s -o - | FileCheck %s define i32 @main(i32 %argc, i8** %argv) { entry: @@ -12,3 +12,6 @@ bb2: ; preds = %bb1 bb3: ; preds = %bb1 ret i32 0 } + +; CHECK-NOT: 255 + diff --git a/test/CodeGen/ARM/2008-07-17-Fdiv.ll b/test/CodeGen/ARM/2008-07-17-Fdiv.ll index 4cb768e..9f50d92 100644 --- a/test/CodeGen/ARM/2008-07-17-Fdiv.ll +++ b/test/CodeGen/ARM/2008-07-17-Fdiv.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm +; RUN: llc -mtriple=arm-eabi %s -o /dev/null define float @f(float %a, float %b) nounwind { %tmp = fdiv float %a, %b diff --git a/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll b/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll index 83fde07..e86bc1b 100644 --- a/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll +++ b/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm +; RUN: llc -mtriple=arm-eabi %s -o /dev/null ; PR2589 define void @main({ i32 }*) { diff --git a/test/CodeGen/ARM/2008-11-18-ScavengerAssert.ll b/test/CodeGen/ARM/2008-11-18-ScavengerAssert.ll index 601a516..d16ad8c 100644 --- a/test/CodeGen/ARM/2008-11-18-ScavengerAssert.ll +++ b/test/CodeGen/ARM/2008-11-18-ScavengerAssert.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 +; RUN: llc -mtriple=arm-eabi -mattr=+v6,+vfp2 %s -o /dev/null define hidden i64 @__muldi3(i64 %u, i64 %v) nounwind { entry: diff --git a/test/CodeGen/ARM/2009-03-09-AddrModeBug.ll b/test/CodeGen/ARM/2009-03-09-AddrModeBug.ll index a1ce384..7bb1429 100644 --- a/test/CodeGen/ARM/2009-03-09-AddrModeBug.ll +++ b/test/CodeGen/ARM/2009-03-09-AddrModeBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm +; RUN: llc -mtriple=arm-eabi %s -o /dev/null %struct.hit_t = type { %struct.v_t, double } %struct.node_t = type { %struct.hit_t, %struct.hit_t, i32 } diff --git a/test/CodeGen/ARM/2009-04-06-AsmModifier.ll b/test/CodeGen/ARM/2009-04-06-AsmModifier.ll index 7342f69..e90c5b3 100644 --- a/test/CodeGen/ARM/2009-04-06-AsmModifier.ll +++ b/test/CodeGen/ARM/2009-04-06-AsmModifier.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm | grep "swi 107" +; RUN: llc -mtriple=arm-eabi -no-integrated-as %s -o - | FileCheck %s define i32 @_swilseek(i32) nounwind { entry: @@ -18,3 +18,6 @@ return: ; preds = %entry %4 = load i32* %retval ; [#uses=1] ret i32 %4 } + +; CHECK: swi 107 + diff --git a/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll b/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll index f6b3d2c..ade6a10 100644 --- a/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll +++ b/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm +; RUN: llc -mtriple=arm-eabi %s -o /dev/null ; PR3795 define fastcc void @_D3foo3fooFAriZv({ i32, { double, double }* } %d_arg, i32 %x_arg) { diff --git a/test/CodeGen/ARM/2009-04-08-FREM.ll b/test/CodeGen/ARM/2009-04-08-FREM.ll index 
99907fc..606c6b1 100644 --- a/test/CodeGen/ARM/2009-04-08-FREM.ll +++ b/test/CodeGen/ARM/2009-04-08-FREM.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm +; RUN: llc -mtriple=arm-eabi %s -o /dev/null declare i32 @printf(i8*, ...) diff --git a/test/CodeGen/ARM/2009-04-08-FloatUndef.ll b/test/CodeGen/ARM/2009-04-08-FloatUndef.ll index 05d2f26..9e32e05 100644 --- a/test/CodeGen/ARM/2009-04-08-FloatUndef.ll +++ b/test/CodeGen/ARM/2009-04-08-FloatUndef.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm +; RUN: llc -mtriple=arm-eabi %s -o /dev/null define void @execute_shader(<4 x float>* %OUT, <4 x float>* %IN, <4 x float>* %CONST) { entry: diff --git a/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll b/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll index deb092b..5b17463 100644 --- a/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll +++ b/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm +; RUN: llc -mtriple=arm-eabi %s -o /dev/null ; PR3954 define void @foo(...) nounwind { diff --git a/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll b/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll index 7046fcc..2bc7df0 100644 --- a/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll +++ b/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=arm +; RUN: llc -mtriple=arm-eabi %s -o /dev/null + %struct.List = type { %struct.List*, i32 } @Node5 = external constant %struct.List ; <%struct.List*> [#uses=1] @"\01LC" = external constant [7 x i8] ; <[7 x i8]*> [#uses=1] diff --git a/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll b/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll index 1e2707f..5d59fc6 100644 --- a/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll +++ b/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=arm | FileCheck %s -; RUN: llc < %s -march=thumb | FileCheck %s +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s +; RUN: llc -mtriple=thumb-eabi %s -o - | FileCheck %s ; PR4091 define void @foo(i32 %i, i32* %p) nounwind { diff --git a/test/CodeGen/ARM/2009-07-09-asm-p-constraint.ll b/test/CodeGen/ARM/2009-07-09-asm-p-constraint.ll index e1e94b6..3cef0aa 100644 --- a/test/CodeGen/ARM/2009-07-09-asm-p-constraint.ll +++ b/test/CodeGen/ARM/2009-07-09-asm-p-constraint.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+v6 +; RUN: llc -mtriple=arm-eabi -mattr=+v6 %s -o /dev/null define void @test(i8* %x) nounwind { entry: diff --git a/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll b/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll index 6761687..bc4a95c 100644 --- a/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll +++ b/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm +; RUN: llc -mtriple=arm-eabi %s -o /dev/null %struct.cli_ac_alt = type { i8, i8*, i16, i16, %struct.cli_ac_alt* } %struct.cli_ac_node = type { i8, i8, %struct.cli_ac_patt*, %struct.cli_ac_node**, %struct.cli_ac_node* } diff --git a/test/CodeGen/ARM/2009-08-23-linkerprivate.ll b/test/CodeGen/ARM/2009-08-23-linkerprivate.ll deleted file mode 100644 index 392c70a..0000000 --- a/test/CodeGen/ARM/2009-08-23-linkerprivate.ll +++ /dev/null @@ -1,8 +0,0 @@ -; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | FileCheck %s - -; ModuleID = '/Volumes/MacOS9/tests/WebKit/JavaScriptCore/profiler/ProfilerServer.mm' - -@"\01l_objc_msgSend_fixup_alloc" = linker_private_weak hidden global i32 0, section "__DATA, __objc_msgrefs, coalesced", align 16 - -; CHECK: .globl l_objc_msgSend_fixup_alloc -; CHECK: .weak_definition 
l_objc_msgSend_fixup_alloc diff --git a/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll b/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll index ee99c70..b078ec0 100644 --- a/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll +++ b/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll @@ -1,5 +1,6 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s ; pr4843 + define <4 x i16> @v2regbug(<4 x i16>* %B) nounwind { ;CHECK-LABEL: v2regbug: ;CHECK: vzip.16 diff --git a/test/CodeGen/ARM/2009-09-10-postdec.ll b/test/CodeGen/ARM/2009-09-10-postdec.ll index 10653b5..66ffe6a 100644 --- a/test/CodeGen/ARM/2009-09-10-postdec.ll +++ b/test/CodeGen/ARM/2009-09-10-postdec.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=arm < %s | FileCheck %s +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s ; Radar 7213850 define i32 @test(i8* %d, i32 %x, i32 %y) nounwind { diff --git a/test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll b/test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll index 758b59a..dd9a6fd 100644 --- a/test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll +++ b/test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon -mcpu=cortex-a9 +; RUN: llc -mtriple=arm-eabi -mattr=+neon -mcpu=cortex-a9 %s -o /dev/null define arm_aapcs_vfpcc <4 x float> @foo(i8* nocapture %pBuffer, i32 %numItems) nounwind { %1 = ptrtoint i8* %pBuffer to i32 diff --git a/test/CodeGen/ARM/2009-09-24-spill-align.ll b/test/CodeGen/ARM/2009-09-24-spill-align.ll index eb9c2d0..224bd01 100644 --- a/test/CodeGen/ARM/2009-09-24-spill-align.ll +++ b/test/CodeGen/ARM/2009-09-24-spill-align.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s ; pr4926 define void @test_vget_lanep16() nounwind { diff --git a/test/CodeGen/ARM/2010-03-04-stm-undef-addr.ll b/test/CodeGen/ARM/2010-03-04-stm-undef-addr.ll index b0b4cb3..5e75d46 100644 --- a/test/CodeGen/ARM/2010-03-04-stm-undef-addr.ll +++ b/test/CodeGen/ARM/2010-03-04-stm-undef-addr.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm +; RUN: llc -mtriple=arm-eabi %s -o /dev/null define void @"java.lang.String::getChars"([84 x i8]* %method, i32 %base_pc, [788 x i8]* %thread) { %1 = sub i32 undef, 48 ; [#uses=1] diff --git a/test/CodeGen/ARM/2010-04-09-NeonSelect.ll b/test/CodeGen/ARM/2010-04-09-NeonSelect.ll index 89d6a68..ceef083 100644 --- a/test/CodeGen/ARM/2010-04-09-NeonSelect.ll +++ b/test/CodeGen/ARM/2010-04-09-NeonSelect.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=arm -mattr=+neon < %s -; Radar 7770501: Don't crash on SELECT and SELECT_CC with NEON vector values. +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o /dev/null +; rdar://7770501 : Don't crash on SELECT and SELECT_CC with NEON vector values. 
define void @vDSP_FFT16_copv(float* nocapture %O, float* nocapture %I, i32 %Direction) nounwind { entry: diff --git a/test/CodeGen/ARM/2010-04-14-SplitVector.ll b/test/CodeGen/ARM/2010-04-14-SplitVector.ll index 5d0c3cf..cb3e042 100644 --- a/test/CodeGen/ARM/2010-04-14-SplitVector.ll +++ b/test/CodeGen/ARM/2010-04-14-SplitVector.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mcpu=arm1136jf-s +; RUN: llc -mtriple=arm-eabi -mcpu=arm1136jf-s %s -o /dev/null ; Radar 7854640 define void @test() nounwind { diff --git a/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll index e0f50c9..cfaffd8 100644 --- a/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll +++ b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon -O0 -optimize-regalloc -regalloc=basic +; RUN: llc -mtriple=arm-eabi -mattr=+neon -O0 -optimize-regalloc -regalloc=basic %s -o /dev/null ; This test would crash the rewriter when trying to handle a spill after one of ; the @llvm.arm.neon.vld3.v8i8 defined three parts of a register. diff --git a/test/CodeGen/ARM/2010-05-21-BuildVector.ll b/test/CodeGen/ARM/2010-05-21-BuildVector.ll index a400b7b..5bc08b0 100644 --- a/test/CodeGen/ARM/2010-05-21-BuildVector.ll +++ b/test/CodeGen/ARM/2010-05-21-BuildVector.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s ; Radar 7872877 define void @test(float* %fltp, i32 %packedValue, float* %table) nounwind { diff --git a/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll b/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll index 6f48796..f7ceb6e 100644 --- a/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll +++ b/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=arm -mattr=+neon -; Radar 8084742 +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o /dev/null +; rdar://8084742 %struct.__int8x8x2_t = type { [2 x <8 x i8>] } diff --git a/test/CodeGen/ARM/2010-06-29-SubregImpDefs.ll b/test/CodeGen/ARM/2010-06-29-SubregImpDefs.ll index 984583e..fcabc90 100644 --- a/test/CodeGen/ARM/2010-06-29-SubregImpDefs.ll +++ b/test/CodeGen/ARM/2010-06-29-SubregImpDefs.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o /dev/null @.str271 = external constant [21 x i8], align 4 ; <[21 x i8]*> [#uses=1] @llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (i32, i8**)* @main to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] diff --git a/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll b/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll index 2842437..80822c2 100644 --- a/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll +++ b/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll @@ -1,4 +1,4 @@ -; RUN: llc -enable-correct-eh-support < %s +; RUN: llc < %s ; PR7716 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" target triple = "thumbv7-apple-darwin10.0.0" diff --git a/test/CodeGen/ARM/2010-08-04-StackVariable.ll b/test/CodeGen/ARM/2010-08-04-StackVariable.ll index 7aacd1a..bc4cc98 100644 --- a/test/CodeGen/ARM/2010-08-04-StackVariable.ll +++ b/test/CodeGen/ARM/2010-08-04-StackVariable.ll @@ -124,6 +124,6 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !44 = metadata !{i32 786688, metadata !39, metadata !"k", metadata !2, i32 26, metadata !13, i32 0, i32 0} ; [ DW_TAG_auto_variable ] !45 = metadata !{i32 27, i32 0, 
metadata !39, null} !46 = metadata !{metadata !0, metadata !9, metadata !16, metadata !17, metadata !20} -!47 = metadata !{i32 0} +!47 = metadata !{} !48 = metadata !{metadata !"small.cc", metadata !"/Users/manav/R8248330"} !49 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/ARM/2010-09-29-mc-asm-header-test.ll b/test/CodeGen/ARM/2010-09-29-mc-asm-header-test.ll deleted file mode 100644 index 3053694..0000000 --- a/test/CodeGen/ARM/2010-09-29-mc-asm-header-test.ll +++ /dev/null @@ -1,285 +0,0 @@ -; This tests that MC/asm header conversion is smooth and that the -; build attributes are correct - -; RUN: llc < %s -mtriple=armv6-linux-gnueabi | FileCheck %s --check-prefix=V6 -; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi | FileCheck %s --check-prefix=V6M -; RUN: llc < %s -mtriple=armv6-linux-gnueabi -mcpu=arm1156t2f-s | FileCheck %s --check-prefix=ARM1156T2F-S -; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi | FileCheck %s --check-prefix=V7M -; RUN: llc < %s -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=V7 -; RUN: llc < %s -mtriple=armv8-linux-gnueabi | FileCheck %s --check-prefix=V8 -; RUN: llc < %s -mtriple=thumbv8-linux-gnueabi | FileCheck %s --check-prefix=Vt8 -; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mattr=-neon,-crypto | FileCheck %s --check-prefix=V8-FPARMv8 -; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mattr=-fp-armv8,-crypto | FileCheck %s --check-prefix=V8-NEON -; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mattr=-crypto | FileCheck %s --check-prefix=V8-FPARMv8-NEON -; RUN: llc < %s -mtriple=armv8-linux-gnueabi | FileCheck %s --check-prefix=V8-FPARMv8-NEON-CRYPTO -; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9 -float-abi=soft | FileCheck %s --check-prefix=CORTEX-A9-SOFT -; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9 -float-abi=hard | FileCheck %s --check-prefix=CORTEX-A9-HARD -; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9-mp | FileCheck %s --check-prefix=CORTEX-A9-MP -; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a15 | FileCheck %s --check-prefix=CORTEX-A15 -; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0 | FileCheck %s --check-prefix=CORTEX-M0 -; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m4 -float-abi=soft | FileCheck %s --check-prefix=CORTEX-M4-SOFT -; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m4 -float-abi=hard | FileCheck %s --check-prefix=CORTEX-M4-HARD -; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r5 | FileCheck %s --check-prefix=CORTEX-R5 -; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a53 | FileCheck %s --check-prefix=CORTEX-A53 -; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a57 | FileCheck %s --check-prefix=CORTEX-A57 - -; V6: .eabi_attribute 6, 6 -; V6: .eabi_attribute 8, 1 -; V6: .eabi_attribute 24, 1 -; V6: .eabi_attribute 25, 1 -; V6-NOT: .eabi_attribute 27 -; V6-NOT: .eabi_attribute 28 -; V6-NOT: .eabi_attribute 36 -; V6-NOT: .eabi_attribute 42 -; V6-NOT: .eabi_attribute 68 - -; V6M: .eabi_attribute 6, 12 -; V6M: .eabi_attribute 7, 77 -; V6M: .eabi_attribute 8, 0 -; V6M: .eabi_attribute 9, 1 -; V6M: .eabi_attribute 24, 1 -; V6M: .eabi_attribute 25, 1 -; V6M-NOT: .eabi_attribute 27 -; V6M-NOT: .eabi_attribute 28 -; V6M-NOT: .eabi_attribute 36 -; V6M-NOT: .eabi_attribute 42 -; V6M-NOT: .eabi_attribute 68 - -; ARM1156T2F-S: .cpu arm1156t2f-s -; ARM1156T2F-S: .eabi_attribute 6, 8 -; ARM1156T2F-S: .eabi_attribute 8, 1 -; ARM1156T2F-S: .eabi_attribute 9, 2 -; ARM1156T2F-S: .fpu 
vfpv2 -; ARM1156T2F-S: .eabi_attribute 20, 1 -; ARM1156T2F-S: .eabi_attribute 21, 1 -; ARM1156T2F-S: .eabi_attribute 23, 3 -; ARM1156T2F-S: .eabi_attribute 24, 1 -; ARM1156T2F-S: .eabi_attribute 25, 1 -; ARM1156T2F-S-NOT: .eabi_attribute 27 -; ARM1156T2F-S-NOT: .eabi_attribute 28 -; ARM1156T2F-S-NOT: .eabi_attribute 36 -; ARM1156T2F-S-NOT: .eabi_attribute 42 -; ARM1156T2F-S-NOT: .eabi_attribute 68 - -; V7M: .eabi_attribute 6, 10 -; V7M: .eabi_attribute 7, 77 -; V7M: .eabi_attribute 8, 0 -; V7M: .eabi_attribute 9, 2 -; V7M: .eabi_attribute 24, 1 -; V7M: .eabi_attribute 25, 1 -; V7M-NOT: .eabi_attribute 27 -; V7M-NOT: .eabi_attribute 28 -; V7M-NOT: .eabi_attribute 36 -; V7M-NOT: .eabi_attribute 42 -; V7M: .eabi_attribute 44, 0 -; V7M-NOT: .eabi_attribute 68 - -; V7: .syntax unified -; V7: .eabi_attribute 6, 10 -; V7: .eabi_attribute 20, 1 -; V7: .eabi_attribute 21, 1 -; V7: .eabi_attribute 23, 3 -; V7: .eabi_attribute 24, 1 -; V7: .eabi_attribute 25, 1 -; V7-NOT: .eabi_attribute 27 -; V7-NOT: .eabi_attribute 28 -; V7-NOT: .eabi_attribute 36 -; V7-NOT: .eabi_attribute 42 -; V7-NOT: .eabi_attribute 68 - -; V8: .syntax unified -; V8: .eabi_attribute 6, 14 - -; Vt8: .syntax unified -; Vt8: .eabi_attribute 6, 14 - -; V8-FPARMv8: .syntax unified -; V8-FPARMv8: .eabi_attribute 6, 14 -; V8-FPARMv8: .fpu fp-armv8 - -; V8-NEON: .syntax unified -; V8-NEON: .eabi_attribute 6, 14 -; V8-NEON: .fpu neon -; V8-NEON: .eabi_attribute 12, 3 - -; V8-FPARMv8-NEON: .syntax unified -; V8-FPARMv8-NEON: .eabi_attribute 6, 14 -; V8-FPARMv8-NEON: .fpu neon-fp-armv8 -; V8-FPARMv8-NEON: .eabi_attribute 12, 3 - -; V8-FPARMv8-NEON-CRYPTO: .syntax unified -; V8-FPARMv8-NEON-CRYPTO: .eabi_attribute 6, 14 -; V8-FPARMv8-NEON-CRYPTO: .fpu crypto-neon-fp-armv8 -; V8-FPARMv8-NEON-CRYPTO: .eabi_attribute 12, 3 - -; CORTEX-A9-SOFT: .cpu cortex-a9 -; CORTEX-A9-SOFT: .eabi_attribute 6, 10 -; CORTEX-A9-SOFT: .eabi_attribute 7, 65 -; CORTEX-A9-SOFT: .eabi_attribute 8, 1 -; CORTEX-A9-SOFT: .eabi_attribute 9, 2 -; CORTEX-A9-SOFT: .fpu neon -; CORTEX-A9-SOFT: .eabi_attribute 20, 1 -; CORTEX-A9-SOFT: .eabi_attribute 21, 1 -; CORTEX-A9-SOFT: .eabi_attribute 23, 3 -; CORTEX-A9-SOFT: .eabi_attribute 24, 1 -; CORTEX-A9-SOFT: .eabi_attribute 25, 1 -; CORTEX-A9-SOFT-NOT: .eabi_attribute 27 -; CORTEX-A9-SOFT-NOT: .eabi_attribute 28 -; CORTEX-A9-SOFT: .eabi_attribute 36, 1 -; CORTEX-A9-SOFT-NOT: .eabi_attribute 42 -; CORTEX-A9-SOFT: .eabi_attribute 68, 1 - -; CORTEX-A9-HARD: .cpu cortex-a9 -; CORTEX-A9-HARD: .eabi_attribute 6, 10 -; CORTEX-A9-HARD: .eabi_attribute 7, 65 -; CORTEX-A9-HARD: .eabi_attribute 8, 1 -; CORTEX-A9-HARD: .eabi_attribute 9, 2 -; CORTEX-A9-HARD: .fpu neon -; CORTEX-A9-HARD: .eabi_attribute 20, 1 -; CORTEX-A9-HARD: .eabi_attribute 21, 1 -; CORTEX-A9-HARD: .eabi_attribute 23, 3 -; CORTEX-A9-HARD: .eabi_attribute 24, 1 -; CORTEX-A9-HARD: .eabi_attribute 25, 1 -; CORTEX-A9-HARD-NOT: .eabi_attribute 27 -; CORTEX-A9-HARD: .eabi_attribute 28, 1 -; CORTEX-A9-HARD: .eabi_attribute 36, 1 -; CORTEX-A9-HARD-NOT: .eabi_attribute 42 -; CORTEX-A9-HARD: .eabi_attribute 68, 1 - -; CORTEX-A9-MP: .cpu cortex-a9-mp -; CORTEX-A9-MP: .eabi_attribute 6, 10 -; CORTEX-A9-MP: .eabi_attribute 7, 65 -; CORTEX-A9-MP: .eabi_attribute 8, 1 -; CORTEX-A9-MP: .eabi_attribute 9, 2 -; CORTEX-A9-MP: .fpu neon -; CORTEX-A9-MP: .eabi_attribute 20, 1 -; CORTEX-A9-MP: .eabi_attribute 21, 1 -; CORTEX-A9-MP: .eabi_attribute 23, 3 -; CORTEX-A9-MP: .eabi_attribute 24, 1 -; CORTEX-A9-MP: .eabi_attribute 25, 1 -; CORTEX-A9-NOT: .eabi_attribute 27 -; CORTEX-A9-NOT: 
.eabi_attribute 28 -; CORTEX-A9-MP: .eabi_attribute 36, 1 -; CORTEX-A9-MP: .eabi_attribute 42, 1 -; CORTEX-A9-MP: .eabi_attribute 68, 1 - -; CORTEX-A15: .cpu cortex-a15 -; CORTEX-A15: .eabi_attribute 6, 10 -; CORTEX-A15: .eabi_attribute 7, 65 -; CORTEX-A15: .eabi_attribute 8, 1 -; CORTEX-A15: .eabi_attribute 9, 2 -; CORTEX-A15: .fpu neon-vfpv4 -; CORTEX-A15: .eabi_attribute 20, 1 -; CORTEX-A15: .eabi_attribute 21, 1 -; CORTEX-A15: .eabi_attribute 23, 3 -; CORTEX-A15: .eabi_attribute 24, 1 -; CORTEX-A15: .eabi_attribute 25, 1 -; CORTEX-A15-NOT: .eabi_attribute 27 -; CORTEX-A15-NOT: .eabi_attribute 28 -; CORTEX-A15: .eabi_attribute 36, 1 -; CORTEX-A15: .eabi_attribute 42, 1 -; CORTEX-A15: .eabi_attribute 44, 2 -; CORTEX-A15: .eabi_attribute 68, 3 - -; CORTEX-M0: .cpu cortex-m0 -; CORTEX-M0: .eabi_attribute 6, 12 -; CORTEX-M0: .eabi_attribute 7, 77 -; CORTEX-M0: .eabi_attribute 8, 0 -; CORTEX-M0: .eabi_attribute 9, 1 -; CORTEX-M0: .eabi_attribute 24, 1 -; CORTEX-M0: .eabi_attribute 25, 1 -; CORTEX-M0-NOT: .eabi_attribute 27 -; CORTEX-M0-NOT: .eabi_attribute 28 -; CORTEX-M0-NOT: .eabi_attribute 36 -; CORTEX-M0-NOT: .eabi_attribute 42 -; CORTEX-M0-NOT: .eabi_attribute 68 - -; CORTEX-M4-SOFT: .cpu cortex-m4 -; CORTEX-M4-SOFT: .eabi_attribute 6, 13 -; CORTEX-M4-SOFT: .eabi_attribute 7, 77 -; CORTEX-M4-SOFT: .eabi_attribute 8, 0 -; CORTEX-M4-SOFT: .eabi_attribute 9, 2 -; CORTEX-M4-SOFT: .fpu vfpv4-d16 -; CORTEX-M4-SOFT: .eabi_attribute 20, 1 -; CORTEX-M4-SOFT: .eabi_attribute 21, 1 -; CORTEX-M4-SOFT: .eabi_attribute 23, 3 -; CORTEX-M4-SOFT: .eabi_attribute 24, 1 -; CORTEX-M4-SOFT: .eabi_attribute 25, 1 -; CORTEX-M4-SOFT: .eabi_attribute 27, 1 -; CORTEX-M4-SOFT-NOT: .eabi_attribute 28 -; CORTEX-M4-SOFT: .eabi_attribute 36, 1 -; CORTEX-M4-SOFT-NOT: .eabi_attribute 42 -; CORTEX-M4-SOFT: .eabi_attribute 44, 0 -; CORTEX-M4-SOFT-NOT: .eabi_attribute 68 - -; CORTEX-M4-HARD: .cpu cortex-m4 -; CORTEX-M4-HARD: .eabi_attribute 6, 13 -; CORTEX-M4-HARD: .eabi_attribute 7, 77 -; CORTEX-M4-HARD: .eabi_attribute 8, 0 -; CORTEX-M4-HARD: .eabi_attribute 9, 2 -; CORTEX-M4-HARD: .fpu vfpv4-d16 -; CORTEX-M4-HARD: .eabi_attribute 20, 1 -; CORTEX-M4-HARD: .eabi_attribute 21, 1 -; CORTEX-M4-HARD: .eabi_attribute 23, 3 -; CORTEX-M4-HARD: .eabi_attribute 24, 1 -; CORTEX-M4-HARD: .eabi_attribute 25, 1 -; CORTEX-M4-HARD: .eabi_attribute 27, 1 -; CORTEX-M4-HARD: .eabi_attribute 28, 1 -; CORTEX-M4-HARD: .eabi_attribute 36, 1 -; CORTEX-M4-HARD-NOT: .eabi_attribute 42 -; CORTEX-M4-HARD: .eabi_attribute 44, 0 -; CORTEX-M4-HRAD-NOT: .eabi_attribute 68 - -; CORTEX-R5: .cpu cortex-r5 -; CORTEX-R5: .eabi_attribute 6, 10 -; CORTEX-R5: .eabi_attribute 7, 82 -; CORTEX-R5: .eabi_attribute 8, 1 -; CORTEX-R5: .eabi_attribute 9, 2 -; CORTEX-R5: .fpu vfpv3-d16 -; CORTEX-R5: .eabi_attribute 20, 1 -; CORTEX-R5: .eabi_attribute 21, 1 -; CORTEX-R5: .eabi_attribute 23, 3 -; CORTEX-R5: .eabi_attribute 24, 1 -; CORTEX-R5: .eabi_attribute 25, 1 -; CORTEX-R5: .eabi_attribute 27, 1 -; CORTEX-R5-NOT: .eabi_attribute 28 -; CORTEX-R5-NOT: .eabi_attribute 36 -; CORTEX-R5-NOT: .eabi_attribute 42 -; CORTEX-R5: .eabi_attribute 44, 2 -; CORTEX-R5-NOT: .eabi_attribute 68 - -; CORTEX-A53: .cpu cortex-a53 -; CORTEX-A53: .eabi_attribute 6, 14 -; CORTEX-A53: .eabi_attribute 7, 65 -; CORTEX-A53: .eabi_attribute 8, 1 -; CORTEX-A53: .eabi_attribute 9, 2 -; CORTEX-A53: .fpu crypto-neon-fp-armv8 -; CORTEX-A53: .eabi_attribute 12, 3 -; CORTEX-A53: .eabi_attribute 24, 1 -; CORTEX-A53: .eabi_attribute 25, 1 -; CORTEX-A53-NOT: .eabi_attribute 27 -; CORTEX-A53-NOT: 
.eabi_attribute 28 -; CORTEX-A53: .eabi_attribute 36, 1 -; CORTEX-A53: .eabi_attribute 42, 1 -; CORTEX-A53: .eabi_attribute 44, 2 -; CORTEX-A53: .eabi_attribute 68, 3 - -; CORTEX-A57: .cpu cortex-a57 -; CORTEX-A57: .eabi_attribute 6, 14 -; CORTEX-A57: .eabi_attribute 7, 65 -; CORTEX-A57: .eabi_attribute 8, 1 -; CORTEX-A57: .eabi_attribute 9, 2 -; CORTEX-A57: .fpu crypto-neon-fp-armv8 -; CORTEX-A57: .eabi_attribute 12, 3 -; CORTEX-A57: .eabi_attribute 24, 1 -; CORTEX-A57: .eabi_attribute 25, 1 -; CORTEX-A57-NOT: .eabi_attribute 27 -; CORTEX-A57-NOT: .eabi_attribute 28 -; CORTEX-A57: .eabi_attribute 36, 1 -; CORTEX-A57: .eabi_attribute 42, 1 -; CORTEX-A57: .eabi_attribute 44, 2 -; CORTEX-A57: .eabi_attribute 68, 3 - -define i32 @f(i64 %z) { - ret i32 0 -} diff --git a/test/CodeGen/ARM/2010-12-07-PEIBug.ll b/test/CodeGen/ARM/2010-12-07-PEIBug.ll index eef6abd..4baee64 100644 --- a/test/CodeGen/ARM/2010-12-07-PEIBug.ll +++ b/test/CodeGen/ARM/2010-12-07-PEIBug.ll @@ -5,11 +5,11 @@ define hidden void @foo() nounwind ssp { entry: ; CHECK-LABEL: foo: ; CHECK: mov r7, sp -; CHECK-NEXT: vpush {d8} ; CHECK-NEXT: vpush {d10, d11} +; CHECK-NEXT: vpush {d8} tail call void asm sideeffect "","~{d8},~{d10},~{d11}"() nounwind -; CHECK: vpop {d10, d11} -; CHECK-NEXT: vpop {d8} +; CHECK: vpop {d8} +; CHECK-NEXT: vpop {d10, d11} ret void } diff --git a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll index f57411b..b1d59aa 100644 --- a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll +++ b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll @@ -17,7 +17,7 @@ target triple = "thumbv7-apple-darwin10" ; DW_OP_constu ; offset -;CHECK: .long Lset6 +;CHECK: .long Lset7 ;CHECK-NEXT: @ DW_AT_type ;CHECK-NEXT: @ DW_AT_decl_file ;CHECK-NEXT: @ DW_AT_decl_line @@ -80,7 +80,7 @@ entry: !0 = metadata !{i32 786478, metadata !47, metadata !1, metadata !"get1", metadata !"get1", metadata !"get1", i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get1, null, null, metadata !42, i32 4} ; [ DW_TAG_subprogram ] !1 = metadata !{i32 786473, metadata !47} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 786449, metadata !47, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2369.8)", i1 true, metadata !"", i32 0, metadata !48, metadata !48, metadata !40, metadata !41, metadata !41, metadata !""} ; [ DW_TAG_compile_unit ] +!2 = metadata !{i32 786449, metadata !47, i32 1, metadata !"4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build 2369.8)", i1 true, metadata !"", i32 0, metadata !48, metadata !48, metadata !40, metadata !41, metadata !48, metadata !""} ; [ DW_TAG_compile_unit ] !3 = metadata !{i32 786453, metadata !47, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{metadata !5, metadata !5} !5 = metadata !{i32 786468, metadata !47, metadata !1, metadata !"_Bool", i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ] @@ -126,5 +126,5 @@ entry: !45 = metadata !{metadata !24, metadata !25} !46 = metadata !{metadata !27, metadata !28} !47 = metadata !{metadata !"foo.c", metadata !"/tmp/"} -!48 = metadata !{i32 0} +!48 = metadata !{} !49 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/ARM/2011-04-12-AlignBug.ll b/test/CodeGen/ARM/2011-04-12-AlignBug.ll index 317be94..97297f7 100644 --- a/test/CodeGen/ARM/2011-04-12-AlignBug.ll +++ b/test/CodeGen/ARM/2011-04-12-AlignBug.ll @@ -3,9 +3,9 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32- target triple = "thumbv7-apple-darwin10.0.0" ; CHECK: align 3 -@.v = linker_private unnamed_addr constant <4 x i32> , align 8 +@.v = private unnamed_addr constant <4 x i32> , align 8 ; CHECK: align 2 -@.strA = linker_private unnamed_addr constant [4 x i8] c"bar\00" +@.strA = private unnamed_addr constant [4 x i8] c"bar\00" ; CHECK-NOT: align -@.strB = linker_private unnamed_addr constant [4 x i8] c"foo\00", align 1 -@.strC = linker_private unnamed_addr constant [4 x i8] c"baz\00", section "__TEXT,__cstring,cstring_literals", align 1 +@.strB = private unnamed_addr constant [4 x i8] c"foo\00", align 1 +@.strC = private unnamed_addr constant [4 x i8] c"baz\00", section "__TEXT,__cstring,cstring_literals", align 1 diff --git a/test/CodeGen/ARM/2011-06-09-TailCallByVal.ll b/test/CodeGen/ARM/2011-06-09-TailCallByVal.ll index 7f0f795..12cdd04 100644 --- a/test/CodeGen/ARM/2011-06-09-TailCallByVal.ll +++ b/test/CodeGen/ARM/2011-06-09-TailCallByVal.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -relocation-model=pic -mcpu=cortex-a8 -arm-tail-calls=1 | FileCheck %s +; RUN: llc < %s -relocation-model=pic -mcpu=cortex-a8 | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" target triple = "thumbv7-apple-darwin10" diff --git a/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll b/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll index 101a913..d93cc57 100644 --- a/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll +++ b/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll @@ -1,11 +1,11 @@ -; RUN: llc < %s -arm-tail-calls=1 | FileCheck %s +; RUN: llc < %s | FileCheck %s ; tail call inside a function where byval argument is splitted between ; registers and stack is currently unsupported. 
; XFAIL: * target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" -target triple = "thumbv7-apple-ios" +target triple = "thumbv7-apple-ios5.0" %struct.A = type <{ i16, i16, i32, i16, i16, i32, i16, [8 x %struct.B], [418 x i8], %struct.C }> %struct.B = type <{ i32, i16, i16 }> diff --git a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll index bb78707..ed2840b 100644 --- a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll +++ b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll @@ -8,7 +8,7 @@ ; DW_OP_constu ; offset -;CHECK: .long Lset8 +;CHECK: .long Lset9 ;CHECK-NEXT: @ DW_AT_type ;CHECK-NEXT: @ DW_AT_decl_file ;CHECK-NEXT: @ DW_AT_decl_line @@ -75,7 +75,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!49} -!0 = metadata !{i32 786449, metadata !47, i32 12, metadata !"clang", i1 true, metadata !"", i32 0, metadata !48, metadata !48, metadata !40, metadata !41, metadata !41, null} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786449, metadata !47, i32 12, metadata !"clang", i1 true, metadata !"", i32 0, metadata !48, metadata !48, metadata !40, metadata !41, metadata !48, null} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 786478, metadata !47, metadata !2, metadata !"get1", metadata !"get1", metadata !"", i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32)* @get1, null, null, metadata !42, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [get1] !2 = metadata !{i32 786473, metadata !47} ; [ DW_TAG_file_type ] !3 = metadata !{i32 786453, metadata !47, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] @@ -123,5 +123,5 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !45 = metadata !{metadata !19, metadata !20} !46 = metadata !{metadata !27, metadata !28} !47 = metadata !{metadata !"ss3.c", metadata !"/private/tmp"} -!48 = metadata !{i32 0} +!48 = metadata !{} !49 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/ARM/2011-10-26-memset-inline.ll b/test/CodeGen/ARM/2011-10-26-memset-inline.ll index 03614ed..17bd291 100644 --- a/test/CodeGen/ARM/2011-10-26-memset-inline.ll +++ b/test/CodeGen/ARM/2011-10-26-memset-inline.ll @@ -6,10 +6,10 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64- target triple = "thumbv7-apple-ios5.0.0" ; CHECK-GENERIC: strb -; CHECK-GENERIT-NEXT: strb -; CHECK-GENERIT-NEXT: strb -; CHECK-GENERIT-NEXT: strb -; CHECK-GENERIT-NEXT: strb +; CHECK-GENERIC-NEXT: strb +; CHECK-GENERIC-NEXT: strb +; CHECK-GENERIC-NEXT: strb +; CHECK-GENERIC-NEXT: strb ; CHECK-UNALIGNED: strb ; CHECK-UNALIGNED: str define void @foo(i8* nocapture %c) nounwind optsize { diff --git a/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll b/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll index 850c511..c8e08c2 100644 --- a/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll +++ b/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=arm -mcpu=cortex-a8 < %s | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s ; Trigger multiple NEON stores. 
; CHECK: vst1.64 diff --git a/test/CodeGen/ARM/2011-11-07-PromoteVectorLoadStore.ll b/test/CodeGen/ARM/2011-11-07-PromoteVectorLoadStore.ll index 8a65f2e..a707a92 100644 --- a/test/CodeGen/ARM/2011-11-07-PromoteVectorLoadStore.ll +++ b/test/CodeGen/ARM/2011-11-07-PromoteVectorLoadStore.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s ; PR11319 @i8_res = global <2 x i8> diff --git a/test/CodeGen/ARM/2011-11-09-BitcastVectorDouble.ll b/test/CodeGen/ARM/2011-11-09-BitcastVectorDouble.ll index 42eb32d..c1554d8 100644 --- a/test/CodeGen/ARM/2011-11-09-BitcastVectorDouble.ll +++ b/test/CodeGen/ARM/2011-11-09-BitcastVectorDouble.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s ; PR11319 @src1_v2i16 = global <2 x i16> diff --git a/test/CodeGen/ARM/2011-11-09-IllegalVectorFPIntConvert.ll b/test/CodeGen/ARM/2011-11-09-IllegalVectorFPIntConvert.ll index 719571b..c50461a 100644 --- a/test/CodeGen/ARM/2011-11-09-IllegalVectorFPIntConvert.ll +++ b/test/CodeGen/ARM/2011-11-09-IllegalVectorFPIntConvert.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define <2 x i32> @test1(<2 x double>* %A) { ; CHECK: test1 diff --git a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll index a263c9c..86b58c8 100644 --- a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll +++ b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s +; RUN: llc -mtriple=arm-eabi -float-abi=soft -mcpu=cortex-a9 %s -o - | FileCheck %s @A = global <4 x float> diff --git a/test/CodeGen/ARM/2012-04-10-DAGCombine.ll b/test/CodeGen/ARM/2012-04-10-DAGCombine.ll index 089dc91..9b71be2 100644 --- a/test/CodeGen/ARM/2012-04-10-DAGCombine.ll +++ b/test/CodeGen/ARM/2012-04-10-DAGCombine.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mcpu=cortex-a9 -enable-unsafe-fp-math +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 -enable-unsafe-fp-math %s -o /dev/null ;target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" ;target triple = "armv7-none-linux-gnueabi" diff --git a/test/CodeGen/ARM/2012-05-04-vmov.ll b/test/CodeGen/ARM/2012-05-04-vmov.ll index 14dbf7f..c604eed 100644 --- a/test/CodeGen/ARM/2012-05-04-vmov.ll +++ b/test/CodeGen/ARM/2012-05-04-vmov.ll @@ -1,5 +1,9 @@ -; RUN: llc -O1 -march=arm -mcpu=cortex-a9 < %s | FileCheck -check-prefix=A9-CHECK %s -; RUN: llc -O1 -march=arm -mcpu=swift < %s | FileCheck -check-prefix=SWIFT-CHECK %s +; RUN: llc -O1 -mtriple=arm-eabi -mcpu=cortex-a9 %s -o - \ +; RUN: | FileCheck -check-prefix=A9-CHECK %s + +; RUN: llc -O1 -mtriple=arm-eabi -mcpu=swift %s -o - \ +; RUN: | FileCheck -check-prefix=SWIFT-CHECK %s + ; Check that swift doesn't use vmov.32. . 
define <2 x i32> @testuvec(<2 x i32> %A, <2 x i32> %B) nounwind { diff --git a/test/CodeGen/ARM/2012-05-10-PreferVMOVtoVDUP32.ll b/test/CodeGen/ARM/2012-05-10-PreferVMOVtoVDUP32.ll index dd67843..7f30ae1 100644 --- a/test/CodeGen/ARM/2012-05-10-PreferVMOVtoVDUP32.ll +++ b/test/CodeGen/ARM/2012-05-10-PreferVMOVtoVDUP32.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=arm -mcpu=swift < %s | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mcpu=swift %s -o - | FileCheck %s ; define void @f(i32 %x, i32* %p) nounwind ssp { diff --git a/test/CodeGen/ARM/2012-08-23-legalize-vmull.ll b/test/CodeGen/ARM/2012-08-23-legalize-vmull.ll index 647ebd6..e8d4fb2 100644 --- a/test/CodeGen/ARM/2012-08-23-legalize-vmull.ll +++ b/test/CodeGen/ARM/2012-08-23-legalize-vmull.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s ; PR12281 ; Test generataion of code for vmull instruction when multiplying 128-bit diff --git a/test/CodeGen/ARM/2012-09-18-ARMv4ISelBug.ll b/test/CodeGen/ARM/2012-09-18-ARMv4ISelBug.ll index 3bdbb3c..8d77763 100644 --- a/test/CodeGen/ARM/2012-09-18-ARMv4ISelBug.ll +++ b/test/CodeGen/ARM/2012-09-18-ARMv4ISelBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mcpu=arm7tdmi | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mcpu=arm7tdmi %s -o - | FileCheck %s ; movw is only legal for V6T2 and later. ; rdar://12300648 diff --git a/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv.ll b/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv.ll index 38624e0..5235e9c 100644 --- a/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv.ll +++ b/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv.ll @@ -1,4 +1,4 @@ -; RUN: not llc < %s -march=arm -mcpu=cortex-a8 2>&1 | FileCheck %s +; RUN: not llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - 2>&1 | FileCheck %s ; Check for error message: ; CHECK: non-trivial scalar-to-vector conversion, possible invalid constraint for vector type diff --git a/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv2.ll b/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv2.ll index 7ba693d..d389b5c 100644 --- a/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv2.ll +++ b/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv2.ll @@ -1,4 +1,4 @@ -; RUN: not llc < %s -march=arm -mcpu=cortex-a8 2>&1 | FileCheck %s +; RUN: not llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - 2>&1 | FileCheck %s ; Check for error message: ; CHECK: scalar-to-vector conversion failed, possible invalid constraint for vector type diff --git a/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll b/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll index 127429b..c5eba7d 100644 --- a/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll +++ b/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll @@ -4,8 +4,8 @@ ;CHECK-LABEL: foo: ;CHECK: sub sp, sp, #8 ;CHECK: push {r11, lr} -;CHECK: str r0, [sp, #8] -;CHECK: add r0, sp, #8 +;CHECK: str r0, [sp, #12] +;CHECK: add r0, sp, #12 ;CHECK: bl fooUseParam ;CHECK: pop {r11, lr} ;CHECK: add sp, sp, #8 diff --git a/test/CodeGen/ARM/2013-04-16-AAPCS-C4-vs-VFP.ll b/test/CodeGen/ARM/2013-04-16-AAPCS-C4-vs-VFP.ll index 08bf99b..6bd23b1 100644 --- a/test/CodeGen/ARM/2013-04-16-AAPCS-C4-vs-VFP.ll +++ b/test/CodeGen/ARM/2013-04-16-AAPCS-C4-vs-VFP.ll @@ -72,7 +72,7 @@ define void @foo(double %p0, ; --> D0 double %p8, ; --> Stack i32 %p9) #0 { ; --> R0, not Stack+8 entry: - tail call void @fooUseI32(i32 %p9) + call void @fooUseI32(i32 %p9) ret void } diff 
--git a/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll b/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll index 6db71fe..e79a3ba 100644 --- a/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll +++ b/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll @@ -23,9 +23,9 @@ define void @foo(double %vfp0, ; --> D0, NSAA=SP entry: ;CHECK: sub sp, #8 ;CHECK: push.w {r11, lr} - ;CHECK: add r0, sp, #16 - ;CHECK: str r2, [sp, #20] - ;CHECK: str r1, [sp, #16] + ;CHECK: add r0, sp, #8 + ;CHECK: str r2, [sp, #12] + ;CHECK: str r1, [sp, #8] ;CHECK: bl fooUseStruct call void @fooUseStruct(%st_t* %p1) ret void diff --git a/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll b/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll index c4f5f54..480d087 100644 --- a/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll +++ b/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll @@ -72,6 +72,27 @@ KBBlockZero.exit: ; preds = %bb2.i indirectbr i8* undef, [label %KBBlockZero_return_1, label %KBBlockZero_return_0] } +@foo = global i32 ()* null +define i32 @t4(i32 %x, i32 ()* %p_foo) { +entry: +;CHECK-LABEL: t4: +;CHECK-V8-LABEL: t4: + %cmp = icmp slt i32 %x, 60 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + %tmp.2 = call i32 %p_foo() + %sub = add nsw i32 %x, -1 + br label %return + +if.else: ; preds = %entry + %sub1 = add nsw i32 %x, -120 + br label %return + +return: ; preds = %if.end5, %if.then4, %if.then + %retval.0 = phi i32 [ %sub, %if.then ], [ %sub1, %if.else ] + ret i32 %retval.0 +} ; If-converter was checking for the wrong predicate subsumes pattern when doing ; nested predicates. diff --git a/test/CodeGen/ARM/2014-01-09-pseudo_expand_implicit_reg.ll b/test/CodeGen/ARM/2014-01-09-pseudo_expand_implicit_reg.ll new file mode 100644 index 0000000..6c0fbd0 --- /dev/null +++ b/test/CodeGen/ARM/2014-01-09-pseudo_expand_implicit_reg.ll @@ -0,0 +1,56 @@ +; RUN: llc -mtriple=arm-eabi -mattr=+neon -print-before=post-RA-sched %s -o - 2>&1 \ +; RUN: | FileCheck %s + +define void @vst(i8* %m, [4 x i64] %v) { +entry: +; CHECK: vst: +; CHECK: VST1d64Q %R{{[0-9]+}}, 8, %D{{[0-9]+}}, pred:14, pred:%noreg, %Q{{[0-9]+}}_Q{{[0-9]+}} + + %v0 = extractvalue [4 x i64] %v, 0 + %v1 = extractvalue [4 x i64] %v, 1 + %v2 = extractvalue [4 x i64] %v, 2 + %v3 = extractvalue [4 x i64] %v, 3 + + %t0 = bitcast i64 %v0 to <8 x i8> + %t1 = bitcast i64 %v1 to <8 x i8> + %t2 = bitcast i64 %v2 to <8 x i8> + %t3 = bitcast i64 %v3 to <8 x i8> + + %s0 = bitcast <8 x i8> %t0 to <1 x i64> + %s1 = bitcast <8 x i8> %t1 to <1 x i64> + %s2 = bitcast <8 x i8> %t2 to <1 x i64> + %s3 = bitcast <8 x i8> %t3 to <1 x i64> + + %tmp0 = bitcast <1 x i64> %s2 to i64 + %tmp1 = bitcast <1 x i64> %s3 to i64 + + %n0 = insertelement <2 x i64> undef, i64 %tmp0, i32 0 + %n1 = insertelement <2 x i64> %n0, i64 %tmp1, i32 1 + + call void @llvm.arm.neon.vst4.v1i64(i8* %m, <1 x i64> %s0, <1 x i64> %s1, <1 x i64> %s2, <1 x i64> %s3, i32 8) + + call void @bar(<2 x i64> %n1) + + ret void +} + +%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } +define <8 x i8> @vtbx4(<8 x i8>* %A, %struct.__neon_int8x8x4_t* %B, <8 x i8>* %C) nounwind { +; CHECK: vtbx4: +; CHECK: VTBX4 {{.*}}, pred:14, pred:%noreg, %Q{{[0-9]+}}_Q{{[0-9]+}} + %tmp1 = load <8 x i8>* %A + %tmp2 = load %struct.__neon_int8x8x4_t* %B + %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_int8x8x4_t 
%tmp2, 3 + %tmp7 = load <8 x i8>* %C + %tmp8 = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6, <8 x i8> %tmp7) + call void @bar2(%struct.__neon_int8x8x4_t %tmp2, <8 x i8> %tmp8) + ret <8 x i8> %tmp8 +} + +declare void @llvm.arm.neon.vst4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32) +declare <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone +declare void @bar2(%struct.__neon_int8x8x4_t, <8 x i8>) +declare void @bar(<2 x i64> %arg) diff --git a/test/CodeGen/ARM/2014-02-05-vfp-regs-after-stack.ll b/test/CodeGen/ARM/2014-02-05-vfp-regs-after-stack.ll new file mode 100644 index 0000000..4c36a2a --- /dev/null +++ b/test/CodeGen/ARM/2014-02-05-vfp-regs-after-stack.ll @@ -0,0 +1,22 @@ +; RUN: llc < %s -o - -filetype=asm | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" +target triple = "armv8-none--eabi" + +; CHECK-LABEL: fn1: +define arm_aapcs_vfpcc float @fn1(double %a, double %b, double %c, double %d, double %e, double %f, double %g, float %h, double %i, float %j) { + ret float %j +; CHECK: vldr s0, [sp, #8] +} + +; CHECK-LABEL: fn2: +define arm_aapcs_vfpcc float @fn2(double %a, double %b, double %c, double %d, double %e, double %f, float %h, <4 x float> %i, float %j) { + ret float %j +; CHECK: vldr s0, [sp, #16] +} + +; CHECK-LABEL: fn3: +define arm_aapcs_vfpcc float @fn3(float %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, float %j) #0 { + ret float %j +; CHECK: vldr s0, [sp, #8] +} diff --git a/test/CodeGen/ARM/2014-02-21-byval-reg-split-alignment.ll b/test/CodeGen/ARM/2014-02-21-byval-reg-split-alignment.ll new file mode 100644 index 0000000..33bfa2f --- /dev/null +++ b/test/CodeGen/ARM/2014-02-21-byval-reg-split-alignment.ll @@ -0,0 +1,114 @@ +; RUN: llc -mtriple=arm-linux-gnueabihf < %s | FileCheck %s + +%struct4bytes = type { i32 } +%struct8bytes8align = type { i64 } +%struct12bytes = type { i32, i32, i32 } + +declare void @useIntPtr(%struct4bytes*) +declare void @useLong(i64) +declare void @usePtr(%struct8bytes8align*) + +; a -> r0 +; b -> r1..r3 +; c -> sp+0..sp+7 +define void @foo1(i32 %a, %struct12bytes* byval %b, i64 %c) { +; CHECK-LABEL: foo1 +; CHECK: sub sp, sp, #16 +; CHECK: push {r11, lr} +; CHECK: add [[SCRATCH:r[0-9]+]], sp, #12 +; CHECK: stm [[SCRATCH]], {r1, r2, r3} +; CHECK: ldr r0, [sp, #24] +; CHECK: ldr r1, [sp, #28] +; CHECK: bl useLong +; CHECK: pop {r11, lr} +; CHECK: add sp, sp, #16 + + call void @useLong(i64 %c) + ret void +} + +; a -> r0 +; b -> r2..r3 +define void @foo2(i32 %a, %struct8bytes8align* byval %b) { +; CHECK-LABEL: foo2 +; CHECK: sub sp, sp, #8 +; CHECK: push {r11, lr} +; CHECK: add r0, sp, #8 +; CHECK: str r3, [sp, #12] +; CHECK: str r2, [sp, #8] +; CHECK: bl usePtr +; CHECK: pop {r11, lr} +; CHECK: add sp, sp, #8 + + call void @usePtr(%struct8bytes8align* %b) + ret void +} + +; a -> r0..r1 +; b -> r2 +define void @foo3(%struct8bytes8align* byval %a, %struct4bytes* byval %b) { +; CHECK-LABEL: foo3 +; CHECK: sub sp, sp, #16 +; CHECK: push {r11, lr} +; CHECK: add [[SCRATCH:r[0-9]+]], sp, #8 +; CHECK: stm [[SCRATCH]], {r0, r1, r2} +; CHECK: add r0, sp, #8 +; CHECK: bl usePtr +; CHECK: pop {r11, lr} +; CHECK: add sp, sp, #16 + + call void @usePtr(%struct8bytes8align* %a) + ret void +} + +; a -> r0 +; b -> r2..r3 +define void @foo4(%struct4bytes* byval %a, %struct8bytes8align* byval %b) { +; CHECK-LABEL: foo4 +; CHECK: sub sp, sp, #16 +; CHECK: 
push {r11, lr} +; CHECK: str r0, [sp, #8] +; CHECK: add r0, sp, #16 +; CHECK: str r3, [sp, #20] +; CHECK: str r2, [sp, #16] +; CHECK: bl usePtr +; CHECK: pop {r11, lr} +; CHECK: add sp, sp, #16 +; CHECK: mov pc, lr + + call void @usePtr(%struct8bytes8align* %b) + ret void +} + +; a -> r0..r1 +; b -> r2 +; c -> r3 +define void @foo5(%struct8bytes8align* byval %a, %struct4bytes* byval %b, %struct4bytes* byval %c) { +; CHECK-LABEL: foo5 +; CHECK: sub sp, sp, #16 +; CHECK: push {r11, lr} +; CHECK: add [[SCRATCH:r[0-9]+]], sp, #8 +; CHECK: stm [[SCRATCH]], {r0, r1, r2, r3} +; CHECK: add r0, sp, #8 +; CHECK: bl usePtr +; CHECK: pop {r11, lr} +; CHECK: add sp, sp, #16 +; CHECK: mov pc, lr + + call void @usePtr(%struct8bytes8align* %a) + ret void +} + +; a..c -> r0..r2 +; d -> sp+0..sp+7 +define void @foo6(i32 %a, i32 %b, i32 %c, %struct8bytes8align* byval %d) { +; CHECK-LABEL: foo6 +; CHECK: push {r11, lr} +; CHECK: add r0, sp, #8 +; CHECK: bl usePtr +; CHECK: pop {r11, lr} +; CHECK: mov pc, lr + + call void @usePtr(%struct8bytes8align* %d) + ret void +} diff --git a/test/CodeGen/ARM/DbgValueOtherTargets.test b/test/CodeGen/ARM/DbgValueOtherTargets.test index bf90891..9ce2459 100644 --- a/test/CodeGen/ARM/DbgValueOtherTargets.test +++ b/test/CodeGen/ARM/DbgValueOtherTargets.test @@ -1 +1 @@ -RUN: llc -O0 -march=arm -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll +RUN: llc -O0 -mtriple=arm-eabi -asm-verbose %S/../Inputs/DbgValueOtherTargets.ll -o - | FileCheck %S/../Inputs/DbgValueOtherTargets.ll diff --git a/test/CodeGen/ARM/Windows/aapcs.ll b/test/CodeGen/ARM/Windows/aapcs.ll new file mode 100644 index 0000000..3f9a09f --- /dev/null +++ b/test/CodeGen/ARM/Windows/aapcs.ll @@ -0,0 +1,16 @@ +; RUN: llc -mtriple=thumbv7-windows-itanium -mcpu=cortex-a9 -o - %s | FileCheck %s + +; AAPCS mandates an 8-byte stack alignment. The alloca is implicitly aligned, +; and no bic is required. 
+ +declare void @callee(i8 *%i) + +define void @caller() { + %i = alloca i8, align 8 + call void @callee(i8* %i) + ret void +} + +; CHECK: sub sp, #8 +; CHECK-NOT: bic + diff --git a/test/CodeGen/ARM/Windows/hard-float.ll b/test/CodeGen/ARM/Windows/hard-float.ll new file mode 100644 index 0000000..f7b7ec2 --- /dev/null +++ b/test/CodeGen/ARM/Windows/hard-float.ll @@ -0,0 +1,10 @@ +; RUN: llc -mtriple=thumbv7-windows-itanium -mcpu=cortex-a9 -o - %s | FileCheck %s + +define float @function(float %f, float %g) nounwind { +entry: + %h = fadd float %f, %g + ret float %h +} + +; CHECK: vadd.f32 s0, s0, s1 + diff --git a/test/CodeGen/ARM/Windows/mangling.ll b/test/CodeGen/ARM/Windows/mangling.ll new file mode 100644 index 0000000..ce1fe2e --- /dev/null +++ b/test/CodeGen/ARM/Windows/mangling.ll @@ -0,0 +1,9 @@ +; RUN: llc -mtriple=thumbv7-windows -mcpu=cortex-a9 -o - %s | FileCheck %s + +define void @function() nounwind { +entry: + ret void +} + +; CHECK-LABEL: function + diff --git a/test/CodeGen/ARM/Windows/no-aeabi.ll b/test/CodeGen/ARM/Windows/no-aeabi.ll new file mode 100644 index 0000000..4c6676f --- /dev/null +++ b/test/CodeGen/ARM/Windows/no-aeabi.ll @@ -0,0 +1,10 @@ +; RUN: llc -mtriple=thumbv7-windows-itanium -mcpu=cortex-a9 -o - %s | FileCheck %s + +define i32 @divide(i32 %i, i32 %j) nounwind { +entry: + %quotient = sdiv i32 %i, %j + ret i32 %quotient +} + +; CHECK-NOT: __aeabi_idiv + diff --git a/test/CodeGen/ARM/Windows/no-arm-mode.ll b/test/CodeGen/ARM/Windows/no-arm-mode.ll new file mode 100644 index 0000000..6db031f --- /dev/null +++ b/test/CodeGen/ARM/Windows/no-arm-mode.ll @@ -0,0 +1,5 @@ +; RUN: not llc -mtriple=armv7-windows-itanium -mcpu=cortex-a9 -o /dev/null %s 2>&1 \ +; RUN: | FileCheck %s + +; CHECK: does not support ARM mode execution + diff --git a/test/CodeGen/ARM/Windows/no-ehabi.ll b/test/CodeGen/ARM/Windows/no-ehabi.ll new file mode 100644 index 0000000..4119b6d --- /dev/null +++ b/test/CodeGen/ARM/Windows/no-ehabi.ll @@ -0,0 +1,21 @@ +; RUN: llc -mtriple=thumbv7-windows -mcpu=cortex-a9 -o - %s | FileCheck %s + +declare void @callee(i32 %i) + +define i32 @caller(i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, + i32 %p) { +entry: + %q = add nsw i32 %j, %i + %r = add nsw i32 %q, %k + %s = add nsw i32 %r, %l + call void @callee(i32 %s) + %t = add nsw i32 %n, %m + %u = add nsw i32 %t, %o + %v = add nsw i32 %u, %p + call void @callee(i32 %v) + %w = add nsw i32 %v, %s + ret i32 %w +} + +; CHECK-NOT: .save {{{.*}}} + diff --git a/test/CodeGen/ARM/a15-SD-dep.ll b/test/CodeGen/ARM/a15-SD-dep.ll index 019ff61..5e5ca4b 100644 --- a/test/CodeGen/ARM/a15-SD-dep.ll +++ b/test/CodeGen/ARM/a15-SD-dep.ll @@ -56,3 +56,62 @@ define arm_aapcs_vfpcc <4 x float> @t5(<4 x float> %q, float %f) { %i2 = fadd <4 x float> %i1, %i1 ret <4 x float> %i2 } + +; Test that DPair can be successfully passed as QPR. 
+; CHECK-ENABLED-LABEL: test_DPair1: +; CHECK-DISABLED-LABEL: test_DPair1: +define void @test_DPair1(i32 %vsout, i8* nocapture %out, float %x, float %y) { +entry: + %0 = insertelement <4 x float> undef, float %x, i32 1 + %1 = insertelement <4 x float> %0, float %y, i32 0 + ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d{{[0-9]*}}[0] + ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d{{[0-9]*}}[1] + ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d{{[0-9]*}}[0] + ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d{{[0-9]*}}[1] + ; CHECK-DISABLED-NOT: vdup + switch i32 %vsout, label %sw.epilog [ + i32 1, label %sw.bb + i32 0, label %sw.bb6 + ] + +sw.bb: ; preds = %entry + %2 = insertelement <4 x float> %1, float 0.000000e+00, i32 0 + br label %sw.bb6 + +sw.bb6: ; preds = %sw.bb, %entry + %sum.0 = phi <4 x float> [ %1, %entry ], [ %2, %sw.bb ] + %3 = extractelement <4 x float> %sum.0, i32 0 + %conv = fptoui float %3 to i8 + store i8 %conv, i8* %out, align 1 + ret void + +sw.epilog: ; preds = %entry + ret void +} + +; CHECK-ENABLED-LABEL: test_DPair2: +; CHECK-DISABLED-LABEL: test_DPair2: +define void @test_DPair2(i32 %vsout, i8* nocapture %out, float %x) { +entry: + %0 = insertelement <4 x float> undef, float %x, i32 0 + ; CHECK-ENABLED: vdup.32 q{{[0-9]*}}, d{{[0-9]*}}[0] + ; CHECK-DISABLED-NOT: vdup + switch i32 %vsout, label %sw.epilog [ + i32 1, label %sw.bb + i32 0, label %sw.bb1 + ] + +sw.bb: ; preds = %entry + %1 = insertelement <4 x float> %0, float 0.000000e+00, i32 0 + br label %sw.bb1 + +sw.bb1: ; preds = %entry, %sw.bb + %sum.0 = phi <4 x float> [ %0, %entry ], [ %1, %sw.bb ] + %2 = extractelement <4 x float> %sum.0, i32 0 + %conv = fptoui float %2 to i8 + store i8 %conv, i8* %out, align 1 + br label %sw.epilog + +sw.epilog: ; preds = %entry, %sw.bb1 + ret void +} \ No newline at end of file diff --git a/test/CodeGen/ARM/a15-mla.ll b/test/CodeGen/ARM/a15-mla.ll index b233cc2..9867e27 100644 --- a/test/CodeGen/ARM/a15-mla.ll +++ b/test/CodeGen/ARM/a15-mla.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=arm -float-abi=hard -mcpu=cortex-a15 -mattr=+neon,+neonfp | FileCheck %s +; RUN: llc -mtriple=arm-eabi -float-abi=hard -mcpu=cortex-a15 -mattr=+neon,+neonfp %s -o - \ +; RUN: | FileCheck %s ; This test checks that the VMLxForwarting feature is disabled for A15. ; CHECK: fun_a: diff --git a/test/CodeGen/ARM/a15.ll b/test/CodeGen/ARM/a15.ll index 6f816c1..9f0b280 100644 --- a/test/CodeGen/ARM/a15.ll +++ b/test/CodeGen/ARM/a15.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mcpu=cortex-a15 | FileCheck %s +; RUN: llc -mtriple=arm -mcpu=cortex-a15 %s -o - | FileCheck %s ; CHECK: a define i32 @a(i32 %x) { diff --git a/test/CodeGen/ARM/addrmode.ll b/test/CodeGen/ARM/addrmode.ll index 748d258..8fd1da7 100644 --- a/test/CodeGen/ARM/addrmode.ll +++ b/test/CodeGen/ARM/addrmode.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: llc < %s -march=arm -stats 2>&1 | grep asm-printer | grep 4 +; RUN: llc -mtriple=arm-eabi -stats %s -o - 2>&1 | FileCheck %s define i32 @t1(i32 %a) { %b = mul i32 %a, 9 @@ -14,3 +14,6 @@ define i32 @t2(i32 %a) { %d = load i32* %c ret i32 %d } + +; CHECK: 4 asm-printer + diff --git a/test/CodeGen/ARM/addrspacecast.ll b/test/CodeGen/ARM/addrspacecast.ll index 2e98ba5..7b6237d 100644 --- a/test/CodeGen/ARM/addrspacecast.ll +++ b/test/CodeGen/ARM/addrspacecast.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm +; RUN: llc -mtriple=arm-eabi %s -o /dev/null ; Check that codegen for an addrspace cast succeeds without error. 
define <4 x i32 addrspace(1)*> @f (<4 x i32*> %x) { diff --git a/test/CodeGen/ARM/arm-abi-attr.ll b/test/CodeGen/ARM/arm-abi-attr.ll new file mode 100644 index 0000000..f3923ae --- /dev/null +++ b/test/CodeGen/ARM/arm-abi-attr.ll @@ -0,0 +1,28 @@ +; RUN: llc -mtriple=arm-linux < %s | FileCheck %s --check-prefix=APCS +; RUN: llc -mtriple=arm-linux -mattr=apcs < %s | \ +; RUN: FileCheck %s --check-prefix=APCS +; RUN: llc -mtriple=arm-linux-gnueabi -mattr=apcs < %s | \ +; RUN: FileCheck %s --check-prefix=APCS + +; RUN: llc -mtriple=arm-linux-gnueabi < %s | FileCheck %s --check-prefix=AAPCS +; RUN: llc -mtriple=arm-linux-gnueabi -mattr=aapcs < %s | \ +; RUN: FileCheck %s --check-prefix=AAPCS +; RUN: llc -mtriple=arm-linux-gnu -mattr=aapcs < %s | \ +; RUN: FileCheck %s --check-prefix=AAPCS + +; The stack is 8 byte aligned on AAPCS and 4 on APCS, so we should get a BIC +; only on APCS. + +define void @g() { +; APCS: sub sp, sp, #8 +; APCS: bic sp, sp, #7 + +; AAPCS: sub sp, sp, #8 +; AAPCS-NOT: bic + + %c = alloca i8, align 8 + call void @f(i8* %c) + ret void +} + +declare void @f(i8*) diff --git a/test/CodeGen/ARM/arm-and-tst-peephole.ll b/test/CodeGen/ARM/arm-and-tst-peephole.ll index 88d797e..bf827d6 100644 --- a/test/CodeGen/ARM/arm-and-tst-peephole.ll +++ b/test/CodeGen/ARM/arm-and-tst-peephole.ll @@ -1,7 +1,8 @@ -; RUN: llc < %s -march=arm | FileCheck -check-prefix=ARM %s -; RUN: llc < %s -march=thumb | FileCheck -check-prefix=THUMB %s -; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck -check-prefix=T2 %s -; RUN: llc < %s -mtriple=thumbv8 | FileCheck -check-prefix=V8 %s +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck -check-prefix=ARM %s +; RUN: llc -mtriple=thumb-eabi %s -o - | FileCheck -check-prefix=THUMB %s +; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - \ +; RUN: | FileCheck -check-prefix=T2 %s +; RUN: llc -mtriple=thumbv8-eabi %s -o - | FileCheck -check-prefix=V8 %s ; FIXME: The -march=thumb test doesn't change if -disable-peephole is specified. diff --git a/test/CodeGen/ARM/arm-asm.ll b/test/CodeGen/ARM/arm-asm.ll index 2e35e39..e869abe 100644 --- a/test/CodeGen/ARM/arm-asm.ll +++ b/test/CodeGen/ARM/arm-asm.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm +; RUN: llc -mtriple=arm-eabi %s -o /dev/null define void @frame_dummy() { entry: diff --git a/test/CodeGen/ARM/arm-modifier.ll b/test/CodeGen/ARM/arm-modifier.ll index 8548642..580f7e7 100644 --- a/test/CodeGen/ARM/arm-modifier.ll +++ b/test/CodeGen/ARM/arm-modifier.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 -no-integrated-as %s -o - | FileCheck %s define i32 @foo(float %scale, float %scale2) nounwind { entry: diff --git a/test/CodeGen/ARM/arm-negative-stride.ll b/test/CodeGen/ARM/arm-negative-stride.ll index fb0f8ff..7decb97 100644 --- a/test/CodeGen/ARM/arm-negative-stride.ll +++ b/test/CodeGen/ARM/arm-negative-stride.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm | FileCheck %s +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s ; This loop is rewritten with an indvar which counts down, which ; frees up a register from holding the trip count. 
diff --git a/test/CodeGen/ARM/arm-ttype-target2.ll b/test/CodeGen/ARM/arm-ttype-target2.ll index 8b5087f..4d61cb5 100644 --- a/test/CodeGen/ARM/arm-ttype-target2.ll +++ b/test/CodeGen/ARM/arm-ttype-target2.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=armv7-none-linux-gnueabi -arm-enable-ehabi -arm-enable-ehabi-descriptors < %s | FileCheck %s +; RUN: llc -mtriple=armv7-none-linux-gnueabi < %s | FileCheck %s @_ZTVN10__cxxabiv117__class_type_infoE = external global i8* @_ZTS3Foo = linkonce_odr constant [5 x i8] c"3Foo\00" diff --git a/test/CodeGen/ARM/atomic-64bit.ll b/test/CodeGen/ARM/atomic-64bit.ll index 0477d4f..a881d5f 100644 --- a/test/CodeGen/ARM/atomic-64bit.ll +++ b/test/CodeGen/ARM/atomic-64bit.ll @@ -55,8 +55,8 @@ define i64 @test3(i64* %ptr, i64 %val) { ; CHECK-LABEL: test3: ; CHECK: dmb {{ish$}} ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] -; CHECK: and [[REG3:(r[0-9]?[02468])]], [[REG1]] -; CHECK: and [[REG4:(r[0-9]?[13579])]], [[REG2]] +; CHECK-DAG: and [[REG3:(r[0-9]?[02468])]], [[REG1]] +; CHECK-DAG: and [[REG4:(r[0-9]?[13579])]], [[REG2]] ; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; CHECK: cmp ; CHECK: bne @@ -65,8 +65,8 @@ define i64 @test3(i64* %ptr, i64 %val) { ; CHECK-THUMB-LABEL: test3: ; CHECK-THUMB: dmb {{ish$}} ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] -; CHECK-THUMB: and.w [[REG3:[a-z0-9]+]], [[REG1]] -; CHECK-THUMB: and.w [[REG4:[a-z0-9]+]], [[REG2]] +; CHECK-THUMB-DAG: and.w [[REG3:[a-z0-9]+]], [[REG1]] +; CHECK-THUMB-DAG: and.w [[REG4:[a-z0-9]+]], [[REG2]] ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; CHECK-THUMB: cmp ; CHECK-THUMB: bne @@ -80,8 +80,8 @@ define i64 @test4(i64* %ptr, i64 %val) { ; CHECK-LABEL: test4: ; CHECK: dmb {{ish$}} ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] -; CHECK: orr [[REG3:(r[0-9]?[02468])]], [[REG1]] -; CHECK: orr [[REG4:(r[0-9]?[13579])]], [[REG2]] +; CHECK-DAG: orr [[REG3:(r[0-9]?[02468])]], [[REG1]] +; CHECK-DAG: orr [[REG4:(r[0-9]?[13579])]], [[REG2]] ; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; CHECK: cmp ; CHECK: bne @@ -90,8 +90,8 @@ define i64 @test4(i64* %ptr, i64 %val) { ; CHECK-THUMB-LABEL: test4: ; CHECK-THUMB: dmb {{ish$}} ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] -; CHECK-THUMB: orr.w [[REG3:[a-z0-9]+]], [[REG1]] -; CHECK-THUMB: orr.w [[REG4:[a-z0-9]+]], [[REG2]] +; CHECK-THUMB-DAG: orr.w [[REG3:[a-z0-9]+]], [[REG1]] +; CHECK-THUMB-DAG: orr.w [[REG4:[a-z0-9]+]], [[REG2]] ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; CHECK-THUMB: cmp ; CHECK-THUMB: bne @@ -105,8 +105,8 @@ define i64 @test5(i64* %ptr, i64 %val) { ; CHECK-LABEL: test5: ; CHECK: dmb {{ish$}} ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] -; CHECK: eor [[REG3:(r[0-9]?[02468])]], [[REG1]] -; CHECK: eor [[REG4:(r[0-9]?[13579])]], [[REG2]] +; CHECK-DAG: eor [[REG3:(r[0-9]?[02468])]], [[REG1]] +; CHECK-DAG: eor [[REG4:(r[0-9]?[13579])]], [[REG2]] ; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; CHECK: cmp ; CHECK: bne @@ -115,8 +115,8 @@ define i64 @test5(i64* %ptr, i64 %val) { ; CHECK-THUMB-LABEL: test5: ; CHECK-THUMB: dmb {{ish$}} ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] -; CHECK-THUMB: eor.w [[REG3:[a-z0-9]+]], [[REG1]] -; CHECK-THUMB: eor.w [[REG4:[a-z0-9]+]], [[REG2]] +; CHECK-THUMB-DAG: eor.w [[REG3:[a-z0-9]+]], [[REG1]] +; CHECK-THUMB-DAG: eor.w [[REG4:[a-z0-9]+]], [[REG2]] ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; CHECK-THUMB: cmp ; CHECK-THUMB: bne @@ -151,8 +151,9 @@ define 
i64 @test7(i64* %ptr, i64 %val1, i64 %val2) { ; CHECK-LABEL: test7: ; CHECK: dmb {{ish$}} ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] -; CHECK: cmp [[REG1]] -; CHECK: cmpeq [[REG2]] +; CHECK-DAG: eor [[MISMATCH_LO:r[0-9]+]], [[REG1]], r1 +; CHECK-DAG: eor [[MISMATCH_HI:r[0-9]+]], [[REG2]], r2 +; CHECK: orrs {{r[0-9]+}}, [[MISMATCH_LO]], [[MISMATCH_HI]] ; CHECK: bne ; CHECK: strexd {{[a-z0-9]+}}, {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}} ; CHECK: cmp @@ -162,16 +163,16 @@ define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) { ; CHECK-THUMB-LABEL: test7: ; CHECK-THUMB: dmb {{ish$}} ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] -; CHECK-THUMB: cmp [[REG1]] -; CHECK-THUMB: it eq -; CHECK-THUMB: cmpeq [[REG2]] +; CHECK-THUMB-DAG: eor.w [[MISMATCH_LO:[a-z0-9]+]], [[REG1]], r2 +; CHECK-THUMB-DAG: eor.w [[MISMATCH_HI:[a-z0-9]+]], [[REG2]], r3 +; CHECK-THUMB: orrs [[MISMATCH_HI]], [[MISMATCH_LO]] ; CHECK-THUMB: bne ; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}} ; CHECK-THUMB: cmp ; CHECK-THUMB: bne ; CHECK-THUMB: dmb {{ish$}} - %r = cmpxchg i64* %ptr, i64 %val1, i64 %val2 seq_cst + %r = cmpxchg i64* %ptr, i64 %val1, i64 %val2 seq_cst seq_cst ret i64 %r } @@ -216,9 +217,18 @@ define i64 @test10(i64* %ptr, i64 %val) { ; CHECK-LABEL: test10: ; CHECK: dmb {{ish$}} ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] -; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]] -; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]] -; CHECK: blt +; CHECK: mov [[CARRY_LO:[a-z0-9]+]], #0 +; CHECK: mov [[CARRY_HI:[a-z0-9]+]], #0 +; CHECK: mov [[OUT_HI:[a-z0-9]+]], r2 +; CHECK: cmp [[REG1]], r1 +; CHECK: movwls [[CARRY_LO]], #1 +; CHECK: cmp [[REG2]], r2 +; CHECK: movwle [[CARRY_HI]], #1 +; CHECK: moveq [[CARRY_HI]], [[CARRY_LO]] +; CHECK: cmp [[CARRY_HI]], #0 +; CHECK: movne [[OUT_HI]], [[REG2]] +; CHECK: mov [[OUT_LO:[a-z0-9]+]], r1 +; CHECK: movne [[OUT_LO]], [[REG1]] ; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; CHECK: cmp ; CHECK: bne @@ -227,9 +237,18 @@ define i64 @test10(i64* %ptr, i64 %val) { ; CHECK-THUMB-LABEL: test10: ; CHECK-THUMB: dmb {{ish$}} ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] -; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]] -; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]] -; CHECK-THUMB: blt +; CHECK-THUMB: mov.w [[CARRY_LO:[a-z0-9]+]], #0 +; CHECK-THUMB: movs [[CARRY_HI:[a-z0-9]+]], #0 +; CHECK-THUMB: cmp [[REG1]], r2 +; CHECK-THUMB: movls.w [[CARRY_LO]], #1 +; CHECK-THUMB: cmp [[REG2]], r3 +; CHECK-THUMB: movle [[CARRY_HI]], #1 +; CHECK-THUMB: moveq [[CARRY_HI]], [[CARRY_LO]] +; CHECK-THUMB: mov [[OUT_HI:[a-z0-9]+]], r3 +; CHECK-THUMB: cmp [[CARRY_HI]], #0 +; CHECK-THUMB: mov [[OUT_LO:[a-z0-9]+]], r2 +; CHECK-THUMB: movne [[OUT_HI]], [[REG2]] +; CHECK-THUMB: movne [[OUT_LO]], [[REG1]] ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; CHECK-THUMB: cmp ; CHECK-THUMB: bne @@ -243,9 +262,18 @@ define i64 @test11(i64* %ptr, i64 %val) { ; CHECK-LABEL: test11: ; CHECK: dmb {{ish$}} ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] -; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]] -; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]] -; CHECK: blo +; CHECK: mov [[CARRY_LO:[a-z0-9]+]], #0 +; CHECK: mov [[CARRY_HI:[a-z0-9]+]], #0 +; CHECK: mov [[OUT_HI:[a-z0-9]+]], r2 +; CHECK: cmp [[REG1]], r1 +; CHECK: movwls [[CARRY_LO]], #1 +; CHECK: cmp [[REG2]], r2 +; CHECK: movwls [[CARRY_HI]], #1 +; 
CHECK: moveq [[CARRY_HI]], [[CARRY_LO]] +; CHECK: cmp [[CARRY_HI]], #0 +; CHECK: movne [[OUT_HI]], [[REG2]] +; CHECK: mov [[OUT_LO:[a-z0-9]+]], r1 +; CHECK: movne [[OUT_LO]], [[REG1]] ; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; CHECK: cmp ; CHECK: bne @@ -255,9 +283,18 @@ define i64 @test11(i64* %ptr, i64 %val) { ; CHECK-THUMB-LABEL: test11: ; CHECK-THUMB: dmb {{ish$}} ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] -; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]] -; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]] -; CHECK-THUMB: blo +; CHECK-THUMB: mov.w [[CARRY_LO:[a-z0-9]+]], #0 +; CHECK-THUMB: movs [[CARRY_HI:[a-z0-9]+]], #0 +; CHECK-THUMB: cmp [[REG1]], r2 +; CHECK-THUMB: movls.w [[CARRY_LO]], #1 +; CHECK-THUMB: cmp [[REG2]], r3 +; CHECK-THUMB: movls [[CARRY_HI]], #1 +; CHECK-THUMB: moveq [[CARRY_HI]], [[CARRY_LO]] +; CHECK-THUMB: mov [[OUT_HI:[a-z0-9]+]], r3 +; CHECK-THUMB: cmp [[CARRY_HI]], #0 +; CHECK-THUMB: mov [[OUT_LO:[a-z0-9]+]], r2 +; CHECK-THUMB: movne [[OUT_HI]], [[REG2]] +; CHECK-THUMB: movne [[OUT_LO]], [[REG1]] ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; CHECK-THUMB: cmp ; CHECK-THUMB: bne @@ -271,9 +308,18 @@ define i64 @test12(i64* %ptr, i64 %val) { ; CHECK-LABEL: test12: ; CHECK: dmb {{ish$}} ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] -; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]] -; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]] -; CHECK: bge +; CHECK: mov [[CARRY_LO:[a-z0-9]+]], #0 +; CHECK: mov [[CARRY_HI:[a-z0-9]+]], #0 +; CHECK: mov [[OUT_HI:[a-z0-9]+]], r2 +; CHECK: cmp [[REG1]], r1 +; CHECK: movwhi [[CARRY_LO]], #1 +; CHECK: cmp [[REG2]], r2 +; CHECK: movwgt [[CARRY_HI]], #1 +; CHECK: moveq [[CARRY_HI]], [[CARRY_LO]] +; CHECK: cmp [[CARRY_HI]], #0 +; CHECK: movne [[OUT_HI]], [[REG2]] +; CHECK: mov [[OUT_LO:[a-z0-9]+]], r1 +; CHECK: movne [[OUT_LO]], [[REG1]] ; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; CHECK: cmp ; CHECK: bne @@ -282,9 +328,18 @@ define i64 @test12(i64* %ptr, i64 %val) { ; CHECK-THUMB-LABEL: test12: ; CHECK-THUMB: dmb {{ish$}} ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] -; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]] -; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]] -; CHECK-THUMB: bge +; CHECK-THUMB: mov.w [[CARRY_LO:[a-z0-9]+]], #0 +; CHECK-THUMB: movs [[CARRY_HI:[a-z0-9]+]], #0 +; CHECK-THUMB: cmp [[REG1]], r2 +; CHECK-THUMB: movhi.w [[CARRY_LO]], #1 +; CHECK-THUMB: cmp [[REG2]], r3 +; CHECK-THUMB: movgt [[CARRY_HI]], #1 +; CHECK-THUMB: moveq [[CARRY_HI]], [[CARRY_LO]] +; CHECK-THUMB: mov [[OUT_HI:[a-z0-9]+]], r3 +; CHECK-THUMB: cmp [[CARRY_HI]], #0 +; CHECK-THUMB: mov [[OUT_LO:[a-z0-9]+]], r2 +; CHECK-THUMB: movne [[OUT_HI]], [[REG2]] +; CHECK-THUMB: movne [[OUT_LO]], [[REG1]] ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; CHECK-THUMB: cmp ; CHECK-THUMB: bne @@ -298,9 +353,18 @@ define i64 @test13(i64* %ptr, i64 %val) { ; CHECK-LABEL: test13: ; CHECK: dmb {{ish$}} ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] -; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]] -; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]] -; CHECK: bhs +; CHECK: mov [[CARRY_LO:[a-z0-9]+]], #0 +; CHECK: mov [[CARRY_HI:[a-z0-9]+]], #0 +; CHECK: mov [[OUT_HI:[a-z0-9]+]], r2 +; CHECK: cmp [[REG1]], r1 +; CHECK: movwhi [[CARRY_LO]], #1 +; CHECK: cmp [[REG2]], r2 +; CHECK: movwhi [[CARRY_HI]], #1 +; CHECK: moveq [[CARRY_HI]], 
[[CARRY_LO]] +; CHECK: cmp [[CARRY_HI]], #0 +; CHECK: movne [[OUT_HI]], [[REG2]] +; CHECK: mov [[OUT_LO:[a-z0-9]+]], r1 +; CHECK: movne [[OUT_LO]], [[REG1]] ; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; CHECK: cmp ; CHECK: bne @@ -309,9 +373,18 @@ define i64 @test13(i64* %ptr, i64 %val) { ; CHECK-THUMB-LABEL: test13: ; CHECK-THUMB: dmb {{ish$}} ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] -; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]] -; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]] -; CHECK-THUMB: bhs +; CHECK-THUMB: mov.w [[CARRY_LO:[a-z0-9]+]], #0 +; CHECK-THUMB: movs [[CARRY_HI:[a-z0-9]+]], #0 +; CHECK-THUMB: cmp [[REG1]], r2 +; CHECK-THUMB: movhi.w [[CARRY_LO]], #1 +; CHECK-THUMB: cmp [[REG2]], r3 +; CHECK-THUMB: movhi [[CARRY_HI]], #1 +; CHECK-THUMB: moveq [[CARRY_HI]], [[CARRY_LO]] +; CHECK-THUMB: mov [[OUT_HI:[a-z0-9]+]], r3 +; CHECK-THUMB: cmp [[CARRY_HI]], #0 +; CHECK-THUMB: mov [[OUT_LO:[a-z0-9]+]], r2 +; CHECK-THUMB: movne [[OUT_HI]], [[REG2]] +; CHECK-THUMB: movne [[OUT_LO]], [[REG1]] ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; CHECK-THUMB: cmp ; CHECK-THUMB: bne diff --git a/test/CodeGen/ARM/atomic-cmp.ll b/test/CodeGen/ARM/atomic-cmp.ll index 51ada69..a473807 100644 --- a/test/CodeGen/ARM/atomic-cmp.ll +++ b/test/CodeGen/ARM/atomic-cmp.ll @@ -10,6 +10,6 @@ define i8 @t(i8* %a, i8 %b, i8 %c) nounwind { ; T2-LABEL: t: ; T2: ldrexb ; T2: strexb - %tmp0 = cmpxchg i8* %a, i8 %b, i8 %c monotonic + %tmp0 = cmpxchg i8* %a, i8 %b, i8 %c monotonic monotonic ret i8 %tmp0 } diff --git a/test/CodeGen/ARM/atomic-load-store.ll b/test/CodeGen/ARM/atomic-load-store.ll index 53c7184..45a263d 100644 --- a/test/CodeGen/ARM/atomic-load-store.ll +++ b/test/CodeGen/ARM/atomic-load-store.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -mtriple=armv7-apple-ios -O0 | FileCheck %s -check-prefix=ARM ; RUN: llc < %s -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s -check-prefix=THUMBTWO ; RUN: llc < %s -mtriple=thumbv6-apple-ios | FileCheck %s -check-prefix=THUMBONE -; RUN llc < %s -mtriple=armv4-apple-ios | FileCheck %s -check-prefix=ARMV4 +; RUN: llc < %s -mtriple=armv4-apple-ios | FileCheck %s -check-prefix=ARMV4 define void @test1(i32* %ptr, i32 %val1) { ; ARM: test1 diff --git a/test/CodeGen/ARM/atomic-op.ll b/test/CodeGen/ARM/atomic-op.ll index 9a79c9f..ac8e949 100644 --- a/test/CodeGen/ARM/atomic-op.ll +++ b/test/CodeGen/ARM/atomic-op.ll @@ -194,3 +194,40 @@ entry: %0 = atomicrmw add i32* %p, i32 1 monotonic ret i32 %0 } + +define i32 @test_cmpxchg_fail_order(i32 *%addr, i32 %desired, i32 %new) { +; CHECK-LABEL: test_cmpxchg_fail_order: + + %oldval = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst monotonic +; CHECK: dmb ish +; CHECK: [[LOOP_BB:\.?LBB[0-9]+_1]]: +; CHECK: ldrex [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]] +; CHECK: cmp [[OLDVAL]], r1 +; CHECK: bxne lr +; CHECK: strex [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]] +; CHECK: cmp [[SUCCESS]], #0 +; CHECK: bne [[LOOP_BB]] +; CHECK: dmb ish +; CHECK: bx lr + + ret i32 %oldval +} + +define i32 @test_cmpxchg_fail_order1(i32 *%addr, i32 %desired, i32 %new) { +; CHECK-LABEL: test_cmpxchg_fail_order1: + + %oldval = cmpxchg i32* %addr, i32 %desired, i32 %new acquire acquire +; CHECK-NOT: dmb ish +; CHECK: [[LOOP_BB:\.?LBB[0-9]+_1]]: +; CHECK: ldrex [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]] +; CHECK: cmp [[OLDVAL]], r1 +; CHECK: bne [[END_BB:\.?LBB[0-9]+_[0-9]+]] +; CHECK: strex [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]] +; CHECK: cmp [[SUCCESS]], #0 +; CHECK: bne [[LOOP_BB]] +; CHECK: 
[[END_BB]]: +; CHECK: dmb ish +; CHECK: bx lr + + ret i32 %oldval +} diff --git a/test/CodeGen/ARM/atomic-ops-v8.ll b/test/CodeGen/ARM/atomic-ops-v8.ll index 3f93929..7922e22 100644 --- a/test/CodeGen/ARM/atomic-ops-v8.ll +++ b/test/CodeGen/ARM/atomic-ops-v8.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=armv8-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=thumbv8-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=armv8-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM +; RUN: llc -mtriple=thumbv8-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-THUMB @var8 = global i8 0 @var16 = global i16 0 @@ -15,7 +15,7 @@ define i8 @test_atomic_load_add_i8(i8 %offset) nounwind { ; CHECK: movt r[[ADDR]], :upper16:var8 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: add{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0 @@ -38,7 +38,7 @@ define i16 @test_atomic_load_add_i16(i16 %offset) nounwind { ; CHECK: movt r[[ADDR]], :upper16:var16 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: add{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0 @@ -61,7 +61,7 @@ define i32 @test_atomic_load_add_i32(i32 %offset) nounwind { ; CHECK: movt r[[ADDR]], :upper16:var32 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: add{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0 @@ -75,7 +75,7 @@ define i32 @test_atomic_load_add_i32(i32 %offset) nounwind { ret i32 %old } -define i64 @test_atomic_load_add_i64(i64 %offset) nounwind { +define void @test_atomic_load_add_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64: %old = atomicrmw add i64* @var64, i64 %offset monotonic ; CHECK-NOT: dmb @@ -84,10 +84,10 @@ define i64 @test_atomic_load_add_i64(i64 %offset) nounwind { ; CHECK: movt r[[ADDR]], :upper16:var64 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] +; CHECK: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] ; r0, r1 below is a reasonable guess but could change: it certainly comes into the ; function there. 
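; Note on the recurring FileCheck changes in these atomics tests: CHECK-NEXT
; requires a match on the very next line, so it is relaxed to plain CHECK
; where the scheduler may now place an unrelated instruction in between, and
; the two independent halves of a 64-bit operation are matched with
; CHECK-DAG, which accepts its directives in any order between the
; surrounding CHECK lines. A minimal illustration of the CHECK-DAG grouping
; (hypothetical registers, not taken from any one test):
;   ; CHECK:     ldrexd r0, r1, [r2]
;   ; CHECK-DAG: and r4, r0, r6
;   ; CHECK-DAG: and r5, r1, r7
;   ; CHECK:     strexd r3, r4, r5, [r2]
; The two CHECK-DAG lines above may match in either order.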
-; CHECK-NEXT: adds [[NEW1:r[0-9]+]], r[[OLD1]], r0 +; CHECK-NEXT: adds{{(\.w)?}} [[NEW1:r[0-9]+|lr]], r[[OLD1]], r0 ; CHECK-NEXT: adc{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1 ; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]] ; CHECK-NEXT: cmp [[STATUS]], #0 @@ -95,9 +95,9 @@ define i64 @test_atomic_load_add_i64(i64 %offset) nounwind { ; CHECK-NOT: dmb ; CHECK-NOT: mcr -; CHECK: mov r0, r[[OLD1]] -; CHECK-NEXT: mov r1, r[[OLD2]] - ret i64 %old +; CHECK: strd r[[OLD1]], r[[OLD2]], [r[[ADDR]]] + store i64 %old, i64* @var64 + ret void } define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind { @@ -109,7 +109,7 @@ define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind { ; CHECK: movt r[[ADDR]], :upper16:var8 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: sub{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0 @@ -132,7 +132,7 @@ define i16 @test_atomic_load_sub_i16(i16 %offset) nounwind { ; CHECK: movt r[[ADDR]], :upper16:var16 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: sub{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0 @@ -155,7 +155,7 @@ define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind { ; CHECK: movt r[[ADDR]], :upper16:var32 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: sub{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0 @@ -169,7 +169,7 @@ define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind { ret i32 %old } -define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind { +define void @test_atomic_load_sub_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64: %old = atomicrmw sub i64* @var64, i64 %offset seq_cst ; CHECK-NOT: dmb @@ -178,10 +178,10 @@ define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind { ; CHECK: movt r[[ADDR]], :upper16:var64 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] +; CHECK: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] ; r0, r1 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-NEXT: subs [[NEW1:r[0-9]+]], r[[OLD1]], r0 +; CHECK-NEXT: subs{{(\.w)?}} [[NEW1:r[0-9]+|lr]], r[[OLD1]], r0 ; CHECK-NEXT: sbc{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1 ; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]] ; CHECK-NEXT: cmp [[STATUS]], #0 @@ -189,9 +189,9 @@ define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind { ; CHECK-NOT: dmb ; CHECK-NOT: mcr -; CHECK: mov r0, r[[OLD1]] -; CHECK-NEXT: mov r1, r[[OLD2]] - ret i64 %old +; CHECK: strd r[[OLD1]], r[[OLD2]], [r[[ADDR]]] + store i64 %old, i64* @var64 + ret void } define i8 @test_atomic_load_and_i8(i8 %offset) nounwind { @@ -203,7 +203,7 @@ define i8 @test_atomic_load_and_i8(i8 %offset) nounwind { ; CHECK: movt r[[ADDR]], :upper16:var8 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
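; Note on the 64-bit atomicrmw tests in this file: they are changed from
; returning the old value to storing it back into @var64 and returning void,
; and the final check matches the strd of the loaded pair instead of the
; mov r0/r1 sequence, so the test no longer depends on the result being
; returned in r0:r1. A minimal sketch of the updated shape (relying on the
; @var64 global already declared in this test):
;   define void @test_atomic_load_add_i64(i64 %offset) nounwind {
;     %old = atomicrmw add i64* @var64, i64 %offset monotonic
;     store i64 %old, i64* @var64
;     ret void
;   }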
; CHECK-NEXT: and{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0 @@ -226,7 +226,7 @@ define i16 @test_atomic_load_and_i16(i16 %offset) nounwind { ; CHECK: movt r[[ADDR]], :upper16:var16 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: and{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0 @@ -249,7 +249,7 @@ define i32 @test_atomic_load_and_i32(i32 %offset) nounwind { ; CHECK: movt r[[ADDR]], :upper16:var32 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: and{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0 @@ -263,7 +263,7 @@ define i32 @test_atomic_load_and_i32(i32 %offset) nounwind { ret i32 %old } -define i64 @test_atomic_load_and_i64(i64 %offset) nounwind { +define void @test_atomic_load_and_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64: %old = atomicrmw and i64* @var64, i64 %offset acquire ; CHECK-NOT: dmb @@ -272,20 +272,20 @@ define i64 @test_atomic_load_and_i64(i64 %offset) nounwind { ; CHECK: movt r[[ADDR]], :upper16:var64 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] +; CHECK: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] ; r0, r1 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-NEXT: and{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0 -; CHECK-NEXT: and{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1 -; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]] +; CHECK-DAG: and{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0 +; CHECK-DAG: and{{(\.w)?}} [[NEW2:r[0-9]+|lr]], r[[OLD2]], r1 +; CHECK: strexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]] ; CHECK-NEXT: cmp [[STATUS]], #0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb ; CHECK-NOT: mcr -; CHECK: mov r0, r[[OLD1]] -; CHECK-NEXT: mov r1, r[[OLD2]] - ret i64 %old +; CHECK: strd r[[OLD1]], r[[OLD2]], [r[[ADDR]]] + store i64 %old, i64* @var64 + ret void } define i8 @test_atomic_load_or_i8(i8 %offset) nounwind { @@ -297,7 +297,7 @@ define i8 @test_atomic_load_or_i8(i8 %offset) nounwind { ; CHECK: movt r[[ADDR]], :upper16:var8 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: orr{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0 @@ -320,7 +320,7 @@ define i16 @test_atomic_load_or_i16(i16 %offset) nounwind { ; CHECK: movt r[[ADDR]], :upper16:var16 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: orr{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0 @@ -343,7 +343,7 @@ define i32 @test_atomic_load_or_i32(i32 %offset) nounwind { ; CHECK: movt r[[ADDR]], :upper16:var32 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
; CHECK-NEXT: orr{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0 @@ -357,7 +357,7 @@ define i32 @test_atomic_load_or_i32(i32 %offset) nounwind { ret i32 %old } -define i64 @test_atomic_load_or_i64(i64 %offset) nounwind { +define void @test_atomic_load_or_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i64: %old = atomicrmw or i64* @var64, i64 %offset release ; CHECK-NOT: dmb @@ -366,20 +366,20 @@ define i64 @test_atomic_load_or_i64(i64 %offset) nounwind { ; CHECK: movt r[[ADDR]], :upper16:var64 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] +; CHECK: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] ; r0, r1 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-NEXT: orr{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0 -; CHECK-NEXT: orr{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1 -; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]] +; CHECK-DAG: orr{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0 +; CHECK-DAG: orr{{(\.w)?}} [[NEW2:r[0-9]+|lr]], r[[OLD2]], r1 +; CHECK: stlexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]] ; CHECK-NEXT: cmp [[STATUS]], #0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb ; CHECK-NOT: mcr -; CHECK: mov r0, r[[OLD1]] -; CHECK-NEXT: mov r1, r[[OLD2]] - ret i64 %old +; CHECK: strd r[[OLD1]], r[[OLD2]], [r[[ADDR]]] + store i64 %old, i64* @var64 + ret void } define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind { @@ -391,7 +391,7 @@ define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind { ; CHECK: movt r[[ADDR]], :upper16:var8 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: eor{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0 @@ -414,7 +414,7 @@ define i16 @test_atomic_load_xor_i16(i16 %offset) nounwind { ; CHECK: movt r[[ADDR]], :upper16:var16 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: eor{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0 @@ -437,7 +437,7 @@ define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind { ; CHECK: movt r[[ADDR]], :upper16:var32 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: eor{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0 @@ -451,7 +451,7 @@ define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind { ret i32 %old } -define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind { +define void @test_atomic_load_xor_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i64: %old = atomicrmw xor i64* @var64, i64 %offset monotonic ; CHECK-NOT: dmb @@ -460,20 +460,20 @@ define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind { ; CHECK: movt r[[ADDR]], :upper16:var64 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] +; CHECK: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] ; r0, r1 below is a reasonable guess but could change: it certainly comes into the ; function there. 
-; CHECK-NEXT: eor{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0 -; CHECK-NEXT: eor{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1 -; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]] +; CHECK-DAG: eor{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0 +; CHECK-DAG: eor{{(\.w)?}} [[NEW2:r[0-9]+|lr]], r[[OLD2]], r1 +; CHECK: strexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]] ; CHECK-NEXT: cmp [[STATUS]], #0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb ; CHECK-NOT: mcr -; CHECK: mov r0, r[[OLD1]] -; CHECK-NEXT: mov r1, r[[OLD2]] - ret i64 %old +; CHECK: strd r[[OLD1]], r[[OLD2]], [r[[ADDR]]] + store i64 %old, i64* @var64 + ret void } define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind { @@ -485,7 +485,7 @@ define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind { ; CHECK: movt r[[ADDR]], :upper16:var8 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r0, [r[[ADDR]]] @@ -507,7 +507,7 @@ define i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind { ; CHECK: movt r[[ADDR]], :upper16:var16 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], r0, [r[[ADDR]]] @@ -529,7 +529,7 @@ define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind { ; CHECK: movt r[[ADDR]], :upper16:var32 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], r0, [r[[ADDR]]] @@ -542,7 +542,7 @@ define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind { ret i32 %old } -define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind { +define void @test_atomic_load_xchg_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i64: %old = atomicrmw xchg i64* @var64, i64 %offset acquire ; CHECK-NOT: dmb @@ -551,7 +551,7 @@ define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind { ; CHECK: movt r[[ADDR]], :upper16:var64 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] +; CHECK: ldaexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]] ; r0, r1 below is a reasonable guess but could change: it certainly comes into the ; function there. 
; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]] @@ -560,28 +560,28 @@ define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind { ; CHECK-NOT: dmb ; CHECK-NOT: mcr -; CHECK: mov r0, r[[OLD1]] -; CHECK-NEXT: mov r1, r[[OLD2]] - ret i64 %old +; CHECK: strd [[OLD1]], [[OLD2]], [r[[ADDR]]] + store i64 %old, i64* @var64 + ret void } -define i8 @test_atomic_load_min_i8(i8 %offset) nounwind { +define i8 @test_atomic_load_min_i8(i8 signext %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i8: %old = atomicrmw min i8* @var8, i8 %offset acquire ; CHECK-NOT: dmb ; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 -; CHECK: movt r[[ADDR]], :upper16:var8 +; CHECK-DAG: movw [[ADDR:r[0-9]+|lr]], :lower16:var8 +; CHECK-DAG: movt [[ADDR]], :upper16:var8 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK: ldaexb r[[OLD:[0-9]+]], {{.*}}[[ADDR]] ; CHECK-NEXT: sxtb r[[OLDX:[0-9]+]], r[[OLD]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: cmp r[[OLDX]], r0 -; Thumb mode: it ge -; CHECK: movge r[[OLDX]], r0 -; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r[[OLDX]], [r[[ADDR]]] +; Thumb mode: it le +; CHECK: movle r[[OLDX]], r[[OLD]] +; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r[[OLDX]], {{.*}}[[ADDR]]] ; CHECK-NEXT: cmp [[STATUS]], #0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -591,23 +591,23 @@ define i8 @test_atomic_load_min_i8(i8 %offset) nounwind { ret i8 %old } -define i16 @test_atomic_load_min_i16(i16 %offset) nounwind { +define i16 @test_atomic_load_min_i16(i16 signext %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i16: %old = atomicrmw min i16* @var16, i16 %offset release ; CHECK-NOT: dmb ; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 -; CHECK: movt r[[ADDR]], :upper16:var16 +; CHECK: movw [[ADDR:r[0-9]+|lr]], :lower16:var16 +; CHECK: movt [[ADDR]], :upper16:var16 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK: ldrexh r[[OLD:[0-9]+]], {{.*}}[[ADDR]] ; CHECK-NEXT: sxth r[[OLDX:[0-9]+]], r[[OLD]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: cmp r[[OLDX]], r0 -; Thumb mode: it ge -; CHECK: movge r[[OLDX]], r0 -; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], r[[OLDX]], [r[[ADDR]]] +; Thumb mode: it le +; CHECK: movle r[[OLDX]], r[[OLD]] +; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], r[[OLDX]], {{.*}}[[ADDR]] ; CHECK-NEXT: cmp [[STATUS]], #0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -626,13 +626,13 @@ define i32 @test_atomic_load_min_i32(i32 %offset) nounwind { ; CHECK: movt r[[ADDR]], :upper16:var32 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0 ; CHECK-NEXT: cmp r[[OLD]], r0 -; Thumb mode: it lt -; CHECK: movlt r[[NEW]], r[[OLD]] +; Thumb mode: it le +; CHECK: movle r[[NEW]], r[[OLD]] ; CHECK-NEXT: strex [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]] ; CHECK-NEXT: cmp [[STATUS]], #0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 @@ -643,7 +643,7 @@ define i32 @test_atomic_load_min_i32(i32 %offset) nounwind { ret i32 %old } -define i64 @test_atomic_load_min_i64(i64 %offset) nounwind { +define void @test_atomic_load_min_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i64: %old = atomicrmw min i64* @var64, i64 %offset seq_cst ; CHECK-NOT: dmb @@ -652,41 +652,50 @@ define i64 @test_atomic_load_min_i64(i64 %offset) nounwind { ; CHECK: movt r[[ADDR]], :upper16:var64 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] +; CHECK: ldaexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+]], [r[[ADDR]]] ; r0, r1 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-NEXT: subs [[NEW:r[0-9]+]], r[[OLD1]], r0 -; CHECK-NEXT: sbcs{{(\.w)?}} [[NEW]], r[[OLD2]], r1 -; CHECK-NEXT: blt .LBB{{[0-9]+}}_3 -; CHECK-NEXT: BB#2: -; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]] +; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0 +; CHECK-ARM: mov [[HICARRY:r[0-9]+|lr]], #0 +; CHECK-ARM: cmp [[OLD1]], r0 +; CHECK-ARM: movwls [[LOCARRY]], #1 +; CHECK-ARM: cmp [[OLD2]], r1 +; CHECK-ARM: movwle [[HICARRY]], #1 +; CHECK-ARM: moveq [[HICARRY]], [[LOCARRY]] +; CHECK-ARM: cmp [[HICARRY]], #0 +; CHECK-ARM: mov [[MINHI:r[0-9]+]], r1 +; CHECK-ARM: movne [[MINHI]], [[OLD2]] +; CHECK-ARM: mov [[MINLO:r[0-9]+]], r0 +; CHECK-ARM: movne [[MINLO]], [[OLD1]] +; CHECK-ARM: stlexd [[STATUS:r[0-9]+]], [[MINLO]], [[MINHI]], [r[[ADDR]]] +; CHECK-THUMB: stlexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]] ; CHECK-NEXT: cmp [[STATUS]], #0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb ; CHECK-NOT: mcr -; CHECK: mov r0, r[[OLD1]] -; CHECK-NEXT: mov r1, r[[OLD2]] - ret i64 %old +; CHECK-ARM: strd [[OLD1]], [[OLD2]], [r[[ADDR]]] + store i64 %old, i64* @var64 + ret void } -define i8 @test_atomic_load_max_i8(i8 %offset) nounwind { +define i8 @test_atomic_load_max_i8(i8 signext %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i8: %old = atomicrmw max i8* @var8, i8 %offset seq_cst ; CHECK-NOT: dmb ; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 -; CHECK: movt r[[ADDR]], :upper16:var8 +; CHECK: movw [[ADDR:r[0-9]+|lr]], :lower16:var8 +; CHECK: movt [[ADDR]], :upper16:var8 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK: ldaexb r[[OLD:[0-9]+]], {{.*}}[[ADDR]] ; CHECK-NEXT: sxtb r[[OLDX:[0-9]+]], r[[OLD]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
; CHECK-NEXT: cmp r[[OLDX]], r0 -; Thumb mode: it le -; CHECK: movle r[[OLDX]], r0 -; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], r[[OLDX]], [r[[ADDR]]] +; Thumb mode: it gt +; CHECK: movgt r[[OLDX]], r[[OLD]] +; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], r[[OLDX]], {{.*}}[[ADDR]] ; CHECK-NEXT: cmp [[STATUS]], #0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -696,7 +705,7 @@ define i8 @test_atomic_load_max_i8(i8 %offset) nounwind { ret i8 %old } -define i16 @test_atomic_load_max_i16(i16 %offset) nounwind { +define i16 @test_atomic_load_max_i16(i16 signext %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i16: %old = atomicrmw max i16* @var16, i16 %offset acquire ; CHECK-NOT: dmb @@ -705,13 +714,13 @@ define i16 @test_atomic_load_max_i16(i16 %offset) nounwind { ; CHECK: movt r[[ADDR]], :upper16:var16 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]] ; CHECK-NEXT: sxth r[[OLDX:[0-9]+]], r[[OLD]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: cmp r[[OLDX]], r0 -; Thumb mode: it le -; CHECK: movle r[[OLDX]], r0 +; Thumb mode: it gt +; CHECK: movgt r[[OLDX]], r[[OLD]] ; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], r[[OLDX]], [r[[ADDR]]] ; CHECK-NEXT: cmp [[STATUS]], #0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 @@ -731,7 +740,7 @@ define i32 @test_atomic_load_max_i32(i32 %offset) nounwind { ; CHECK: movt r[[ADDR]], :upper16:var32 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0 @@ -748,7 +757,7 @@ define i32 @test_atomic_load_max_i32(i32 %offset) nounwind { ret i32 %old } -define i64 @test_atomic_load_max_i64(i64 %offset) nounwind { +define void @test_atomic_load_max_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i64: %old = atomicrmw max i64* @var64, i64 %offset monotonic ; CHECK-NOT: dmb @@ -757,41 +766,50 @@ define i64 @test_atomic_load_max_i64(i64 %offset) nounwind { ; CHECK: movt r[[ADDR]], :upper16:var64 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] +; CHECK: ldrexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+]], [r[[ADDR]]] ; r0, r1 below is a reasonable guess but could change: it certainly comes into the ; function there. 
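; Note on the sub-word min/max tests: the i8 and i16 variants now declare
; %offset with the signext (min/max) or zeroext (umin/umax) parameter
; attribute, so the incoming argument is guaranteed to be extended already,
; and the expected conditional-move conditions change accordingly (ge/le
; become le/gt, lo becomes ls). A minimal sketch of the attribute as used
; here (reusing the @var8 global from this test):
;   define i8 @test_atomic_load_min_i8(i8 signext %offset) nounwind {
;     %old = atomicrmw min i8* @var8, i8 %offset acquire
;     ret i8 %old
;   }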
-; CHECK-NEXT: subs [[NEW:r[0-9]+]], r[[OLD1]], r0 -; CHECK-NEXT: sbcs{{(\.w)?}} [[NEW]], r[[OLD2]], r1 -; CHECK-NEXT: bge .LBB{{[0-9]+}}_3 -; CHECK-NEXT: BB#2: -; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]] +; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0 +; CHECK-ARM: mov [[HICARRY:r[0-9]+|lr]], #0 +; CHECK-ARM: cmp [[OLD1]], r0 +; CHECK-ARM: movwhi [[LOCARRY]], #1 +; CHECK-ARM: cmp [[OLD2]], r1 +; CHECK-ARM: movwgt [[HICARRY]], #1 +; CHECK-ARM: moveq [[HICARRY]], [[LOCARRY]] +; CHECK-ARM: cmp [[HICARRY]], #0 +; CHECK-ARM: mov [[MINHI:r[0-9]+]], r1 +; CHECK-ARM: movne [[MINHI]], [[OLD2]] +; CHECK-ARM: mov [[MINLO:r[0-9]+]], r0 +; CHECK-ARM: movne [[MINLO]], [[OLD1]] +; CHECK-ARM: strexd [[STATUS:r[0-9]+]], [[MINLO]], [[MINHI]], [r[[ADDR]]] +; CHECK-THUMB: strexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]] ; CHECK-NEXT: cmp [[STATUS]], #0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb ; CHECK-NOT: mcr -; CHECK: mov r0, r[[OLD1]] -; CHECK-NEXT: mov r1, r[[OLD2]] - ret i64 %old +; CHECK-ARM: strd [[OLD1]], [[OLD2]], [r[[ADDR]]] + store i64 %old, i64* @var64 + ret void } -define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind { +define i8 @test_atomic_load_umin_i8(i8 zeroext %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i8: %old = atomicrmw umin i8* @var8, i8 %offset monotonic ; CHECK-NOT: dmb ; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 -; CHECK: movt r[[ADDR]], :upper16:var8 +; CHECK: movw [[ADDR:r[0-9]+|lr]], :lower16:var8 +; CHECK: movt [[ADDR]], :upper16:var8 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK: ldrexb r[[OLD:[0-9]+]], {{.*}}[[ADDR]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0 ; CHECK-NEXT: cmp r[[OLD]], r0 -; Thumb mode: it lo -; CHECK: movlo r[[NEW]], r[[OLD]] -; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]] +; Thumb mode: it ls +; CHECK: movls r[[NEW]], r[[OLD]] +; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r[[NEW]], {{.*}}[[ADDR]] ; CHECK-NEXT: cmp [[STATUS]], #0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -801,23 +819,23 @@ define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind { ret i8 %old } -define i16 @test_atomic_load_umin_i16(i16 %offset) nounwind { +define i16 @test_atomic_load_umin_i16(i16 zeroext %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i16: %old = atomicrmw umin i16* @var16, i16 %offset acquire ; CHECK-NOT: dmb ; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 -; CHECK: movt r[[ADDR]], :upper16:var16 +; CHECK: movw [[ADDR:r[0-9]+|lr]], :lower16:var16 +; CHECK: movt [[ADDR]], :upper16:var16 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK: ldaexh r[[OLD:[0-9]+]], {{.*}}[[ADDR]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0 ; CHECK-NEXT: cmp r[[OLD]], r0 -; Thumb mode: it lo -; CHECK: movlo r[[NEW]], r[[OLD]] -; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]] +; Thumb mode: it ls +; CHECK: movls r[[NEW]], r[[OLD]] +; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], r[[NEW]], {{.*}}[[ADDR]] ; CHECK-NEXT: cmp [[STATUS]], #0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -836,13 +854,13 @@ define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind { ; CHECK: movt r[[ADDR]], :upper16:var32 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0 ; CHECK-NEXT: cmp r[[OLD]], r0 -; Thumb mode: it lo -; CHECK: movlo r[[NEW]], r[[OLD]] +; Thumb mode: it ls +; CHECK: movls r[[NEW]], r[[OLD]] ; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]] ; CHECK-NEXT: cmp [[STATUS]], #0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 @@ -853,50 +871,59 @@ define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind { ret i32 %old } -define i64 @test_atomic_load_umin_i64(i64 %offset) nounwind { +define void @test_atomic_load_umin_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i64: - %old = atomicrmw umin i64* @var64, i64 %offset acq_rel + %old = atomicrmw umin i64* @var64, i64 %offset seq_cst ; CHECK-NOT: dmb ; CHECK-NOT: mcr ; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 ; CHECK: movt r[[ADDR]], :upper16:var64 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] +; CHECK: ldaexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+]], [r[[ADDR]]] ; r0, r1 below is a reasonable guess but could change: it certainly comes into the ; function there. 
-; CHECK-NEXT: subs [[NEW:r[0-9]+]], r[[OLD1]], r0 -; CHECK-NEXT: sbcs{{(\.w)?}} [[NEW]], r[[OLD2]], r1 -; CHECK-NEXT: blo .LBB{{[0-9]+}}_3 -; CHECK-NEXT: BB#2: -; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]] +; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0 +; CHECK-ARM: mov [[HICARRY:r[0-9]+|lr]], #0 +; CHECK-ARM: cmp [[OLD1]], r0 +; CHECK-ARM: movwls [[LOCARRY]], #1 +; CHECK-ARM: cmp [[OLD2]], r1 +; CHECK-ARM: movwls [[HICARRY]], #1 +; CHECK-ARM: moveq [[HICARRY]], [[LOCARRY]] +; CHECK-ARM: cmp [[HICARRY]], #0 +; CHECK-ARM: mov [[MINHI:r[0-9]+]], r1 +; CHECK-ARM: movne [[MINHI]], [[OLD2]] +; CHECK-ARM: mov [[MINLO:r[0-9]+]], r0 +; CHECK-ARM: movne [[MINLO]], [[OLD1]] +; CHECK-ARM: stlexd [[STATUS:r[0-9]+]], [[MINLO]], [[MINHI]], [r[[ADDR]]] +; CHECK-THUMB: stlexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]] ; CHECK-NEXT: cmp [[STATUS]], #0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb ; CHECK-NOT: mcr -; CHECK: mov r0, r[[OLD1]] -; CHECK-NEXT: mov r1, r[[OLD2]] - ret i64 %old +; CHECK-ARM: strd [[OLD1]], [[OLD2]], [r[[ADDR]]] + store i64 %old, i64* @var64 + ret void } -define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind { +define i8 @test_atomic_load_umax_i8(i8 zeroext %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i8: %old = atomicrmw umax i8* @var8, i8 %offset acq_rel ; CHECK-NOT: dmb ; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 -; CHECK: movt r[[ADDR]], :upper16:var8 +; CHECK: movw [[ADDR:r[0-9]+|lr]], :lower16:var8 +; CHECK: movt [[ADDR]], :upper16:var8 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK: ldaexb r[[OLD:[0-9]+]], {{.*}}[[ADDR]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0 ; CHECK-NEXT: cmp r[[OLD]], r0 ; Thumb mode: it hi ; CHECK: movhi r[[NEW]], r[[OLD]] -; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]] +; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], r[[NEW]], {{.*}}[[ADDR]] ; CHECK-NEXT: cmp [[STATUS]], #0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -906,23 +933,23 @@ define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind { ret i8 %old } -define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind { +define i16 @test_atomic_load_umax_i16(i16 zeroext %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i16: %old = atomicrmw umax i16* @var16, i16 %offset monotonic ; CHECK-NOT: dmb ; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 -; CHECK: movt r[[ADDR]], :upper16:var16 +; CHECK: movw [[ADDR:r[0-9]+|lr]], :lower16:var16 +; CHECK: movt [[ADDR]], :upper16:var16 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK: ldrexh r[[OLD:[0-9]+]], {{.*}}[[ADDR]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0 ; CHECK-NEXT: cmp r[[OLD]], r0 ; Thumb mode: it hi ; CHECK: movhi r[[NEW]], r[[OLD]] -; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]] +; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], r[[NEW]], {{.*}}[[ADDR]] ; CHECK-NEXT: cmp [[STATUS]], #0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -941,7 +968,7 @@ define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind { ; CHECK: movt r[[ADDR]], :upper16:var32 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0 @@ -958,50 +985,59 @@ define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind { ret i32 %old } -define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind { +define void @test_atomic_load_umax_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i64: - %old = atomicrmw umax i64* @var64, i64 %offset release + %old = atomicrmw umax i64* @var64, i64 %offset seq_cst ; CHECK-NOT: dmb ; CHECK-NOT: mcr ; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 ; CHECK: movt r[[ADDR]], :upper16:var64 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] +; CHECK: ldaexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+]], [r[[ADDR]]] ; r0, r1 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-NEXT: subs [[NEW:r[0-9]+]], r[[OLD1]], r0 -; CHECK-NEXT: sbcs{{(\.w)?}} [[NEW]], r[[OLD2]], r1 -; CHECK-NEXT: bhs .LBB{{[0-9]+}}_3 -; CHECK-NEXT: BB#2: -; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]] +; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0 +; CHECK-ARM: mov [[HICARRY:r[0-9]+|lr]], #0 +; CHECK-ARM: cmp [[OLD1]], r0 +; CHECK-ARM: movwhi [[LOCARRY]], #1 +; CHECK-ARM: cmp [[OLD2]], r1 +; CHECK-ARM: movwhi [[HICARRY]], #1 +; CHECK-ARM: moveq [[HICARRY]], [[LOCARRY]] +; CHECK-ARM: cmp [[HICARRY]], #0 +; CHECK-ARM: mov [[MINHI:r[0-9]+]], r1 +; CHECK-ARM: movne [[MINHI]], [[OLD2]] +; CHECK-ARM: mov [[MINLO:r[0-9]+]], r0 +; CHECK-ARM: movne [[MINLO]], [[OLD1]] +; CHECK-ARM: stlexd [[STATUS:r[0-9]+]], [[MINLO]], [[MINHI]], [r[[ADDR]]] +; CHECK-THUMB: stlexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]] ; CHECK-NEXT: cmp [[STATUS]], #0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb ; CHECK-NOT: mcr -; CHECK: mov r0, r[[OLD1]] -; CHECK-NEXT: mov r1, r[[OLD2]] - ret i64 %old +; CHECK-ARM: strd [[OLD1]], [[OLD2]], [r[[ADDR]]] + store i64 %old, i64* @var64 + ret void } -define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind { +define i8 @test_atomic_cmpxchg_i8(i8 zeroext %wanted, i8 zeroext %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i8: - %old = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire + %old = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire acquire ; CHECK-NOT: dmb ; CHECK-NOT: mcr ; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 ; CHECK: movt r[[ADDR]], :upper16:var8 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: cmp r[[OLD]], r0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_3 ; CHECK-NEXT: BB#2: ; As above, r1 is a reasonable guess. 
-; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r1, [r[[ADDR]]] +; CHECK: strexb [[STATUS:r[0-9]+]], r1, {{.*}}[[ADDR]] ; CHECK-NEXT: cmp [[STATUS]], #0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -1011,23 +1047,23 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind { ret i8 %old } -define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind { +define i16 @test_atomic_cmpxchg_i16(i16 zeroext %wanted, i16 zeroext %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i16: - %old = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst + %old = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst seq_cst ; CHECK-NOT: dmb ; CHECK-NOT: mcr ; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 ; CHECK: movt r[[ADDR]], :upper16:var16 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: cmp r[[OLD]], r0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_3 ; CHECK-NEXT: BB#2: ; As above, r1 is a reasonable guess. -; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], r1, [r[[ADDR]]] +; CHECK: stlexh [[STATUS:r[0-9]+]], r1, [r[[ADDR]]] ; CHECK-NEXT: cmp [[STATUS]], #0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -1037,59 +1073,60 @@ define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind { ret i16 %old } -define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind { +define void @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i32: - %old = cmpxchg i32* @var32, i32 %wanted, i32 %new release + %old = cmpxchg i32* @var32, i32 %wanted, i32 %new release monotonic + store i32 %old, i32* @var32 ; CHECK-NOT: dmb ; CHECK-NOT: mcr ; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32 ; CHECK: movt r[[ADDR]], :upper16:var32 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: cmp r[[OLD]], r0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_3 ; CHECK-NEXT: BB#2: ; As above, r1 is a reasonable guess. -; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], r1, [r[[ADDR]]] +; CHECK: stlex [[STATUS:r[0-9]+]], r1, [r[[ADDR]]] ; CHECK-NEXT: cmp [[STATUS]], #0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb ; CHECK-NOT: mcr -; CHECK: mov r0, r[[OLD]] - ret i32 %old +; CHECK: str{{(.w)?}} r[[OLD]], + ret void } -define i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind { +define void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i64: - %old = cmpxchg i64* @var64, i64 %wanted, i64 %new monotonic + %old = cmpxchg i64* @var64, i64 %wanted, i64 %new monotonic monotonic ; CHECK-NOT: dmb ; CHECK-NOT: mcr ; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 ; CHECK: movt r[[ADDR]], :upper16:var64 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldrexd [[OLD1:r[0-9]+|lr]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]] +; CHECK: ldrexd [[OLD1:r[0-9]+|lr]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]] ; r0, r1 below is a reasonable guess but could change: it certainly comes into the ; function there. 
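; Note on the cmpxchg updates throughout these tests: the IR instruction now
; takes two orderings, the success ordering followed by the failure ordering,
; which is why each cmpxchg gains a second keyword (e.g. "seq_cst seq_cst",
; "acquire acquire", or a weaker failure ordering such as "release
; monotonic"); at this point cmpxchg still returns the loaded value directly.
; A minimal sketch of the new form (hypothetical function name):
;   define i32 @cmpxchg_example(i32* %addr, i32 %wanted, i32 %new) {
;     %old = cmpxchg i32* %addr, i32 %wanted, i32 %new seq_cst monotonic
;     ret i32 %old
;   }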
-; CHECK-NEXT: cmp [[OLD1]], r0 -; Thumb mode: it eq -; CHECK: cmpeq [[OLD2]], r1 +; CHECK-DAG: eor{{(\.w)?}} [[MISMATCH_LO:r[0-9]+|lr]], [[OLD1]], r0 +; CHECK-DAG: eor{{(\.w)?}} [[MISMATCH_HI:r[0-9]+|lr]], [[OLD2]], r1 +; CHECK: orrs{{(\.w)?}} {{r[0-9]+}}, [[MISMATCH_LO]], [[MISMATCH_HI]] ; CHECK-NEXT: bne .LBB{{[0-9]+}}_3 ; CHECK-NEXT: BB#2: ; As above, r2, r3 is a reasonable guess. -; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], r2, r3, [r[[ADDR]]] +; CHECK: strexd [[STATUS:r[0-9]+]], r2, r3, [r[[ADDR]]] ; CHECK-NEXT: cmp [[STATUS]], #0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb ; CHECK-NOT: mcr -; CHECK: mov r0, [[OLD1]] -; CHECK-NEXT: mov r1, [[OLD2]] - ret i64 %old +; CHECK-ARM: strd [[OLD1]], [[OLD2]], [r[[ADDR]]] + store i64 %old, i64* @var64 + ret void } define i8 @test_atomic_load_monotonic_i8() nounwind { @@ -1303,13 +1340,13 @@ define void @test_atomic_store_release_i64(i64 %val) nounwind { store atomic i64 %val, i64* @var64 release, align 8 ; CHECK-NOT: dmb ; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 -; CHECK: movt r[[ADDR]], :upper16:var64 +; CHECK: movw [[ADDR:r[0-9]+|lr]], :lower16:var64 +; CHECK: movt [[ADDR]], :upper16:var64 ; CHECK: .LBB{{[0-9]+}}_1: ; r0, r1 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK: stlexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]] +; CHECK: stlexd [[STATUS:r[0-9]+]], r0, r1, {{.*}}[[ADDR]] ; CHECK-NEXT: cmp [[STATUS]], #0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -1337,7 +1374,7 @@ atomic_ver: ; The key point here is that the second dmb isn't immediately followed by the ; simple_ver basic block, which LLVM attempted to do when DMB had been marked ; with isBarrier. For now, look for something that looks like "somewhere". 
-; CHECK-NEXT: mov +; CHECK-NEXT: {{mov|bx}} somewhere: %combined = phi i32 [ %val, %atomic_ver ], [ %newval, %simple_ver] ret i32 %combined diff --git a/test/CodeGen/ARM/atomicrmw_minmax.ll b/test/CodeGen/ARM/atomicrmw_minmax.ll index 5befc22..68bf714 100644 --- a/test/CodeGen/ARM/atomicrmw_minmax.ll +++ b/test/CodeGen/ARM/atomicrmw_minmax.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=arm -mcpu=cortex-a9 < %s | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 %s -o - | FileCheck %s ; CHECK-LABEL: max: define i32 @max(i8 %ctx, i32* %ptr, i32 %val) @@ -15,7 +15,7 @@ define i32 @min(i8 %ctx, i32* %ptr, i32 %val) { ; CHECK: ldrex ; CHECK: cmp [[old:r[0-9]*]], [[val:r[0-9]*]] -; CHECK: movlo {{r[0-9]*}}, [[old]] +; CHECK: movls {{r[0-9]*}}, [[old]] %old = atomicrmw umin i32* %ptr, i32 %val monotonic ret i32 %old } diff --git a/test/CodeGen/ARM/bfc.ll b/test/CodeGen/ARM/bfc.ll index 3a17d2b..1162aac 100644 --- a/test/CodeGen/ARM/bfc.ll +++ b/test/CodeGen/ARM/bfc.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+v6t2 | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+v6t2 %s -o - | FileCheck %s ; 4278190095 = 0xff00000f define i32 @f1(i32 %a) { diff --git a/test/CodeGen/ARM/bfi.ll b/test/CodeGen/ARM/bfi.ll index 72a4678..bce09da 100644 --- a/test/CodeGen/ARM/bfi.ll +++ b/test/CodeGen/ARM/bfi.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=arm -mattr=+v6t2 < %s | FileCheck %s +; RUN: llc -mtriple=arm -mattr=+v6t2 %s -o - | FileCheck %s %struct.F = type { [3 x i8], i8 } diff --git a/test/CodeGen/ARM/bfx.ll b/test/CodeGen/ARM/bfx.ll index 394da9e..46f49e9 100644 --- a/test/CodeGen/ARM/bfx.ll +++ b/test/CodeGen/ARM/bfx.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+v7 | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+v7 %s -o - | FileCheck %s define i32 @sbfx1(i32 %a) { ; CHECK: sbfx1 diff --git a/test/CodeGen/ARM/bic.ll b/test/CodeGen/ARM/bic.ll index 1dfd627..691f8be 100644 --- a/test/CodeGen/ARM/bic.ll +++ b/test/CodeGen/ARM/bic.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm | FileCheck %s +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s define i32 @f1(i32 %a, i32 %b) { %tmp = xor i32 %b, 4294967295 diff --git a/test/CodeGen/ARM/bits.ll b/test/CodeGen/ARM/bits.ll index ce1b2ad..14aa27e 100644 --- a/test/CodeGen/ARM/bits.ll +++ b/test/CodeGen/ARM/bits.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm | FileCheck %s +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s define i32 @f1(i32 %a, i32 %b) { entry: diff --git a/test/CodeGen/ARM/build-attributes-encoding.s b/test/CodeGen/ARM/build-attributes-encoding.s index 5ad51b2..34a1ad3 100644 --- a/test/CodeGen/ARM/build-attributes-encoding.s +++ b/test/CodeGen/ARM/build-attributes-encoding.s @@ -4,7 +4,7 @@ // RUN: | llvm-readobj -s -sd | FileCheck %s // Tag_CPU_name (=5) -.cpu Cortex-A8 +.cpu cortex-a8 // Tag_CPU_arch (=6) .eabi_attribute 6, 10 @@ -61,7 +61,7 @@ .eabi_attribute 110, 160 // Check that tags > 128 are encoded properly -.eabi_attribute 129, 1 +.eabi_attribute 129, "1" .eabi_attribute 250, 1 // CHECK: Section { @@ -71,15 +71,15 @@ // CHECK-NEXT: ] // CHECK-NEXT: Address: 0x0 // CHECK-NEXT: Offset: 0x34 -// CHECK-NEXT: Size: 70 +// CHECK-NEXT: Size: 71 // CHECK-NEXT: Link: 0 // CHECK-NEXT: Info: 0 // CHECK-NEXT: AddressAlignment: 1 // CHECK-NEXT: EntrySize: 0 // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 41450000 00616561 62690001 3B000000 +// CHECK-NEXT: 0000: 41460000 00616561 62690001 3C000000 // CHECK-NEXT: 0010: 05434F52 5445582D 41380006 0A074108 // CHECK-NEXT: 0020: 0109020A 030C0214 01150117 01180119 // CHECK-NEXT: 
0030: 011B001C 0124012A 012C0244 036EA001 -// CHECK-NEXT: 0040: 810101FA 0101 +// CHECK-NEXT: 0040: 81013100 FA0101 // CHECK-NEXT: ) diff --git a/test/CodeGen/ARM/build-attributes.ll b/test/CodeGen/ARM/build-attributes.ll new file mode 100644 index 0000000..3e825e8 --- /dev/null +++ b/test/CodeGen/ARM/build-attributes.ll @@ -0,0 +1,458 @@ +; This tests that MC/asm header conversion is smooth and that the +; build attributes are correct + +; RUN: llc < %s -mtriple=thumbv5-linux-gnueabi -mcpu=xscale | FileCheck %s --check-prefix=XSCALE +; RUN: llc < %s -mtriple=armv6-linux-gnueabi | FileCheck %s --check-prefix=V6 +; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi | FileCheck %s --check-prefix=V6M +; RUN: llc < %s -mtriple=armv6-linux-gnueabi -mcpu=arm1156t2f-s | FileCheck %s --check-prefix=ARM1156T2F-S +; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi | FileCheck %s --check-prefix=V7M +; RUN: llc < %s -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=V7 +; RUN: llc < %s -mtriple=armv8-linux-gnueabi | FileCheck %s --check-prefix=V8 +; RUN: llc < %s -mtriple=thumbv8-linux-gnueabi | FileCheck %s --check-prefix=Vt8 +; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mattr=-neon,-crypto | FileCheck %s --check-prefix=V8-FPARMv8 +; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mattr=-fp-armv8,-crypto | FileCheck %s --check-prefix=V8-NEON +; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mattr=-crypto | FileCheck %s --check-prefix=V8-FPARMv8-NEON +; RUN: llc < %s -mtriple=armv8-linux-gnueabi | FileCheck %s --check-prefix=V8-FPARMv8-NEON-CRYPTO +; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a5 | FileCheck %s --check-prefix=CORTEX-A5-DEFAULT +; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a5 -mattr=-neon,+d16 | FileCheck %s --check-prefix=CORTEX-A5-NONEON +; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a5 -mattr=-vfp2 | FileCheck %s --check-prefix=CORTEX-A5-NOFPU +; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9 -float-abi=soft | FileCheck %s --check-prefix=CORTEX-A9-SOFT +; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9 -float-abi=hard | FileCheck %s --check-prefix=CORTEX-A9-HARD +; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a12 | FileCheck %s --check-prefix=CORTEX-A12-DEFAULT +; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a12 -mattr=-vfp2 | FileCheck %s --check-prefix=CORTEX-A12-NOFPU +; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9-mp | FileCheck %s --check-prefix=CORTEX-A9-MP +; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a15 | FileCheck %s --check-prefix=CORTEX-A15 +; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0 | FileCheck %s --check-prefix=CORTEX-M0 +; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m3 | FileCheck %s --check-prefix=CORTEX-M3 +; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m4 -float-abi=soft | FileCheck %s --check-prefix=CORTEX-M4-SOFT +; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m4 -float-abi=hard | FileCheck %s --check-prefix=CORTEX-M4-HARD +; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r5 | FileCheck %s --check-prefix=CORTEX-R5 +; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a53 | FileCheck %s --check-prefix=CORTEX-A53 +; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a57 | FileCheck %s --check-prefix=CORTEX-A57 +; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 | FileCheck %s --check-prefix=CORTEX-A7-CHECK +; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi 
-mcpu=cortex-a7 -mattr=-vfp2,-vfp3,-vfp4,-neon | FileCheck %s --check-prefix=CORTEX-A7-NOFPU +; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=+vfp4,-neon | FileCheck %s --check-prefix=CORTEX-A7-FPUV4 +; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=+vfp4,,+d16,-neon | FileCheck %s --check-prefix=CORTEX-A7-FPUV4 + +; XSCALE: .eabi_attribute 6, 5 +; XSCALE: .eabi_attribute 8, 1 +; XSCALE: .eabi_attribute 9, 1 + +; V6: .eabi_attribute 6, 6 +; V6: .eabi_attribute 8, 1 +; V6: .eabi_attribute 24, 1 +; V6: .eabi_attribute 25, 1 +; V6-NOT: .eabi_attribute 27 +; V6-NOT: .eabi_attribute 28 +; V6-NOT: .eabi_attribute 36 +; V6-NOT: .eabi_attribute 42 +; V6-NOT: .eabi_attribute 68 + +; V6M: .eabi_attribute 6, 12 +; V6M-NOT: .eabi_attribute 7 +; V6M: .eabi_attribute 8, 0 +; V6M: .eabi_attribute 9, 1 +; V6M: .eabi_attribute 24, 1 +; V6M: .eabi_attribute 25, 1 +; V6M-NOT: .eabi_attribute 27 +; V6M-NOT: .eabi_attribute 28 +; V6M-NOT: .eabi_attribute 36 +; V6M-NOT: .eabi_attribute 42 +; V6M-NOT: .eabi_attribute 68 + +; ARM1156T2F-S: .cpu arm1156t2f-s +; ARM1156T2F-S: .eabi_attribute 6, 8 +; ARM1156T2F-S: .eabi_attribute 8, 1 +; ARM1156T2F-S: .eabi_attribute 9, 2 +; ARM1156T2F-S: .fpu vfpv2 +; ARM1156T2F-S: .eabi_attribute 20, 1 +; ARM1156T2F-S: .eabi_attribute 21, 1 +; ARM1156T2F-S: .eabi_attribute 23, 3 +; ARM1156T2F-S: .eabi_attribute 24, 1 +; ARM1156T2F-S: .eabi_attribute 25, 1 +; ARM1156T2F-S-NOT: .eabi_attribute 27 +; ARM1156T2F-S-NOT: .eabi_attribute 28 +; ARM1156T2F-S-NOT: .eabi_attribute 36 +; ARM1156T2F-S-NOT: .eabi_attribute 42 +; ARM1156T2F-S-NOT: .eabi_attribute 68 + +; V7M: .eabi_attribute 6, 10 +; V7M: .eabi_attribute 7, 77 +; V7M: .eabi_attribute 8, 0 +; V7M: .eabi_attribute 9, 2 +; V7M: .eabi_attribute 24, 1 +; V7M: .eabi_attribute 25, 1 +; V7M-NOT: .eabi_attribute 27 +; V7M-NOT: .eabi_attribute 28 +; V7M-NOT: .eabi_attribute 36 +; V7M-NOT: .eabi_attribute 42 +; V7M-NOT: .eabi_attribute 44 +; V7M-NOT: .eabi_attribute 68 + +; V7: .syntax unified +; V7: .eabi_attribute 6, 10 +; V7: .eabi_attribute 20, 1 +; V7: .eabi_attribute 21, 1 +; V7: .eabi_attribute 23, 3 +; V7: .eabi_attribute 24, 1 +; V7: .eabi_attribute 25, 1 +; V7-NOT: .eabi_attribute 27 +; V7-NOT: .eabi_attribute 28 +; V7-NOT: .eabi_attribute 36 +; V7-NOT: .eabi_attribute 42 +; V7-NOT: .eabi_attribute 68 + +; V8: .syntax unified +; V8: .eabi_attribute 6, 14 + +; Vt8: .syntax unified +; Vt8: .eabi_attribute 6, 14 + +; V8-FPARMv8: .syntax unified +; V8-FPARMv8: .eabi_attribute 6, 14 +; V8-FPARMv8: .fpu fp-armv8 + +; V8-NEON: .syntax unified +; V8-NEON: .eabi_attribute 6, 14 +; V8-NEON: .fpu neon +; V8-NEON: .eabi_attribute 12, 3 + +; V8-FPARMv8-NEON: .syntax unified +; V8-FPARMv8-NEON: .eabi_attribute 6, 14 +; V8-FPARMv8-NEON: .fpu neon-fp-armv8 +; V8-FPARMv8-NEON: .eabi_attribute 12, 3 + +; V8-FPARMv8-NEON-CRYPTO: .syntax unified +; V8-FPARMv8-NEON-CRYPTO: .eabi_attribute 6, 14 +; V8-FPARMv8-NEON-CRYPTO: .fpu crypto-neon-fp-armv8 +; V8-FPARMv8-NEON-CRYPTO: .eabi_attribute 12, 3 + +; Tag_CPU_arch 'ARMv7' +; CORTEX-A7-CHECK: .eabi_attribute 6, 10 +; CORTEX-A7-NOFPU: .eabi_attribute 6, 10 +; CORTEX-A7-FPUV4: .eabi_attribute 6, 10 + +; Tag_CPU_arch_profile 'A' +; CORTEX-A7-CHECK: .eabi_attribute 7, 65 +; CORTEX-A7-NOFPU: .eabi_attribute 7, 65 +; CORTEX-A7-FPUV4: .eabi_attribute 7, 65 + +; Tag_ARM_ISA_use +; CORTEX-A7-CHECK: .eabi_attribute 8, 1 +; CORTEX-A7-NOFPU: .eabi_attribute 8, 1 +; CORTEX-A7-FPUV4: .eabi_attribute 8, 1 + +; Tag_THUMB_ISA_use +; CORTEX-A7-CHECK: .eabi_attribute 9, 2 
+; CORTEX-A7-NOFPU: .eabi_attribute 9, 2 +; CORTEX-A7-FPUV4: .eabi_attribute 9, 2 + +; CORTEX-A7-CHECK: .fpu neon-vfpv4 +; CORTEX-A7-NOFPU-NOT: .fpu +; CORTEX-A7-FPUV4: .fpu vfpv4 + +; Tag_ABI_FP_denormal +; CORTEX-A7-CHECK: .eabi_attribute 20, 1 +; CORTEX-A7-NOFPU: .eabi_attribute 20, 1 +; CORTEX-A7-FPUV4: .eabi_attribute 20, 1 + +; Tag_ABI_FP_exceptions +; CORTEX-A7-CHECK: .eabi_attribute 21, 1 +; CORTEX-A7-NOFPU: .eabi_attribute 21, 1 +; CORTEX-A7-FPUV4: .eabi_attribute 21, 1 + +; Tag_ABI_FP_number_model +; CORTEX-A7-CHECK: .eabi_attribute 23, 3 +; CORTEX-A7-NOFPU: .eabi_attribute 23, 3 +; CORTEX-A7-FPUV4: .eabi_attribute 23, 3 + +; Tag_ABI_align_needed +; CORTEX-A7-CHECK: .eabi_attribute 24, 1 +; CORTEX-A7-NOFPU: .eabi_attribute 24, 1 +; CORTEX-A7-FPUV4: .eabi_attribute 24, 1 + +; Tag_ABI_align_preserved +; CORTEX-A7-CHECK: .eabi_attribute 25, 1 +; CORTEX-A7-NOFPU: .eabi_attribute 25, 1 +; CORTEX-A7-FPUV4: .eabi_attribute 25, 1 + +; Tag_FP_HP_extension +; CORTEX-A7-CHECK: .eabi_attribute 36, 1 +; CORTEX-A7-NOFPU: .eabi_attribute 36, 1 +; CORTEX-A7-FPUV4: .eabi_attribute 36, 1 + +; Tag_MPextension_use +; CORTEX-A7-CHECK: .eabi_attribute 42, 1 +; CORTEX-A7-NOFPU: .eabi_attribute 42, 1 +; CORTEX-A7-FPUV4: .eabi_attribute 42, 1 + +; Tag_DIV_use +; CORTEX-A7-CHECK: .eabi_attribute 44, 2 +; CORTEX-A7-NOFPU: .eabi_attribute 44, 2 +; CORTEX-A7-FPUV4: .eabi_attribute 44, 2 + +; Tag_Virtualization_use +; CORTEX-A7-CHECK: .eabi_attribute 68, 3 +; CORTEX-A7-NOFPU: .eabi_attribute 68, 3 +; CORTEX-A7-FPUV4: .eabi_attribute 68, 3 + +; CORTEX-A5-DEFAULT: .cpu cortex-a5 +; CORTEX-A5-DEFAULT: .eabi_attribute 6, 10 +; CORTEX-A5-DEFAULT: .eabi_attribute 7, 65 +; CORTEX-A5-DEFAULT: .eabi_attribute 8, 1 +; CORTEX-A5-DEFAULT: .eabi_attribute 9, 2 +; CORTEX-A5-DEFAULT: .fpu neon-vfpv4 +; CORTEX-A5-DEFAULT: .eabi_attribute 20, 1 +; CORTEX-A5-DEFAULT: .eabi_attribute 21, 1 +; CORTEX-A5-DEFAULT: .eabi_attribute 23, 3 +; CORTEX-A5-DEFAULT: .eabi_attribute 24, 1 +; CORTEX-A5-DEFAULT: .eabi_attribute 25, 1 +; CORTEX-A5-DEFAULT: .eabi_attribute 42, 1 +; CORTEX-A5-DEFAULT: .eabi_attribute 68, 1 + +; CORTEX-A5-NONEON: .cpu cortex-a5 +; CORTEX-A5-NONEON: .eabi_attribute 6, 10 +; CORTEX-A5-NONEON: .eabi_attribute 7, 65 +; CORTEX-A5-NONEON: .eabi_attribute 8, 1 +; CORTEX-A5-NONEON: .eabi_attribute 9, 2 +; CORTEX-A5-NONEON: .fpu vfpv4-d16 +; CORTEX-A5-NONEON: .eabi_attribute 20, 1 +; CORTEX-A5-NONEON: .eabi_attribute 21, 1 +; CORTEX-A5-NONEON: .eabi_attribute 23, 3 +; CORTEX-A5-NONEON: .eabi_attribute 24, 1 +; CORTEX-A5-NONEON: .eabi_attribute 25, 1 +; CORTEX-A5-NONEON: .eabi_attribute 42, 1 +; CORTEX-A5-NONEON: .eabi_attribute 68, 1 + +; CORTEX-A5-NOFPU: .cpu cortex-a5 +; CORTEX-A5-NOFPU: .eabi_attribute 6, 10 +; CORTEX-A5-NOFPU: .eabi_attribute 7, 65 +; CORTEX-A5-NOFPU: .eabi_attribute 8, 1 +; CORTEX-A5-NOFPU: .eabi_attribute 9, 2 +; CORTEX-A5-NOFPU-NOT: .fpu +; CORTEX-A5-NOFPU: .eabi_attribute 20, 1 +; CORTEX-A5-NOFPU: .eabi_attribute 21, 1 +; CORTEX-A5-NOFPU: .eabi_attribute 23, 3 +; CORTEX-A5-NOFPU: .eabi_attribute 24, 1 +; CORTEX-A5-NOFPU: .eabi_attribute 25, 1 +; CORTEX-A5-NOFPU: .eabi_attribute 42, 1 +; CORTEX-A5-NOFPU: .eabi_attribute 68, 1 + +; CORTEX-A9-SOFT: .cpu cortex-a9 +; CORTEX-A9-SOFT: .eabi_attribute 6, 10 +; CORTEX-A9-SOFT: .eabi_attribute 7, 65 +; CORTEX-A9-SOFT: .eabi_attribute 8, 1 +; CORTEX-A9-SOFT: .eabi_attribute 9, 2 +; CORTEX-A9-SOFT: .fpu neon +; CORTEX-A9-SOFT: .eabi_attribute 20, 1 +; CORTEX-A9-SOFT: .eabi_attribute 21, 1 +; CORTEX-A9-SOFT: .eabi_attribute 23, 3 +; CORTEX-A9-SOFT: 
.eabi_attribute 24, 1 +; CORTEX-A9-SOFT: .eabi_attribute 25, 1 +; CORTEX-A9-SOFT-NOT: .eabi_attribute 27 +; CORTEX-A9-SOFT-NOT: .eabi_attribute 28 +; CORTEX-A9-SOFT: .eabi_attribute 36, 1 +; CORTEX-A9-SOFT-NOT: .eabi_attribute 42 +; CORTEX-A9-SOFT: .eabi_attribute 68, 1 + +; CORTEX-A9-HARD: .cpu cortex-a9 +; CORTEX-A9-HARD: .eabi_attribute 6, 10 +; CORTEX-A9-HARD: .eabi_attribute 7, 65 +; CORTEX-A9-HARD: .eabi_attribute 8, 1 +; CORTEX-A9-HARD: .eabi_attribute 9, 2 +; CORTEX-A9-HARD: .fpu neon +; CORTEX-A9-HARD: .eabi_attribute 20, 1 +; CORTEX-A9-HARD: .eabi_attribute 21, 1 +; CORTEX-A9-HARD: .eabi_attribute 23, 3 +; CORTEX-A9-HARD: .eabi_attribute 24, 1 +; CORTEX-A9-HARD: .eabi_attribute 25, 1 +; CORTEX-A9-HARD-NOT: .eabi_attribute 27 +; CORTEX-A9-HARD: .eabi_attribute 28, 1 +; CORTEX-A9-HARD: .eabi_attribute 36, 1 +; CORTEX-A9-HARD-NOT: .eabi_attribute 42 +; CORTEX-A9-HARD: .eabi_attribute 68, 1 + +; CORTEX-A9-MP: .cpu cortex-a9-mp +; CORTEX-A9-MP: .eabi_attribute 6, 10 +; CORTEX-A9-MP: .eabi_attribute 7, 65 +; CORTEX-A9-MP: .eabi_attribute 8, 1 +; CORTEX-A9-MP: .eabi_attribute 9, 2 +; CORTEX-A9-MP: .fpu neon +; CORTEX-A9-MP: .eabi_attribute 20, 1 +; CORTEX-A9-MP: .eabi_attribute 21, 1 +; CORTEX-A9-MP: .eabi_attribute 23, 3 +; CORTEX-A9-MP: .eabi_attribute 24, 1 +; CORTEX-A9-MP: .eabi_attribute 25, 1 +; CORTEX-A9-MP-NOT: .eabi_attribute 27 +; CORTEX-A9-MP-NOT: .eabi_attribute 28 +; CORTEX-A9-MP: .eabi_attribute 36, 1 +; CORTEX-A9-MP: .eabi_attribute 42, 1 +; CORTEX-A9-MP: .eabi_attribute 68, 1 + +; CORTEX-A12-DEFAULT: .cpu cortex-a12 +; CORTEX-A12-DEFAULT: .eabi_attribute 6, 10 +; CORTEX-A12-DEFAULT: .eabi_attribute 7, 65 +; CORTEX-A12-DEFAULT: .eabi_attribute 8, 1 +; CORTEX-A12-DEFAULT: .eabi_attribute 9, 2 +; CORTEX-A12-DEFAULT: .fpu neon-vfpv4 +; CORTEX-A12-DEFAULT: .eabi_attribute 20, 1 +; CORTEX-A12-DEFAULT: .eabi_attribute 21, 1 +; CORTEX-A12-DEFAULT: .eabi_attribute 23, 3 +; CORTEX-A12-DEFAULT: .eabi_attribute 24, 1 +; CORTEX-A12-DEFAULT: .eabi_attribute 25, 1 +; CORTEX-A12-DEFAULT: .eabi_attribute 42, 1 +; CORTEX-A12-DEFAULT: .eabi_attribute 44, 2 +; CORTEX-A12-DEFAULT: .eabi_attribute 68, 3 + +; CORTEX-A12-NOFPU: .cpu cortex-a12 +; CORTEX-A12-NOFPU: .eabi_attribute 6, 10 +; CORTEX-A12-NOFPU: .eabi_attribute 7, 65 +; CORTEX-A12-NOFPU: .eabi_attribute 8, 1 +; CORTEX-A12-NOFPU: .eabi_attribute 9, 2 +; CORTEX-A12-NOFPU-NOT: .fpu +; CORTEX-A12-NOFPU: .eabi_attribute 20, 1 +; CORTEX-A12-NOFPU: .eabi_attribute 21, 1 +; CORTEX-A12-NOFPU: .eabi_attribute 23, 3 +; CORTEX-A12-NOFPU: .eabi_attribute 24, 1 +; CORTEX-A12-NOFPU: .eabi_attribute 25, 1 +; CORTEX-A12-NOFPU: .eabi_attribute 42, 1 +; CORTEX-A12-NOFPU: .eabi_attribute 44, 2 +; CORTEX-A12-NOFPU: .eabi_attribute 68, 3 + +; CORTEX-A15: .cpu cortex-a15 +; CORTEX-A15: .eabi_attribute 6, 10 +; CORTEX-A15: .eabi_attribute 7, 65 +; CORTEX-A15: .eabi_attribute 8, 1 +; CORTEX-A15: .eabi_attribute 9, 2 +; CORTEX-A15: .fpu neon-vfpv4 +; CORTEX-A15: .eabi_attribute 20, 1 +; CORTEX-A15: .eabi_attribute 21, 1 +; CORTEX-A15: .eabi_attribute 23, 3 +; CORTEX-A15: .eabi_attribute 24, 1 +; CORTEX-A15: .eabi_attribute 25, 1 +; CORTEX-A15-NOT: .eabi_attribute 27 +; CORTEX-A15-NOT: .eabi_attribute 28 +; CORTEX-A15: .eabi_attribute 36, 1 +; CORTEX-A15: .eabi_attribute 42, 1 +; CORTEX-A15: .eabi_attribute 44, 2 +; CORTEX-A15: .eabi_attribute 68, 3 + +; CORTEX-M0: .cpu cortex-m0 +; CORTEX-M0: .eabi_attribute 6, 12 +; CORTEX-M0-NOT: .eabi_attribute 7 +; CORTEX-M0: .eabi_attribute 8, 0 +; CORTEX-M0: .eabi_attribute 9, 1 +; CORTEX-M0: .eabi_attribute 24, 1 +; 
CORTEX-M0: .eabi_attribute 25, 1 +; CORTEX-M0-NOT: .eabi_attribute 27 +; CORTEX-M0-NOT: .eabi_attribute 28 +; CORTEX-M0-NOT: .eabi_attribute 36 +; CORTEX-M0-NOT: .eabi_attribute 42 +; CORTEX-M0-NOT: .eabi_attribute 68 + +; CORTEX-M3: .cpu cortex-m3 +; CORTEX-M3: .eabi_attribute 6, 10 +; CORTEX-M3: .eabi_attribute 7, 77 +; CORTEX-M3: .eabi_attribute 8, 0 +; CORTEX-M3: .eabi_attribute 9, 2 +; CORTEX-M3: .eabi_attribute 20, 1 +; CORTEX-M3: .eabi_attribute 21, 1 +; CORTEX-M3: .eabi_attribute 23, 3 +; CORTEX-M3: .eabi_attribute 24, 1 +; CORTEX-M3: .eabi_attribute 25, 1 +; CORTEX-M3-NOT: .eabi_attribute 27 +; CORTEX-M3-NOT: .eabi_attribute 28 +; CORTEX-M3-NOT: .eabi_attribute 36 +; CORTEX-M3-NOT: .eabi_attribute 42 +; CORTEX-M3-NOT: .eabi_attribute 44 +; CORTEX-M3-NOT: .eabi_attribute 68 + +; CORTEX-M4-SOFT: .cpu cortex-m4 +; CORTEX-M4-SOFT: .eabi_attribute 6, 13 +; CORTEX-M4-SOFT: .eabi_attribute 7, 77 +; CORTEX-M4-SOFT: .eabi_attribute 8, 0 +; CORTEX-M4-SOFT: .eabi_attribute 9, 2 +; CORTEX-M4-SOFT: .fpu vfpv4-d16 +; CORTEX-M4-SOFT: .eabi_attribute 20, 1 +; CORTEX-M4-SOFT: .eabi_attribute 21, 1 +; CORTEX-M4-SOFT: .eabi_attribute 23, 3 +; CORTEX-M4-SOFT: .eabi_attribute 24, 1 +; CORTEX-M4-SOFT: .eabi_attribute 25, 1 +; CORTEX-M4-SOFT: .eabi_attribute 27, 1 +; CORTEX-M4-SOFT-NOT: .eabi_attribute 28 +; CORTEX-M4-SOFT: .eabi_attribute 36, 1 +; CORTEX-M4-SOFT-NOT: .eabi_attribute 42 +; CORTEX-M4-SOFT-NOT: .eabi_attribute 44 +; CORTEX-M4-SOFT-NOT: .eabi_attribute 68 + +; CORTEX-M4-HARD: .cpu cortex-m4 +; CORTEX-M4-HARD: .eabi_attribute 6, 13 +; CORTEX-M4-HARD: .eabi_attribute 7, 77 +; CORTEX-M4-HARD: .eabi_attribute 8, 0 +; CORTEX-M4-HARD: .eabi_attribute 9, 2 +; CORTEX-M4-HARD: .fpu vfpv4-d16 +; CORTEX-M4-HARD: .eabi_attribute 20, 1 +; CORTEX-M4-HARD: .eabi_attribute 21, 1 +; CORTEX-M4-HARD: .eabi_attribute 23, 3 +; CORTEX-M4-HARD: .eabi_attribute 24, 1 +; CORTEX-M4-HARD: .eabi_attribute 25, 1 +; CORTEX-M4-HARD: .eabi_attribute 27, 1 +; CORTEX-M4-HARD: .eabi_attribute 28, 1 +; CORTEX-M4-HARD: .eabi_attribute 36, 1 +; CORTEX-M4-HARD-NOT: .eabi_attribute 42 +; CORTEX-M4-HARD-NOT: .eabi_attribute 44 +; CORTEX-M4-HARD-NOT: .eabi_attribute 68 + +; CORTEX-R5: .cpu cortex-r5 +; CORTEX-R5: .eabi_attribute 6, 10 +; CORTEX-R5: .eabi_attribute 7, 82 +; CORTEX-R5: .eabi_attribute 8, 1 +; CORTEX-R5: .eabi_attribute 9, 2 +; CORTEX-R5: .fpu vfpv3-d16 +; CORTEX-R5: .eabi_attribute 20, 1 +; CORTEX-R5: .eabi_attribute 21, 1 +; CORTEX-R5: .eabi_attribute 23, 3 +; CORTEX-R5: .eabi_attribute 24, 1 +; CORTEX-R5: .eabi_attribute 25, 1 +; CORTEX-R5: .eabi_attribute 27, 1 +; CORTEX-R5-NOT: .eabi_attribute 28 +; CORTEX-R5-NOT: .eabi_attribute 36 +; CORTEX-R5-NOT: .eabi_attribute 42 +; CORTEX-R5: .eabi_attribute 44, 2 +; CORTEX-R5-NOT: .eabi_attribute 68 + +; CORTEX-A53: .cpu cortex-a53 +; CORTEX-A53: .eabi_attribute 6, 14 +; CORTEX-A53: .eabi_attribute 7, 65 +; CORTEX-A53: .eabi_attribute 8, 1 +; CORTEX-A53: .eabi_attribute 9, 2 +; CORTEX-A53: .fpu crypto-neon-fp-armv8 +; CORTEX-A53: .eabi_attribute 12, 3 +; CORTEX-A53: .eabi_attribute 24, 1 +; CORTEX-A53: .eabi_attribute 25, 1 +; CORTEX-A53-NOT: .eabi_attribute 27 +; CORTEX-A53-NOT: .eabi_attribute 28 +; CORTEX-A53: .eabi_attribute 36, 1 +; CORTEX-A53: .eabi_attribute 42, 1 +; CORTEX-A53-NOT: .eabi_attribute 44 +; CORTEX-A53: .eabi_attribute 68, 3 + +; CORTEX-A57: .cpu cortex-a57 +; CORTEX-A57: .eabi_attribute 6, 14 +; CORTEX-A57: .eabi_attribute 7, 65 +; CORTEX-A57: .eabi_attribute 8, 1 +; CORTEX-A57: .eabi_attribute 9, 2 +; CORTEX-A57: .fpu crypto-neon-fp-armv8 +; 
CORTEX-A57: .eabi_attribute 12, 3 +; CORTEX-A57: .eabi_attribute 24, 1 +; CORTEX-A57: .eabi_attribute 25, 1 +; CORTEX-A57-NOT: .eabi_attribute 27 +; CORTEX-A57-NOT: .eabi_attribute 28 +; CORTEX-A57: .eabi_attribute 36, 1 +; CORTEX-A57: .eabi_attribute 42, 1 +; CORTEX-A57-NOT: .eabi_attribute 44 +; CORTEX-A57: .eabi_attribute 68, 3 + +define i32 @f(i64 %z) { + ret i32 0 +} diff --git a/test/CodeGen/ARM/cache-intrinsic.ll b/test/CodeGen/ARM/cache-intrinsic.ll new file mode 100644 index 0000000..6048917 --- /dev/null +++ b/test/CodeGen/ARM/cache-intrinsic.ll @@ -0,0 +1,26 @@ +; RUN: llc %s -o - | FileCheck %s +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" +target triple = "armv7--linux-gnueabihf" + +@buffer = global [32 x i8] c"This is a largely unused buffer\00", align 1 +@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1 +@.str1 = private unnamed_addr constant [25 x i8] c"Still, largely unused...\00", align 1 + +define i32 @main() { +entry: + %retval = alloca i32, align 4 + store i32 0, i32* %retval + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([32 x i8]* @buffer, i32 0, i32 0)) + %call1 = call i8* @strcpy(i8* getelementptr inbounds ([32 x i8]* @buffer, i32 0, i32 0), i8* getelementptr inbounds ([25 x i8]* @.str1, i32 0, i32 0)) #3 + call void @llvm.clear_cache(i8* getelementptr inbounds ([32 x i8]* @buffer, i32 0, i32 0), i8* getelementptr inbounds (i8* getelementptr inbounds ([32 x i8]* @buffer, i32 0, i32 0), i32 32)) #3 + %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([32 x i8]* @buffer, i32 0, i32 0)) + ret i32 0 +} + +; CHECK: __clear_cache + +declare i32 @printf(i8*, ...) 
+ +declare i8* @strcpy(i8*, i8*) + +declare void @llvm.clear_cache(i8*, i8*) diff --git a/test/CodeGen/ARM/call-tc.ll b/test/CodeGen/ARM/call-tc.ll index d463602..40694bf 100644 --- a/test/CodeGen/ARM/call-tc.ll +++ b/test/CodeGen/ARM/call-tc.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -mtriple=armv6-apple-ios -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKV6 -; RUN: llc < %s -mtriple=armv6-linux-gnueabi -relocation-model=pic -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKELF -; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-tail-calls | FileCheck %s -check-prefix=CHECKT2D +; RUN: llc < %s -mtriple=armv6-apple-ios5.0 -mattr=+vfp2 | FileCheck %s -check-prefix=CHECKV6 ; RUN: llc < %s -mtriple=thumbv7-apple-ios5.0 | FileCheck %s -check-prefix=CHECKT2D +; RUN: llc < %s -mtriple=armv6-linux-gnueabi -relocation-model=pic -mattr=+vfp2 \ +; RUN: | FileCheck %s -check-prefix=CHECKELF ; Enable tailcall optimization for iOS 5.0 ; rdar://9120031 diff --git a/test/CodeGen/ARM/call.ll b/test/CodeGen/ARM/call.ll index 107e79a..f6301cf 100644 --- a/test/CodeGen/ARM/call.ll +++ b/test/CodeGen/ARM/call.ll @@ -1,7 +1,11 @@ -; RUN: llc < %s -march=arm -mattr=+v4t | FileCheck %s -check-prefix=CHECKV4 -; RUN: llc < %s -march=arm -mattr=+v5t | FileCheck %s -check-prefix=CHECKV5 -; RUN: llc < %s -mtriple=armv6-linux-gnueabi\ -; RUN: -relocation-model=pic | FileCheck %s -check-prefix=CHECKELF +; RUN: llc -mtriple=arm-eabi -mattr=+v4t %s -o - \ +; RUN: | FileCheck %s -check-prefix=CHECKV4 + +; RUN: llc -mtriple=arm-eabi -mattr=+v5t %s -o - \ +; RUN: | FileCheck %s -check-prefix=CHECKV5 + +; RUN: llc -mtriple=armv6-linux-gnueabi -relocation-model=pic %s -o - \ +; RUN: | FileCheck %s -check-prefix=CHECKELF @t = weak global i32 ()* null ; [#uses=1] diff --git a/test/CodeGen/ARM/carry.ll b/test/CodeGen/ARM/carry.ll index f67987f..e344b08 100644 --- a/test/CodeGen/ARM/carry.ll +++ b/test/CodeGen/ARM/carry.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm | FileCheck %s +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s define i64 @f1(i64 %a, i64 %b) { ; CHECK-LABEL: f1: diff --git a/test/CodeGen/ARM/clz.ll b/test/CodeGen/ARM/clz.ll index 5b6a584..68e8c7c 100644 --- a/test/CodeGen/ARM/clz.ll +++ b/test/CodeGen/ARM/clz.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+v5t | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+v5t %s -o - | FileCheck %s declare i32 @llvm.ctlz.i32(i32, i1) diff --git a/test/CodeGen/ARM/coalesce-dbgvalue.ll b/test/CodeGen/ARM/coalesce-dbgvalue.ll index 86106a0..606c9bc 100644 --- a/test/CodeGen/ARM/coalesce-dbgvalue.ll +++ b/test/CodeGen/ARM/coalesce-dbgvalue.ll @@ -81,7 +81,7 @@ attributes #3 = { nounwind } !0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 182024) (llvm/trunk 182023)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !15, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/d/b/pr16110.c] [DW_LANG_C99] !1 = metadata !{metadata !"pr16110.c", metadata !"/d/b"} -!2 = metadata !{i32 0} +!2 = metadata !{} !3 = metadata !{metadata !4} !4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"pr16110", metadata !"pr16110", metadata !"", i32 7, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 ()* @pr16110, null, null, metadata !9, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [pr16110] !5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/d/b/pr16110.c] diff --git a/test/CodeGen/ARM/compare-call.ll b/test/CodeGen/ARM/compare-call.ll index 
fac2bc5..323eb1f 100644 --- a/test/CodeGen/ARM/compare-call.ll +++ b/test/CodeGen/ARM/compare-call.ll @@ -1,5 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | \ -; RUN: grep vcmpe.f32 +; RUN: llc -mtriple=arm-eabi -mattr=+v6,+vfp2 %s -o - | FileCheck %s define void @test3(float* %glob, i32 %X) { entry: @@ -18,3 +17,6 @@ UnifiedReturnBlock: ; preds = %entry } declare i32 @bar(...) + +; CHECK: vcmpe.f32 + diff --git a/test/CodeGen/ARM/constantfp.ll b/test/CodeGen/ARM/constantfp.ll index 974bdd7..27b6e9b 100644 --- a/test/CodeGen/ARM/constantfp.ll +++ b/test/CodeGen/ARM/constantfp.ll @@ -15,7 +15,7 @@ define arm_aapcs_vfpcc float @test_vmov_imm() { ; CHECK: vmov.i32 d0, #0 ; CHECK-NONEON-LABEL: test_vmov_imm: -; CHECK_NONEON: vldr s0, {{.?LCPI[0-9]+_[0-9]+}} +; CHECK-NONEON: vldr s0, {{.?LCPI[0-9]+_[0-9]+}} ret float 0.0 } @@ -24,7 +24,7 @@ define arm_aapcs_vfpcc float @test_vmvn_imm() { ; CHECK: vmvn.i32 d0, #0xb0000000 ; CHECK-NONEON-LABEL: test_vmvn_imm: -; CHECK_NONEON: vldr s0, {{.?LCPI[0-9]+_[0-9]+}} +; CHECK-NONEON: vldr s0, {{.?LCPI[0-9]+_[0-9]+}} ret float 8589934080.0 } @@ -33,7 +33,7 @@ define arm_aapcs_vfpcc double @test_vmov_f64() { ; CHECK: vmov.f64 d0, #1.0 ; CHECK-NONEON-LABEL: test_vmov_f64: -; CHECK_NONEON: vmov.f64 d0, #1.0 +; CHECK-NONEON: vmov.f64 d0, #1.0 ret double 1.0 } @@ -43,7 +43,7 @@ define arm_aapcs_vfpcc double @test_vmov_double_imm() { ; CHECK: vmov.i32 d0, #0 ; CHECK-NONEON-LABEL: test_vmov_double_imm: -; CHECK_NONEON: vldr d0, {{.?LCPI[0-9]+_[0-9]+}} +; CHECK-NONEON: vldr d0, {{.?LCPI[0-9]+_[0-9]+}} ret double 0.0 } @@ -52,7 +52,7 @@ define arm_aapcs_vfpcc double @test_vmvn_double_imm() { ; CHECK: vmvn.i32 d0, #0xb0000000 ; CHECK-NONEON-LABEL: test_vmvn_double_imm: -; CHECK_NONEON: vldr d0, {{.?LCPI[0-9]+_[0-9]+}} +; CHECK-NONEON: vldr d0, {{.?LCPI[0-9]+_[0-9]+}} ret double 0x4fffffff4fffffff } @@ -63,6 +63,6 @@ define arm_aapcs_vfpcc double @test_notvmvn_double_imm() { ; CHECK: vldr d0, {{.?LCPI[0-9]+_[0-9]+}} ; CHECK-NONEON-LABEL: test_notvmvn_double_imm: -; CHECK_NONEON: vldr d0, {{.?LCPI[0-9]+_[0-9]+}} +; CHECK-NONEON: vldr d0, {{.?LCPI[0-9]+_[0-9]+}} ret double 0x4fffffffffffffff } diff --git a/test/CodeGen/ARM/crash-O0.ll b/test/CodeGen/ARM/crash-O0.ll index 8bce4e0..8855bb9 100644 --- a/test/CodeGen/ARM/crash-O0.ll +++ b/test/CodeGen/ARM/crash-O0.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O0 -relocation-model=pic -disable-fp-elim +; RUN: llc < %s -O0 -relocation-model=pic -disable-fp-elim -no-integrated-as target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64-n32" target triple = "armv6-apple-darwin10" diff --git a/test/CodeGen/ARM/cse-ldrlit.ll b/test/CodeGen/ARM/cse-ldrlit.ll new file mode 100644 index 0000000..ea8c0ca --- /dev/null +++ b/test/CodeGen/ARM/cse-ldrlit.ll @@ -0,0 +1,61 @@ +; RUN: llc -mtriple=thumbv6m-apple-none-macho -relocation-model=pic -o - %s | FileCheck %s --check-prefix=CHECK-THUMB-PIC +; RUN: llc -mtriple=arm-apple-none-macho -relocation-model=pic -o - %s | FileCheck %s --check-prefix=CHECK-ARM-PIC +; RUN: llc -mtriple=thumbv6m-apple-none-macho -relocation-model=dynamic-no-pic -o - %s | FileCheck %s --check-prefix=CHECK-DYNAMIC +; RUN: llc -mtriple=arm-apple-none-macho -relocation-model=dynamic-no-pic -o - %s | FileCheck %s --check-prefix=CHECK-DYNAMIC +; RUN: llc -mtriple=thumbv6m-apple-none-macho -relocation-model=static -o - %s | FileCheck %s --check-prefix=CHECK-STATIC +; RUN: llc -mtriple=arm-apple-none-macho -relocation-model=static -o - %s | 
FileCheck %s --check-prefix=CHECK-STATIC +@var = global [16 x i32] zeroinitializer + +declare void @bar(i32*) + +define void @foo() { + %flag = load i32* getelementptr inbounds([16 x i32]* @var, i32 0, i32 1) + %tst = icmp eq i32 %flag, 0 + br i1 %tst, label %true, label %false +true: + tail call void @bar(i32* getelementptr inbounds([16 x i32]* @var, i32 0, i32 4)) + ret void +false: + ret void +} + +; CHECK-THUMB-PIC-LABEL: foo: +; CHECK-THUMB-PIC: ldr r0, LCPI0_0 +; CHECK-THUMB-PIC: LPC0_0: +; CHECK-THUMB-PIC-NEXT: add r0, pc +; CHECK-THUMB-PIC: ldr {{r[1-9][0-9]?}}, [r0, #4] + +; CHECK-THUMB-PIC: LCPI0_0: +; CHECK-THUMB-PIC-NEXT: .long _var-(LPC0_0+4) +; CHECK-THUMB-PIC-NOT: LCPI0_1 + + +; CHECK-ARM-PIC-LABEL: foo: +; CHECK-ARM-PIC: ldr [[VAR_OFFSET:r[0-9]+]], LCPI0_0 +; CHECK-ARM-PIC: LPC0_0: +; CHECK-ARM-PIC-NEXT: ldr r0, [pc, [[VAR_OFFSET]]] +; CHECK-ARM-PIC: ldr {{r[1-9][0-9]?}}, [r0, #4] + +; CHECK-ARM-PIC: LCPI0_0: +; CHECK-ARM-PIC-NEXT: .long _var-(LPC0_0+8) +; CHECK-ARM-PIC-NOT: LCPI0_1 + + +; CHECK-DYNAMIC-LABEL: foo: +; CHECK-DYNAMIC: ldr r0, LCPI0_0 +; CHECK-DYNAMIC: ldr {{r[1-9][0-9]?}}, [r0, #4] + +; CHECK-DYNAMIC: LCPI0_0: +; CHECK-DYNAMIC-NEXT: .long _var +; CHECK-DYNAMIC-NOT: LCPI0_1 + + +; CHECK-STATIC-LABEL: foo: +; CHECK-STATIC: ldr r0, LCPI0_0 +; CHECK-STATIC: ldr {{r[1-9][0-9]?}}, [r0, #4] + +; CHECK-STATIC: LCPI0_0: +; CHECK-STATIC-NEXT: .long _var{{$}} +; CHECK-STATIC-NOT: LCPI0_1 + + diff --git a/test/CodeGen/ARM/ctz.ll b/test/CodeGen/ARM/ctz.ll index 2c7efc7..2d88b03 100644 --- a/test/CodeGen/ARM/ctz.ll +++ b/test/CodeGen/ARM/ctz.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+v6t2 | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+v6t2 %s -o - | FileCheck %s declare i32 @llvm.cttz.i32(i32, i1) diff --git a/test/CodeGen/ARM/debug-frame-large-stack.ll b/test/CodeGen/ARM/debug-frame-large-stack.ll new file mode 100644 index 0000000..5bafce9 --- /dev/null +++ b/test/CodeGen/ARM/debug-frame-large-stack.ll @@ -0,0 +1,99 @@ +; RUN: llc -filetype=asm -o - < %s -mtriple arm-arm-none-eabi -disable-fp-elim| FileCheck %s --check-prefix=CHECK-ARM +; RUN: llc -filetype=asm -o - < %s -mtriple arm-arm-none-eabi | FileCheck %s --check-prefix=CHECK-ARM-FP-ELIM + +define void @test1() { + %tmp = alloca [ 64 x i32 ] , align 4 + ret void +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!8, !9} +!llvm.ident = !{!10} + +!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/large.c] [DW_LANG_C99] +!1 = metadata !{metadata !"large.c", metadata !"/tmp"} +!2 = metadata !{} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test1", metadata !"test1", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @test1, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [test1] +!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp/large.c] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{null} +!8 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} +!10 = metadata !{metadata !"clang version 3.5 "} +!11 = metadata !{i32 2, i32 0, metadata !4, null} + +; 
CHECK-ARM-LABEL: test1: +; CHECK-ARM: .cfi_startproc +; CHECK-ARM: sub sp, sp, #256 +; CHECK-ARM: .cfi_endproc + +; CHECK-ARM-FP-ELIM-LABEL: test1: +; CHECK-ARM-FP-ELIM: .cfi_startproc +; CHECK-ARM-FP-ELIM: sub sp, sp, #256 +; CHECK-ARM-FP-ELIM: .cfi_endproc + +define void @test2() { + %tmp = alloca [ 4168 x i8 ] , align 4 + ret void +} + +; CHECK-ARM-LABEL: test2: +; CHECK-ARM: .cfi_startproc +; CHECK-ARM: push {r4, r5} +; CHECK-ARM: .cfi_def_cfa_offset 8 +; CHECK-ARM: .cfi_offset r5, -4 +; CHECK-ARM: .cfi_offset r4, -8 +; CHECK-ARM: sub sp, sp, #72 +; CHECK-ARM: sub sp, sp, #4096 +; CHECK-ARM: .cfi_def_cfa_offset 4176 +; CHECK-ARM: .cfi_endproc + +; CHECK-ARM-FP_ELIM-LABEL: test2: +; CHECK-ARM-FP_ELIM: .cfi_startproc +; CHECK-ARM-FP_ELIM: push {r4, r5} +; CHECK-ARM-FP_ELIM: .cfi_def_cfa_offset 8 +; CHECK-ARM-FP_ELIM: .cfi_offset 54, -4 +; CHECK-ARM-FP_ELIM: .cfi_offset r4, -8 +; CHECK-ARM-FP_ELIM: sub sp, sp, #72 +; CHECK-ARM-FP_ELIM: sub sp, sp, #4096 +; CHECK-ARM-FP_ELIM: .cfi_def_cfa_offset 4176 +; CHECK-ARM-FP_ELIM: .cfi_endproc + +define i32 @test3() { + %retval = alloca i32, align 4 + %tmp = alloca i32, align 4 + %a = alloca [805306369 x i8], align 16 + store i32 0, i32* %tmp + %tmp1 = load i32* %tmp + ret i32 %tmp1 +} + +; CHECK-ARM-LABEL: test3: +; CHECK-ARM: .cfi_startproc +; CHECK-ARM: push {r4, r5, r11} +; CHECK-ARM: .cfi_def_cfa_offset 12 +; CHECK-ARM: .cfi_offset r11, -4 +; CHECK-ARM: .cfi_offset r5, -8 +; CHECK-ARM: .cfi_offset r4, -12 +; CHECK-ARM: add r11, sp, #8 +; CHECK-ARM: .cfi_def_cfa r11, 4 +; CHECK-ARM: sub sp, sp, #20 +; CHECK-ARM: sub sp, sp, #805306368 +; CHECK-ARM: bic sp, sp, #15 +; CHECK-ARM: .cfi_endproc + +; CHECK-ARM-FP-ELIM-LABEL: test3: +; CHECK-ARM-FP-ELIM: .cfi_startproc +; CHECK-ARM-FP-ELIM: push {r4, r5, r11} +; CHECK-ARM-FP-ELIM: .cfi_def_cfa_offset 12 +; CHECK-ARM-FP-ELIM: .cfi_offset r11, -4 +; CHECK-ARM-FP-ELIM: .cfi_offset r5, -8 +; CHECK-ARM-FP-ELIM: .cfi_offset r4, -12 +; CHECK-ARM-FP-ELIM: add r11, sp, #8 +; CHECK-ARM-FP-ELIM: .cfi_def_cfa r11, 4 +; CHECK-ARM-FP-ELIM: sub sp, sp, #20 +; CHECK-ARM-FP-ELIM: sub sp, sp, #805306368 +; CHECK-ARM-FP-ELIM: bic sp, sp, #15 +; CHECK-ARM-FP-ELIM: .cfi_endproc + diff --git a/test/CodeGen/ARM/debug-frame-no-debug.ll b/test/CodeGen/ARM/debug-frame-no-debug.ll new file mode 100644 index 0000000..81702c6 --- /dev/null +++ b/test/CodeGen/ARM/debug-frame-no-debug.ll @@ -0,0 +1,97 @@ +; ARM EHABI integrated test + +; This test case checks that the ARM DWARF stack frame directives +; are not generated if compiling with no debug information. + +; RUN: llc -mtriple arm-unknown-linux-gnueabi \ +; RUN: -filetype=asm -o - %s \ +; RUN: | FileCheck %s --check-prefix=CHECK-FP-ELIM + +; RUN: llc -mtriple thumb-unknown-linux-gnueabi \ +; RUN: -disable-fp-elim -filetype=asm -o - %s \ +; RUN: | FileCheck %s --check-prefix=CHECK-THUMB-FP + +;------------------------------------------------------------------------------- +; Test 1 +;------------------------------------------------------------------------------- +; This is the LLVM assembly generated from following C++ code: +; +; extern void print(int, int, int, int, int); +; extern void print(double, double, double, double, double); +; +; void test(int a, int b, int c, int d, int e, +; double m, double n, double p, double q, double r) { +; try { +; print(a, b, c, d, e); +; } catch (...) 
{ +; print(m, n, p, q, r); +; } +; } + +declare void @_Z5printiiiii(i32, i32, i32, i32, i32) + +declare void @_Z5printddddd(double, double, double, double, double) + +define void @_Z4testiiiiiddddd(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, + double %m, double %n, double %p, + double %q, double %r) { +entry: + invoke void @_Z5printiiiii(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) + to label %try.cont unwind label %lpad + +lpad: + %0 = landingpad { i8*, i32 } + personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + catch i8* null + %1 = extractvalue { i8*, i32 } %0, 0 + %2 = tail call i8* @__cxa_begin_catch(i8* %1) + invoke void @_Z5printddddd(double %m, double %n, double %p, + double %q, double %r) + to label %invoke.cont2 unwind label %lpad1 + +invoke.cont2: + tail call void @__cxa_end_catch() + br label %try.cont + +try.cont: + ret void + +lpad1: + %3 = landingpad { i8*, i32 } + personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + cleanup + invoke void @__cxa_end_catch() + to label %eh.resume unwind label %terminate.lpad + +eh.resume: + resume { i8*, i32 } %3 + +terminate.lpad: + %4 = landingpad { i8*, i32 } + personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + catch i8* null + %5 = extractvalue { i8*, i32 } %4, 0 + tail call void @__clang_call_terminate(i8* %5) + unreachable +} + +declare void @__clang_call_terminate(i8*) + +declare i32 @__gxx_personality_v0(...) + +declare i8* @__cxa_begin_catch(i8*) + +declare void @__cxa_end_catch() + +declare void @_ZSt9terminatev() + +; CHECK-FP-ELIM-LABEL: _Z4testiiiiiddddd: +; CHECK-FP-ELIM-NOT: .cfi_startproc +; CHECK-FP-ELIM: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-FP-ELIM-NOT: .cfi_def_cfa_offset 36 + +; CHECK-THUMB-FP-LABEL: _Z4testiiiiiddddd: +; CHECK-THUMB-FP-NOT: .cfi_startproc +; CHECK-THUMB-FP: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-FP-NOT: .cfi_def_cfa_offset 20 + diff --git a/test/CodeGen/ARM/debug-frame-vararg.ll b/test/CodeGen/ARM/debug-frame-vararg.ll new file mode 100644 index 0000000..9b39525 --- /dev/null +++ b/test/CodeGen/ARM/debug-frame-vararg.ll @@ -0,0 +1,141 @@ +; RUN: llc -mtriple arm-unknown-linux-gnueabi -filetype asm -o - %s | FileCheck %s --check-prefix=CHECK-FP +; RUN: llc -mtriple arm-unknown-linux-gnueabi -filetype asm -o - %s -disable-fp-elim | FileCheck %s --check-prefix=CHECK-FP-ELIM +; RUN: llc -mtriple thumb-unknown-linux-gnueabi -filetype asm -o - %s | FileCheck %s --check-prefix=CHECK-THUMB-FP +; RUN: llc -mtriple thumb-unknown-linux-gnueabi -filetype asm -o - %s -disable-fp-elim | FileCheck %s --check-prefix=CHECK-THUMB-FP-ELIM + +; Tests that the initial space allocated to the varargs on the stack is +; taken into account in the .cfi_ directives. + +; Generated from the C program: +; #include <stdarg.h> +; +; extern int foo(int); +; +; int sum(int count, ...)
{ +; va_list vl; +; va_start(vl, count); +; int sum = 0; +; for (int i = 0; i < count; i++) { +; sum += foo(va_arg(vl, int)); +; } +; va_end(vl); +; } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!9, !10} +!llvm.ident = !{!11} + +!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/var.c] [DW_LANG_C99] +!1 = metadata !{metadata !"var.c", metadata !"/tmp"} +!2 = metadata !{} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"sum", metadata !"sum", metadata !"", i32 5, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, ...)* @sum, null, null, metadata !2, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [sum] +!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp/var.c] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{metadata !8, metadata !8} +!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} +!11 = metadata !{metadata !"clang version 3.5 "} +!12 = metadata !{i32 786689, metadata !4, metadata !"count", metadata !5, i32 16777221, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [count] [line 5] +!13 = metadata !{i32 5, i32 0, metadata !4, null} +!14 = metadata !{i32 786688, metadata !4, metadata !"vl", metadata !5, i32 6, metadata !15, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [vl] [line 6] +!15 = metadata !{i32 786454, metadata !16, null, metadata !"va_list", i32 30, i64 0, i64 0, i64 0, i32 0, metadata !17} ; [ DW_TAG_typedef ] [va_list] [line 30, size 0, align 0, offset 0] [from __builtin_va_list] +!16 = metadata !{metadata !"/linux-x86_64-high/gcc_4.7.2/dbg/llvm/bin/../lib/clang/3.5/include/stdarg.h", metadata !"/tmp"} +!17 = metadata !{i32 786454, metadata !1, null, metadata !"__builtin_va_list", i32 6, i64 0, i64 0, i64 0, i32 0, metadata !18} ; [ DW_TAG_typedef ] [__builtin_va_list] [line 6, size 0, align 0, offset 0] [from __va_list] +!18 = metadata !{i32 786451, metadata !1, null, metadata !"__va_list", i32 6, i64 32, i64 32, i32 0, i32 0, null, metadata !19, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [__va_list] [line 6, size 32, align 32, offset 0] [def] [from ] +!19 = metadata !{metadata !20} +!20 = metadata !{i32 786445, metadata !1, metadata !18, metadata !"__ap", i32 6, i64 32, i64 32, i64 0, i32 0, metadata !21} ; [ DW_TAG_member ] [__ap] [line 6, size 32, align 32, offset 0] [from ] +!21 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] [line 0, size 32, align 32, offset 0] [from ] +!22 = metadata !{i32 6, i32 0, metadata !4, null} +!23 = metadata !{i32 7, i32 0, metadata !4, null} +!24 = metadata !{i32 786688, metadata !4, metadata !"sum", metadata !5, i32 8, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [sum] [line 8] +!25 = metadata !{i32 8, i32 0, metadata !4, null} ; [ DW_TAG_imported_declaration ] +!26 = metadata !{i32 786688, metadata !27, metadata !"i", metadata !5, i32 9, metadata !8, i32 0, 
i32 0} ; [ DW_TAG_auto_variable ] [i] [line 9] +!27 = metadata !{i32 786443, metadata !1, metadata !4, i32 9, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/tmp/var.c] +!28 = metadata !{i32 9, i32 0, metadata !27, null} +!29 = metadata !{i32 10, i32 0, metadata !30, null} +!30 = metadata !{i32 786443, metadata !1, metadata !27, i32 9, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [/tmp/var.c] +!31 = metadata !{i32 11, i32 0, metadata !30, null} +!32 = metadata !{i32 12, i32 0, metadata !4, null} +!33 = metadata !{i32 13, i32 0, metadata !4, null} + +; CHECK-FP-LABEL: sum +; CHECK-FP: .cfi_startproc +; CHECK-FP: sub sp, sp, #16 +; CHECK-FP: .cfi_def_cfa_offset 16 +; CHECK-FP: push {r4, lr} +; CHECK-FP: .cfi_def_cfa_offset 24 +; CHECK-FP: .cfi_offset lr, -20 +; CHECK-FP: .cfi_offset r4, -24 +; CHECK-FP: sub sp, sp, #8 +; CHECK-FP: .cfi_def_cfa_offset 32 + +; CHECK-FP-ELIM-LABEL: sum +; CHECK-FP-ELIM: .cfi_startproc +; CHECK-FP-ELIM: sub sp, sp, #16 +; CHECK-FP-ELIM: .cfi_def_cfa_offset 16 +; CHECK-FP-ELIM: push {r4, r11, lr} +; CHECK-FP-ELIM: .cfi_def_cfa_offset 28 +; CHECK-FP-ELIM: .cfi_offset lr, -20 +; CHECK-FP-ELIM: .cfi_offset r11, -24 +; CHECK-FP-ELIM: .cfi_offset r4, -28 +; CHECK-FP-ELIM: add r11, sp, #4 +; CHECK-FP-ELIM: .cfi_def_cfa r11, 24 + +; CHECK-THUMB-FP-LABEL: sum +; CHECK-THUMB-FP: .cfi_startproc +; CHECK-THUMB-FP: sub sp, #16 +; CHECK-THUMB-FP: .cfi_def_cfa_offset 16 +; CHECK-THUMB-FP: push {r4, r5, r7, lr} +; CHECK-THUMB-FP: .cfi_def_cfa_offset 32 +; CHECK-THUMB-FP: .cfi_offset lr, -20 +; CHECK-THUMB-FP: .cfi_offset r7, -24 +; CHECK-THUMB-FP: .cfi_offset r5, -28 +; CHECK-THUMB-FP: .cfi_offset r4, -32 +; CHECK-THUMB-FP: sub sp, #8 +; CHECK-THUMB-FP: .cfi_def_cfa_offset 40 + +; CHECK-THUMB-FP-ELIM-LABEL: sum +; CHECK-THUMB-FP-ELIM: .cfi_startproc +; CHECK-THUMB-FP-ELIM: sub sp, #16 +; CHECK-THUMB-FP-ELIM: .cfi_def_cfa_offset 16 +; CHECK-THUMB-FP-ELIM: push {r4, r5, r7, lr} +; CHECK-THUMB-FP-ELIM: .cfi_def_cfa_offset 32 +; CHECK-THUMB-FP-ELIM: .cfi_offset lr, -20 +; CHECK-THUMB-FP-ELIM: .cfi_offset r7, -24 +; CHECK-THUMB-FP-ELIM: .cfi_offset r5, -28 +; CHECK-THUMB-FP-ELIM: .cfi_offset r4, -32 +; CHECK-THUMB-FP-ELIM: add r7, sp, #8 +; CHECK-THUMB-FP-ELIM: .cfi_def_cfa r7, 24 + +define i32 @sum(i32 %count, ...) { +entry: + %vl = alloca i8*, align 4 + %vl1 = bitcast i8** %vl to i8* + call void @llvm.va_start(i8* %vl1) + %cmp4 = icmp sgt i32 %count, 0 + br i1 %cmp4, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %i.05 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %ap.cur = load i8** %vl, align 4 + %ap.next = getelementptr i8* %ap.cur, i32 4 + store i8* %ap.next, i8** %vl, align 4 + %0 = bitcast i8* %ap.cur to i32* + %1 = load i32* %0, align 4 + %call = call i32 @foo(i32 %1) #1 + %inc = add nsw i32 %i.05, 1 + %exitcond = icmp eq i32 %inc, %count + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + call void @llvm.va_end(i8* %vl1) + ret i32 undef +} + +declare void @llvm.va_start(i8*) nounwind + +declare i32 @foo(i32) + +declare void @llvm.va_end(i8*) nounwind diff --git a/test/CodeGen/ARM/debug-frame.ll b/test/CodeGen/ARM/debug-frame.ll new file mode 100644 index 0000000..cf68767 --- /dev/null +++ b/test/CodeGen/ARM/debug-frame.ll @@ -0,0 +1,574 @@ +; ARM EHABI integrated test + +; This test case checks whether the ARM DWARF stack frame directives +; are properly generated or not. 
+ +; We have to check several cases: +; (1) arm with -disable-fp-elim +; (2) arm without -disable-fp-elim +; (3) armv7 with -disable-fp-elim +; (4) armv7 without -disable-fp-elim +; (5) thumb with -disable-fp-elim +; (6) thumb without -disable-fp-elim +; (7) thumbv7 with -disable-fp-elim +; (8) thumbv7 without -disable-fp-elim +; (9) thumbv7 with -no-integrated-as + +; RUN: llc -mtriple arm-unknown-linux-gnueabi \ +; RUN: -disable-fp-elim -filetype=asm -o - %s \ +; RUN: | FileCheck %s --check-prefix=CHECK-FP + +; RUN: llc -mtriple arm-unknown-linux-gnueabi \ +; RUN: -filetype=asm -o - %s \ +; RUN: | FileCheck %s --check-prefix=CHECK-FP-ELIM + +; RUN: llc -mtriple armv7-unknown-linux-gnueabi \ +; RUN: -disable-fp-elim -filetype=asm -o - %s \ +; RUN: | FileCheck %s --check-prefix=CHECK-V7-FP + +; RUN: llc -mtriple armv7-unknown-linux-gnueabi \ +; RUN: -filetype=asm -o - %s \ +; RUN: | FileCheck %s --check-prefix=CHECK-V7-FP-ELIM + +; RUN: llc -mtriple thumb-unknown-linux-gnueabi \ +; RUN: -disable-fp-elim -filetype=asm -o - %s \ +; RUN: | FileCheck %s --check-prefix=CHECK-THUMB-FP + +; RUN: llc -mtriple thumb-unknown-linux-gnueabi \ +; RUN: -filetype=asm -o - %s \ +; RUN: | FileCheck %s --check-prefix=CHECK-THUMB-FP-ELIM + +; RUN: llc -mtriple thumbv7-unknown-linux-gnueabi \ +; RUN: -disable-fp-elim -filetype=asm -o - %s \ +; RUN: | FileCheck %s --check-prefix=CHECK-THUMB-V7-FP + +; RUN: llc -mtriple thumbv7-unknown-linux-gnueabi \ +; RUN: -filetype=asm -o - %s \ +; RUN: | FileCheck %s --check-prefix=CHECK-THUMB-V7-FP-ELIM + +; RUN: llc -mtriple thumbv7-unknown-linux-gnueabi \ +; RUN: -disable-fp-elim -no-integrated-as -filetype=asm -o - %s \ +; RUN: | FileCheck %s --check-prefix=CHECK-THUMB-V7-FP-NOIAS + +;------------------------------------------------------------------------------- +; Test 1 +;------------------------------------------------------------------------------- +; This is the LLVM assembly generated from following C++ code: +; +; extern void print(int, int, int, int, int); +; extern void print(double, double, double, double, double); +; +; void test(int a, int b, int c, int d, int e, +; double m, double n, double p, double q, double r) { +; try { +; print(a, b, c, d, e); +; } catch (...) 
{ +; print(m, n, p, q, r); +; } +; } + +declare void @_Z5printiiiii(i32, i32, i32, i32, i32) + +declare void @_Z5printddddd(double, double, double, double, double) + +define void @_Z4testiiiiiddddd(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, + double %m, double %n, double %p, + double %q, double %r) { +entry: + invoke void @_Z5printiiiii(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) + to label %try.cont unwind label %lpad + +lpad: + %0 = landingpad { i8*, i32 } + personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + catch i8* null + %1 = extractvalue { i8*, i32 } %0, 0 + %2 = tail call i8* @__cxa_begin_catch(i8* %1) + invoke void @_Z5printddddd(double %m, double %n, double %p, + double %q, double %r) + to label %invoke.cont2 unwind label %lpad1 + +invoke.cont2: + tail call void @__cxa_end_catch() + br label %try.cont + +try.cont: + ret void + +lpad1: + %3 = landingpad { i8*, i32 } + personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + cleanup + invoke void @__cxa_end_catch() + to label %eh.resume unwind label %terminate.lpad + +eh.resume: + resume { i8*, i32 } %3 + +terminate.lpad: + %4 = landingpad { i8*, i32 } + personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + catch i8* null + %5 = extractvalue { i8*, i32 } %4, 0 + tail call void @__clang_call_terminate(i8* %5) + unreachable +} + +declare void @__clang_call_terminate(i8*) + +declare i32 @__gxx_personality_v0(...) + +declare i8* @__cxa_begin_catch(i8*) + +declare void @__cxa_end_catch() + +declare void @_ZSt9terminatev() + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!10, !11} +!llvm.ident = !{!12} + +!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/exp.cpp] [DW_LANG_C_plus_plus] +!1 = metadata !{metadata !"exp.cpp", metadata !"/tmp"} +!2 = metadata !{} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test", metadata !"test", metadata !"_Z4testiiiiiddddd", i32 4, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32, i32, i32, i32, i32, double, double, double, double, double)* @_Z4testiiiiiddddd, null, null, metadata !2, i32 5} ; [ DW_TAG_subprogram ] [line 4] [def] [scope 5] [test] +!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp/exp.cpp] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{null, metadata !8, metadata !8, metadata !8, metadata !8, metadata !8, metadata !9, metadata !9, metadata !9, metadata !9, metadata !9} +!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!9 = metadata !{i32 786468, null, null, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float] +!10 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} +!12 = metadata !{metadata !"clang version 3.5 "} +!13 = metadata !{i32 786689, metadata !4, metadata !"a", metadata !5, i32 16777220, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 4] +!14 = metadata !{i32 4, i32 0, metadata !4, null} 
+!15 = metadata !{i32 786689, metadata !4, metadata !"b", metadata !5, i32 33554436, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [b] [line 4] +!16 = metadata !{i32 786689, metadata !4, metadata !"c", metadata !5, i32 50331652, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [c] [line 4] +!17 = metadata !{i32 786689, metadata !4, metadata !"d", metadata !5, i32 67108868, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [d] [line 4] +!18 = metadata !{i32 786689, metadata !4, metadata !"e", metadata !5, i32 83886084, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [e] [line 4] +!19 = metadata !{i32 786689, metadata !4, metadata !"m", metadata !5, i32 100663301, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [m] [line 5] +!20 = metadata !{i32 5, i32 0, metadata !4, null} +!21 = metadata !{i32 786689, metadata !4, metadata !"n", metadata !5, i32 117440517, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [n] [line 5] +!22 = metadata !{i32 786689, metadata !4, metadata !"p", metadata !5, i32 134217733, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [p] [line 5] +!23 = metadata !{i32 786689, metadata !4, metadata !"q", metadata !5, i32 150994949, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [q] [line 5] +!24 = metadata !{i32 786689, metadata !4, metadata !"r", metadata !5, i32 167772165, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [r] [line 5] +!25 = metadata !{i32 7, i32 0, metadata !26, null} +!26 = metadata !{i32 786443, metadata !1, metadata !4, i32 6, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/tmp/exp.cpp] +!27 = metadata !{i32 8, i32 0, metadata !26, null} ; [ DW_TAG_imported_declaration ] +!28 = metadata !{i32 11, i32 0, metadata !26, null} +!29 = metadata !{i32 9, i32 0, metadata !30, null} +!30 = metadata !{i32 786443, metadata !1, metadata !4, i32 8, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [/tmp/exp.cpp] +!31 = metadata !{i32 10, i32 0, metadata !30, null} +!32 = metadata !{i32 10, i32 0, metadata !4, null} +!33 = metadata !{i32 11, i32 0, metadata !4, null} +!34 = metadata !{i32 11, i32 0, metadata !30, null} + +; CHECK-FP-LABEL: _Z4testiiiiiddddd: +; CHECK-FP: .cfi_startproc +; CHECK-FP: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-FP: .cfi_def_cfa_offset 36 +; CHECK-FP: .cfi_offset lr, -4 +; CHECK-FP: .cfi_offset r11, -8 +; CHECK-FP: .cfi_offset r10, -12 +; CHECK-FP: .cfi_offset r9, -16 +; CHECK-FP: .cfi_offset r8, -20 +; CHECK-FP: .cfi_offset r7, -24 +; CHECK-FP: .cfi_offset r6, -28 +; CHECK-FP: .cfi_offset r5, -32 +; CHECK-FP: .cfi_offset r4, -36 +; CHECK-FP: add r11, sp, #28 +; CHECK-FP: .cfi_def_cfa r11, 8 +; CHECK-FP: sub sp, sp, #28 +; CHECK-FP: .cfi_endproc + +; CHECK-FP-ELIM-LABEL: _Z4testiiiiiddddd: +; CHECK-FP-ELIM: .cfi_startproc +; CHECK-FP-ELIM: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-FP-ELIM: .cfi_def_cfa_offset 36 +; CHECK-FP-ELIM: .cfi_offset lr, -4 +; CHECK-FP-ELIM: .cfi_offset r11, -8 +; CHECK-FP-ELIM: .cfi_offset r10, -12 +; CHECK-FP-ELIM: .cfi_offset r9, -16 +; CHECK-FP-ELIM: .cfi_offset r8, -20 +; CHECK-FP-ELIM: .cfi_offset r7, -24 +; CHECK-FP-ELIM: .cfi_offset r6, -28 +; CHECK-FP-ELIM: .cfi_offset r5, -32 +; CHECK-FP-ELIM: .cfi_offset r4, -36 +; CHECK-FP-ELIM: sub sp, sp, #28 +; CHECK-FP-ELIM: .cfi_def_cfa_offset 64 +; CHECK-FP-ELIM: .cfi_endproc + +; CHECK-V7-FP-LABEL: _Z4testiiiiiddddd: +; CHECK-V7-FP: .cfi_startproc +; CHECK-V7-FP: push {r4, r11, lr} +; CHECK-V7-FP: .cfi_def_cfa_offset 12 +; CHECK-V7-FP: .cfi_offset lr, -4 +; CHECK-V7-FP: .cfi_offset r11, -8 +; CHECK-V7-FP: 
.cfi_offset r4, -12 +; CHECK-V7-FP: add r11, sp, #4 +; CHECK-V7-FP: .cfi_def_cfa r11, 8 +; CHECK-V7-FP: vpush {d8, d9, d10, d11, d12} +; CHECK-V7-FP: .cfi_offset d12, -24 +; CHECK-V7-FP: .cfi_offset d11, -32 +; CHECK-V7-FP: .cfi_offset d10, -40 +; CHECK-V7-FP: .cfi_offset d9, -48 +; CHECK-V7-FP: .cfi_offset d8, -56 +; CHECK-V7-FP: sub sp, sp, #28 +; CHECK-V7-FP: .cfi_endproc + +; CHECK-V7-FP-ELIM-LABEL: _Z4testiiiiiddddd: +; CHECK-V7-FP-ELIM: .cfi_startproc +; CHECK-V7-FP-ELIM: push {r4, lr} +; CHECK-V7-FP-ELIM: .cfi_def_cfa_offset 8 +; CHECK-V7-FP-ELIM: .cfi_offset lr, -4 +; CHECK-V7-FP-ELIM: .cfi_offset r4, -8 +; CHECK-V7-FP-ELIM: vpush {d8, d9, d10, d11, d12} +; CHECK-V7-FP-ELIM: .cfi_def_cfa_offset 48 +; CHECK-V7-FP-ELIM: .cfi_offset d12, -16 +; CHECK-V7-FP-ELIM: .cfi_offset d11, -24 +; CHECK-V7-FP-ELIM: .cfi_offset d10, -32 +; CHECK-V7-FP-ELIM: .cfi_offset d9, -40 +; CHECK-V7-FP-ELIM: .cfi_offset d8, -48 +; CHECK-V7-FP-ELIM: sub sp, sp, #24 +; CHECK-V7-FP-ELIM: .cfi_def_cfa_offset 72 +; CHECK-V7-FP-ELIM: .cfi_endproc + +; CHECK-THUMB-FP-LABEL: _Z4testiiiiiddddd: +; CHECK-THUMB-FP: .cfi_startproc +; CHECK-THUMB-FP: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-FP: .cfi_def_cfa_offset 20 +; CHECK-THUMB-FP: .cfi_offset lr, -4 +; CHECK-THUMB-FP: .cfi_offset r7, -8 +; CHECK-THUMB-FP: .cfi_offset r6, -12 +; CHECK-THUMB-FP: .cfi_offset r5, -16 +; CHECK-THUMB-FP: .cfi_offset r4, -20 +; CHECK-THUMB-FP: add r7, sp, #12 +; CHECK-THUMB-FP: .cfi_def_cfa r7, 8 +; CHECK-THUMB-FP: sub sp, #60 +; CHECK-THUMB-FP: .cfi_endproc + +; CHECK-THUMB-FP-ELIM-LABEL: _Z4testiiiiiddddd: +; CHECK-THUMB-FP-ELIM: .cfi_startproc +; CHECK-THUMB-FP-ELIM: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-FP-ELIM: .cfi_def_cfa_offset 20 +; CHECK-THUMB-FP-ELIM: .cfi_offset lr, -4 +; CHECK-THUMB-FP-ELIM: .cfi_offset r7, -8 +; CHECK-THUMB-FP-ELIM: .cfi_offset r6, -12 +; CHECK-THUMB-FP-ELIM: .cfi_offset r5, -16 +; CHECK-THUMB-FP-ELIM: .cfi_offset r4, -20 +; CHECK-THUMB-FP-ELIM: sub sp, #60 +; CHECK-THUMB-FP-ELIM: .cfi_def_cfa_offset 80 +; CHECK-THUMB-FP-ELIM: .cfi_endproc + +; CHECK-THUMB-V7-FP-LABEL: _Z4testiiiiiddddd: +; CHECK-THUMB-V7-FP: .cfi_startproc +; CHECK-THUMB-V7-FP: push.w {r4, r7, r11, lr} +; CHECK-THUMB-V7-FP: .cfi_def_cfa_offset 16 +; CHECK-THUMB-V7-FP: .cfi_offset lr, -4 +; CHECK-THUMB-V7-FP: .cfi_offset r11, -8 +; CHECK-THUMB-V7-FP: .cfi_offset r7, -12 +; CHECK-THUMB-V7-FP: .cfi_offset r4, -16 +; CHECK-THUMB-V7-FP: add r7, sp, #4 +; CHECK-THUMB-V7-FP: .cfi_def_cfa r7, 12 +; CHECK-THUMB-V7-FP: vpush {d8, d9, d10, d11, d12} +; CHECK-THUMB-V7-FP: .cfi_offset d12, -24 +; CHECK-THUMB-V7-FP: .cfi_offset d11, -32 +; CHECK-THUMB-V7-FP: .cfi_offset d10, -40 +; CHECK-THUMB-V7-FP: .cfi_offset d9, -48 +; CHECK-THUMB-V7-FP: .cfi_offset d8, -56 +; CHECK-THUMB-V7-FP: sub sp, #24 +; CHECK-THUMB-V7-FP: .cfi_endproc + +; CHECK-THUMB-V7-FP-ELIM-LABEL: _Z4testiiiiiddddd: +; CHECK-THUMB-V7-FP-ELIM: .cfi_startproc +; CHECK-THUMB-V7-FP-ELIM: push {r4, lr} +; CHECK-THUMB-V7-FP-ELIM: .cfi_def_cfa_offset 8 +; CHECK-THUMB-V7-FP-ELIM: .cfi_offset lr, -4 +; CHECK-THUMB-V7-FP-ELIM: .cfi_offset r4, -8 +; CHECK-THUMB-V7-FP-ELIM: vpush {d8, d9, d10, d11, d12} +; CHECK-THUMB-V7-FP-ELIM: .cfi_def_cfa_offset 48 +; CHECK-THUMB-V7-FP-ELIM: .cfi_offset d12, -16 +; CHECK-THUMB-V7-FP-ELIM: .cfi_offset d11, -24 +; CHECK-THUMB-V7-FP-ELIM: .cfi_offset d10, -32 +; CHECK-THUMB-V7-FP-ELIM: .cfi_offset d9, -40 +; CHECK-THUMB-V7-FP-ELIM: .cfi_offset d8, -48 +; CHECK-THUMB-V7-FP-ELIM: sub sp, #24 +; CHECK-THUMB-V7-FP-ELIM: .cfi_def_cfa_offset 72 +; 
CHECK-THUMB-V7-FP-ELIM: .cfi_endproc + +; CHECK-THUMB-V7-FP-NOIAS-LABEL: _Z4testiiiiiddddd: +; CHECK-THUMB-V7-FP-NOIAS: .cfi_startproc +; CHECK-THUMB-V7-FP-NOIAS: push.w {r4, r7, r11, lr} +; CHECK-THUMB-V7-FP-NOIAS: .cfi_def_cfa_offset 16 +; CHECK-THUMB-V7-FP-NOIAS: .cfi_offset 14, -4 +; CHECK-THUMB-V7-FP-NOIAS: .cfi_offset 11, -8 +; CHECK-THUMB-V7-FP-NOIAS: .cfi_offset 7, -12 +; CHECK-THUMB-V7-FP-NOIAS: .cfi_offset 4, -16 +; CHECK-THUMB-V7-FP-NOIAS: add r7, sp, #4 +; CHECK-THUMB-V7-FP-NOIAS: .cfi_def_cfa 7, 12 +; CHECK-THUMB-V7-FP-NOIAS: vpush {d8, d9, d10, d11, d12} +; CHECK-THUMB-V7-FP-NOIAS: .cfi_offset 268, -24 +; CHECK-THUMB-V7-FP-NOIAS: .cfi_offset 267, -32 +; CHECK-THUMB-V7-FP-NOIAS: .cfi_offset 266, -40 +; CHECK-THUMB-V7-FP-NOIAS: .cfi_offset 265, -48 +; CHECK-THUMB-V7-FP-NOIAS: .cfi_offset 264, -56 +; CHECK-THUMB-V7-FP-NOIAS: sub sp, #24 +; CHECK-THUMB-V7-FP-NOIAS: .cfi_endproc + +;------------------------------------------------------------------------------- +; Test 2 +;------------------------------------------------------------------------------- + +declare void @throw_exception_2() + +define void @test2() { +entry: + call void @throw_exception_2() + ret void +} + +; CHECK-FP-LABEL: test2: +; CHECK-FP: .cfi_startproc +; CHECK-FP: push {r11, lr} +; CHECK-FP: .cfi_def_cfa_offset 8 +; CHECK-FP: .cfi_offset lr, -4 +; CHECK-FP: .cfi_offset r11, -8 +; CHECK-FP: mov r11, sp +; CHECK-FP: .cfi_def_cfa_register r11 +; CHECK-FP: pop {r11, lr} +; CHECK-FP: mov pc, lr +; CHECK-FP: .cfi_endproc + +; CHECK-FP-ELIM-LABEL: test2: +; CHECK-FP-ELIM: .cfi_startproc +; CHECK-FP-ELIM: push {r11, lr} +; CHECK-FP-ELIM: .cfi_def_cfa_offset 8 +; CHECK-FP-ELIM: .cfi_offset lr, -4 +; CHECK-FP-ELIM: .cfi_offset r11, -8 +; CHECK-FP-ELIM: pop {r11, lr} +; CHECK-FP-ELIM: mov pc, lr +; CHECK-FP-ELIM: .cfi_endproc + +; CHECK-V7-FP-LABEL: test2: +; CHECK-V7-FP: .cfi_startproc +; CHECK-V7-FP: push {r11, lr} +; CHECK-V7-FP: .cfi_def_cfa_offset 8 +; CHECK-V7-FP: .cfi_offset lr, -4 +; CHECK-V7-FP: .cfi_offset r11, -8 +; CHECK-V7-FP: mov r11, sp +; CHECK-V7-FP: .cfi_def_cfa_register r11 +; CHECK-V7-FP: pop {r11, pc} +; CHECK-V7-FP: .cfi_endproc + +; CHECK-V7-FP-ELIM-LABEL: test2: +; CHECK-V7-FP-ELIM: .cfi_startproc +; CHECK-V7-FP-ELIM: push {r11, lr} +; CHECK-V7-FP-ELIM: .cfi_def_cfa_offset 8 +; CHECK-V7-FP-ELIM: .cfi_offset lr, -4 +; CHECK-V7-FP-ELIM: .cfi_offset r11, -8 +; CHECK-V7-FP-ELIM: pop {r11, pc} +; CHECK-V7-FP-ELIM: .cfi_endproc + +; CHECK-THUMB-FP-LABEL: test2: +; CHECK-THUMB-FP: .cfi_startproc +; CHECK-THUMB-FP: push {r7, lr} +; CHECK-THUMB-FP: .cfi_def_cfa_offset 8 +; CHECK-THUMB-FP: .cfi_offset lr, -4 +; CHECK-THUMB-FP: .cfi_offset r7, -8 +; CHECK-THUMB-FP: add r7, sp, #0 +; CHECK-THUMB-FP: .cfi_def_cfa_register r7 +; CHECK-THUMB-FP: pop {r7, pc} +; CHECK-THUMB-FP: .cfi_endproc + +; CHECK-THUMB-FP-ELIM-LABEL: test2: +; CHECK-THUMB-FP-ELIM: .cfi_startproc +; CHECK-THUMB-FP-ELIM: push {r7, lr} +; CHECK-THUMB-FP-ELIM: .cfi_def_cfa_offset 8 +; CHECK-THUMB-FP-ELIM: .cfi_offset lr, -4 +; CHECK-THUMB-FP-ELIM: .cfi_offset r7, -8 +; CHECK-THUMB-FP-ELIM: pop {r7, pc} +; CHECK-THUMB-FP-ELIM: .cfi_endproc + +; CHECK-THUMB-V7-FP-LABEL: test2: +; CHECK-THUMB-V7-FP: .cfi_startproc +; CHECK-THUMB-V7-FP: push {r7, lr} +; CHECK-THUMB-V7-FP: .cfi_def_cfa_offset 8 +; CHECK-THUMB-V7-FP: .cfi_offset lr, -4 +; CHECK-THUMB-V7-FP: .cfi_offset r7, -8 +; CHECK-THUMB-V7-FP: mov r7, sp +; CHECK-THUMB-V7-FP: .cfi_def_cfa_register r7 +; CHECK-THUMB-V7-FP: pop {r7, pc} +; CHECK-THUMB-V7-FP: .cfi_endproc + +; 
CHECK-THUMB-V7-FP-ELIM-LABEL: test2: +; CHECK-THUMB-V7-FP-ELIM: .cfi_startproc +; CHECK-THUMB-V7-FP-ELIM: push.w {r11, lr} +; CHECK-THUMB-V7-FP-ELIM: .cfi_def_cfa_offset 8 +; CHECK-THUMB-V7-FP-ELIM: .cfi_offset lr, -4 +; CHECK-THUMB-V7-FP-ELIM: .cfi_offset r11, -8 +; CHECK-THUMB-V7-FP-ELIM: pop.w {r11, pc} +; CHECK-THUMB-V7-FP-ELIM: .cfi_endproc + + +;------------------------------------------------------------------------------- +; Test 3 +;------------------------------------------------------------------------------- + +declare void @throw_exception_3(i32) + +define i32 @test3(i32 %a, i32 %b, i32 %c, i32 %d, + i32 %e, i32 %f, i32 %g, i32 %h) { +entry: + %add = add nsw i32 %b, %a + %add1 = add nsw i32 %add, %c + %add2 = add nsw i32 %add1, %d + tail call void @throw_exception_3(i32 %add2) + %add3 = add nsw i32 %f, %e + %add4 = add nsw i32 %add3, %g + %add5 = add nsw i32 %add4, %h + tail call void @throw_exception_3(i32 %add5) + %add6 = add nsw i32 %add5, %add2 + ret i32 %add6 +} + +; CHECK-FP-LABEL: test3: +; CHECK-FP: .cfi_startproc +; CHECK-FP: push {r4, r5, r11, lr} +; CHECK-FP: .cfi_def_cfa_offset 16 +; CHECK-FP: .cfi_offset lr, -4 +; CHECK-FP: .cfi_offset r11, -8 +; CHECK-FP: .cfi_offset r5, -12 +; CHECK-FP: .cfi_offset r4, -16 +; CHECK-FP: add r11, sp, #8 +; CHECK-FP: .cfi_def_cfa r11, 8 +; CHECK-FP: pop {r4, r5, r11, lr} +; CHECK-FP: mov pc, lr +; CHECK-FP: .cfi_endproc + +; CHECK-FP-ELIM-LABEL: test3: +; CHECK-FP-ELIM: .cfi_startproc +; CHECK-FP-ELIM: push {r4, r5, r11, lr} +; CHECK-FP-ELIM: .cfi_def_cfa_offset 16 +; CHECK-FP-ELIM: .cfi_offset lr, -4 +; CHECK-FP-ELIM: .cfi_offset r11, -8 +; CHECK-FP-ELIM: .cfi_offset r5, -12 +; CHECK-FP-ELIM: .cfi_offset r4, -16 +; CHECK-FP-ELIM: pop {r4, r5, r11, lr} +; CHECK-FP-ELIM: mov pc, lr +; CHECK-FP-ELIM: .cfi_endproc + +; CHECK-V7-FP-LABEL: test3: +; CHECK-V7-FP: .cfi_startproc +; CHECK-V7-FP: push {r4, r5, r11, lr} +; CHECK-V7-FP: .cfi_def_cfa_offset 16 +; CHECK-V7-FP: .cfi_offset lr, -4 +; CHECK-V7-FP: .cfi_offset r11, -8 +; CHECK-V7-FP: .cfi_offset r5, -12 +; CHECK-V7-FP: .cfi_offset r4, -16 +; CHECK-V7-FP: add r11, sp, #8 +; CHECK-V7-FP: .cfi_def_cfa r11, 8 +; CHECK-V7-FP: pop {r4, r5, r11, pc} +; CHECK-V7-FP: .cfi_endproc + +; CHECK-V7-FP-ELIM-LABEL: test3: +; CHECK-V7-FP-ELIM: .cfi_startproc +; CHECK-V7-FP-ELIM: push {r4, r5, r11, lr} +; CHECK-V7-FP-ELIM: .cfi_def_cfa_offset 16 +; CHECK-V7-FP-ELIM: .cfi_offset lr, -4 +; CHECK-V7-FP-ELIM: .cfi_offset r11, -8 +; CHECK-V7-FP-ELIM: .cfi_offset r5, -12 +; CHECK-V7-FP-ELIM: .cfi_offset r4, -16 +; CHECK-V7-FP-ELIM: pop {r4, r5, r11, pc} +; CHECK-V7-FP-ELIM: .cfi_endproc + +; CHECK-THUMB-FP-LABEL: test3: +; CHECK-THUMB-FP: .cfi_startproc +; CHECK-THUMB-FP: push {r4, r5, r7, lr} +; CHECK-THUMB-FP: .cfi_def_cfa_offset 16 +; CHECK-THUMB-FP: .cfi_offset lr, -4 +; CHECK-THUMB-FP: .cfi_offset r7, -8 +; CHECK-THUMB-FP: .cfi_offset r5, -12 +; CHECK-THUMB-FP: .cfi_offset r4, -16 +; CHECK-THUMB-FP: add r7, sp, #8 +; CHECK-THUMB-FP: .cfi_def_cfa r7, 8 +; CHECK-THUMB-FP: pop {r4, r5, r7, pc} +; CHECK-THUMB-FP: .cfi_endproc + +; CHECK-THUMB-FP-ELIM-LABEL: test3: +; CHECK-THUMB-FP-ELIM: .cfi_startproc +; CHECK-THUMB-FP-ELIM: push {r4, r5, r7, lr} +; CHECK-THUMB-FP-ELIM: .cfi_def_cfa_offset 16 +; CHECK-THUMB-FP-ELIM: .cfi_offset lr, -4 +; CHECK-THUMB-FP-ELIM: .cfi_offset r7, -8 +; CHECK-THUMB-FP-ELIM: .cfi_offset r5, -12 +; CHECK-THUMB-FP-ELIM: .cfi_offset r4, -16 +; CHECK-THUMB-FP-ELIM: pop {r4, r5, r7, pc} +; CHECK-THUMB-FP-ELIM: .cfi_endproc + +; CHECK-THUMB-V7-FP-LABEL: test3: +; CHECK-THUMB-V7-FP: 
.cfi_startproc +; CHECK-THUMB-V7-FP: push {r4, r5, r7, lr} +; CHECK-THUMB-V7-FP: .cfi_def_cfa_offset 16 +; CHECK-THUMB-V7-FP: .cfi_offset lr, -4 +; CHECK-THUMB-V7-FP: .cfi_offset r7, -8 +; CHECK-THUMB-V7-FP: .cfi_offset r5, -12 +; CHECK-THUMB-V7-FP: .cfi_offset r4, -16 +; CHECK-THUMB-V7-FP: add r7, sp, #8 +; CHECK-THUMB-V7-FP: .cfi_def_cfa r7, 8 +; CHECK-THUMB-V7-FP: pop {r4, r5, r7, pc} +; CHECK-THUMB-V7-FP: .cfi_endproc + +; CHECK-THUMB-V7-FP-ELIM-LABEL: test3: +; CHECK-THUMB-V7-FP-ELIM: .cfi_startproc +; CHECK-THUMB-V7-FP-ELIM: push.w {r4, r5, r11, lr} +; CHECK-THUMB-V7-FP-ELIM: .cfi_def_cfa_offset 16 +; CHECK-THUMB-V7-FP-ELIM: .cfi_offset lr, -4 +; CHECK-THUMB-V7-FP-ELIM: .cfi_offset r11, -8 +; CHECK-THUMB-V7-FP-ELIM: .cfi_offset r5, -12 +; CHECK-THUMB-V7-FP-ELIM: .cfi_offset r4, -16 +; CHECK-THUMB-V7-FP-ELIM: pop.w {r4, r5, r11, pc} +; CHECK-THUMB-V7-FP-ELIM: .cfi_endproc + + +;------------------------------------------------------------------------------- +; Test 4 +;------------------------------------------------------------------------------- + +define void @test4() nounwind { +entry: + ret void +} + +; CHECK-FP-LABEL: test4: +; CHECK-FP: mov pc, lr +; CHECK-FP-NOT: .cfi_def_cfa_offset + +; CHECK-FP-ELIM-LABEL: test4: +; CHECK-FP-ELIM: mov pc, lr +; CHECK-FP-ELIM-NOT: .cfi_def_cfa_offset + +; CHECK-V7-FP-LABEL: test4: +; CHECK-V7-FP: bx lr +; CHECK-V7-FP-NOT: .cfi_def_cfa_offset + +; CHECK-V7-FP-ELIM-LABEL: test4: +; CHECK-V7-FP-ELIM: bx lr +; CHECK-V7-FP-ELIM-NOT: .cfi_def_cfa_offset + +; CHECK-THUMB-FP-LABEL: test4: +; CHECK-THUMB-FP: bx lr +; CHECK-THUMB-FP-NOT: .cfi_def_cfa_offset + +; CHECK-THUMB-FP-ELIM-LABEL: test4: +; CHECK-THUMB-FP-ELIM: bx lr +; CHECK-THUMB-FP-ELIM-NOT: .cfi_def_cfa_offset + +; CHECK-THUMB-V7-FP-LABEL: test4: +; CHECK-THUMB-V7-FP: bx lr +; CHECK-THUMB-V7-FP-NOT: .cfi_def_cfa_offset + +; CHECK-THUMB-V7-FP-ELIM-LABEL: test4: +; CHECK-THUMB-V7-FP-ELIM: bx lr +; CHECK-THUMB-V7-FP-ELIM-NOT: .cfi_def_cfa_offset + diff --git a/test/CodeGen/ARM/debug-info-qreg.ll b/test/CodeGen/ARM/debug-info-qreg.ll index ee515fd5..03ce312 100644 --- a/test/CodeGen/ARM/debug-info-qreg.ll +++ b/test/CodeGen/ARM/debug-info-qreg.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" target triple = "thumbv7-apple-macosx10.6.7" -;CHECK: DW_OP_regx for Q register: D1 +;CHECK: sub-register +;CHECK-NEXT: DW_OP_regx ;CHECK-NEXT: ascii -;CHECK-NEXT: DW_OP_piece 8 +;CHECK-NEXT: DW_OP_piece ;CHECK-NEXT: byte 8 -;CHECK-NEXT: DW_OP_regx for Q register: D2 +;CHECK-NEXT: sub-register +;CHECK-NEXT: DW_OP_regx ;CHECK-NEXT: ascii -;CHECK-NEXT: DW_OP_piece 8 +;CHECK-NEXT: DW_OP_piece ;CHECK-NEXT: byte 8 @.str = external constant [13 x i8] diff --git a/test/CodeGen/ARM/debug-info-s16-reg.ll b/test/CodeGen/ARM/debug-info-s16-reg.ll index e92d977..ee9faf8 100644 --- a/test/CodeGen/ARM/debug-info-s16-reg.ll +++ b/test/CodeGen/ARM/debug-info-s16-reg.ll @@ -1,9 +1,11 @@ ; RUN: llc < %s - | FileCheck %s ; Radar 9309221 ; Test dwarf reg no for s16 -;CHECK: DW_OP_regx for S register +;CHECK: super-register +;CHECK-NEXT: DW_OP_regx ;CHECK-NEXT: ascii -;CHECK-NEXT: DW_OP_bit_piece 32 0 +;CHECK-NEXT: DW_OP_piece +;CHECK-NEXT: 4 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" target triple = "thumbv7-apple-macosx10.6.7" diff --git a/test/CodeGen/ARM/debug-info-sreg2.ll b/test/CodeGen/ARM/debug-info-sreg2.ll index 
854fcab..71a696a 100644 --- a/test/CodeGen/ARM/debug-info-sreg2.ll +++ b/test/CodeGen/ARM/debug-info-sreg2.ll @@ -3,13 +3,19 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" target triple = "thumbv7-apple-macosx10.6.7" -;CHECK: Ldebug_loc0: -;CHECK-NEXT: .long Ltmp0 -;CHECK-NEXT: .long Ltmp1 +;CHECK-LABEL: Lfunc_begin0: +;CHECK: Ltmp[[K:[0-9]+]]: +;CHECK: Ltmp[[L:[0-9]+]]: +;CHECK-LABEL: Ldebug_loc0: +;CHECK-NEXT: .long Ltmp[[K]] +;CHECK-NEXT: .long Ltmp[[L]] ;CHECK-NEXT: Lset[[N:[0-9]+]] = Ltmp{{[0-9]+}}-Ltmp[[M:[0-9]+]] @ Loc expr size ;CHECK-NEXT: .short Lset[[N]] ;CHECK-NEXT: Ltmp[[M]]: -;CHECK-NEXT: .byte 144 @ DW_OP_regx for S register +;CHECK-NEXT: .byte 144 @ super-register +;CHECK-NEXT: @ DW_OP_regx +;CHECK-NEXT: .ascii +;CHECK-NEXT: .byte {{[0-9]+}} @ DW_OP_{{.*}}piece define void @_Z3foov() optsize ssp { entry: diff --git a/test/CodeGen/ARM/debug-segmented-stacks.ll b/test/CodeGen/ARM/debug-segmented-stacks.ll new file mode 100644 index 0000000..b0dc467 --- /dev/null +++ b/test/CodeGen/ARM/debug-segmented-stacks.ll @@ -0,0 +1,80 @@ +; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -segmented-stacks -verify-machineinstrs -filetype=asm | FileCheck %s -check-prefix=ARM-linux +; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -segmented-stacks -filetype=obj + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!9, !10} +!llvm.ident = !{!11} + +define void @test_basic() { + %mem = alloca i32, i32 10 + call void @dummy_use (i32* %mem, i32 10) + ret void + +; ARM-linux: test_basic: + +; ARM-linux: push {r4, r5} +; ARM-linux: .cfi_def_cfa_offset 8 +; ARM-linux: .cfi_offset r5, -4 +; ARM-linux: .cfi_offset r4, -8 +; ARM-linux-NEXT: mrc p15, #0, r4, c13, c0, #3 +; ARM-linux-NEXT: mov r5, sp +; ARM-linux-NEXT: ldr r4, [r4, #4] +; ARM-linux-NEXT: cmp r4, r5 +; ARM-linux-NEXT: blo .LBB0_2 + +; ARM-linux: mov r4, #48 +; ARM-linux-NEXT: mov r5, #0 +; ARM-linux-NEXT: stmdb sp!, {lr} +; ARM-linux: .cfi_def_cfa_offset 12 +; ARM-linux: .cfi_offset lr, -12 +; ARM-linux-NEXT: bl __morestack +; ARM-linux-NEXT: ldm sp!, {lr} +; ARM-linux-NEXT: pop {r4, r5} +; ARM-linux: .cfi_def_cfa_offset 0 +; ARM-linux-NEXT: bx lr + +; ARM-linux: pop {r4, r5} +; ARM-linux: .cfi_def_cfa_offset 0 +; ARM-linux .cfi_same_value r4 +; ARM-linux .cfi_same_value r5 +} + +!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/var.c] [DW_LANG_C99] +!1 = metadata !{metadata !"var.c", metadata !"/tmp"} +!2 = metadata !{} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test_basic", + metadata !"test_basic", metadata !"", i32 5, metadata !6, i1 false, i1 true, + i32 0, i32 0, null, i32 256, i1 false, void ()* @test_basic, null, null, metadata !2, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [sum] +!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp/var.c] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{metadata !8, metadata !8} +!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!9 = metadata !{i32 2, 
metadata !"Dwarf Version", i32 4} +!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} +!11 = metadata !{metadata !"clang version 3.5 "} +!12 = metadata !{i32 786689, metadata !4, metadata !"count", metadata !5, i32 16777221, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [count] [line 5] +!13 = metadata !{i32 5, i32 0, metadata !4, null} +!14 = metadata !{i32 786688, metadata !4, metadata !"vl", metadata !5, i32 6, metadata !15, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [vl] [line 6] +!15 = metadata !{i32 786454, metadata !16, null, metadata !"va_list", i32 30, i64 0, i64 0, i64 0, i32 0, metadata !17} ; [ DW_TAG_typedef ] [va_list] [line 30, size 0, align 0, offset 0] [from __builtin_va_list] +!16 = metadata !{metadata !"/linux-x86_64-high/gcc_4.7.2/dbg/llvm/bin/../lib/clang/3.5/include/stdarg.h", metadata !"/tmp"} +!17 = metadata !{i32 786454, metadata !1, null, metadata !"__builtin_va_list", i32 6, i64 0, i64 0, i64 0, i32 0, metadata !18} ; [ DW_TAG_typedef ] [__builtin_va_list] [line 6, size 0, align 0, offset 0] [from __va_list] +!18 = metadata !{i32 786451, metadata !1, null, metadata !"__va_list", i32 6, i64 32, i64 32, i32 0, i32 0, null, metadata !19, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [__va_list] [line 6, size 32, align 32, offset 0] [def] [from ] +!19 = metadata !{metadata !20} +!20 = metadata !{i32 786445, metadata !1, metadata !18, metadata !"__ap", i32 6, i64 32, i64 32, i64 0, i32 0, metadata !21} ; [ DW_TAG_member ] [__ap] [line 6, size 32, align 32, offset 0] [from ] +!21 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] [line 0, size 32, align 32, offset 0] [from ] +!22 = metadata !{i32 6, i32 0, metadata !4, null} +!23 = metadata !{i32 7, i32 0, metadata !4, null} +!24 = metadata !{i32 786688, metadata !4, metadata !"test_basic", metadata !5, i32 8, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [sum] [line 8] +!25 = metadata !{i32 8, i32 0, metadata !4, null} ; [ DW_TAG_imported_declaration ] +!26 = metadata !{i32 786688, metadata !27, metadata !"i", metadata !5, i32 9, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 9] +!27 = metadata !{i32 786443, metadata !1, metadata !4, i32 9, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/tmp/var.c] +!28 = metadata !{i32 9, i32 0, metadata !27, null} +!29 = metadata !{i32 10, i32 0, metadata !30, null} +!30 = metadata !{i32 786443, metadata !1, metadata !27, i32 9, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [/tmp/var.c] +!31 = metadata !{i32 11, i32 0, metadata !30, null} +!32 = metadata !{i32 12, i32 0, metadata !4, null} +!33 = metadata !{i32 13, i32 0, metadata !4, null} + +; Just to prevent the alloca from being optimized away +declare void @dummy_use(i32*, i32) diff --git a/test/CodeGen/ARM/default-float-abi.ll b/test/CodeGen/ARM/default-float-abi.ll new file mode 100644 index 0000000..1b26bbd --- /dev/null +++ b/test/CodeGen/ARM/default-float-abi.ll @@ -0,0 +1,22 @@ +; RUN: llc -mtriple=armv7-linux-gnueabihf %s -o - | FileCheck %s --check-prefix=CHECK-HARD +; RUN: llc -mtriple=armv7-linux-eabihf %s -o - | FileCheck %s --check-prefix=CHECK-HARD +; RUN: llc -mtriple=armv7-linux-gnueabihf -float-abi=soft %s -o - | FileCheck %s --check-prefix=CHECK-SOFT +; RUN: llc -mtriple=armv7-linux-gnueabi %s -o - | FileCheck %s --check-prefix=CHECK-SOFT +; RUN: llc -mtriple=armv7-linux-eabi -float-abi=hard %s -o - | FileCheck %s --check-prefix=CHECK-HARD +; RUN: llc -mtriple=thumbv7-apple-ios6.0 %s -o - | FileCheck %s 
--check-prefix=CHECK-SOFT + +define float @test_abi(float %lhs, float %rhs) { + %sum = fadd float %lhs, %rhs + ret float %sum + +; CHECK-HARD-LABEL: test_abi: +; CHECK-HARD-NOT: vmov +; CHECK-HARD: vadd.f32 s0, s0, s1 +; CHECK-HARD-NOT: vmov + +; CHECK-SOFT-LABEL: test_abi: +; CHECK-SOFT-DAG: vmov [[LHS:s[0-9]+]], r0 +; CHECK-SOFT-DAG: vmov [[RHS:s[0-9]+]], r1 +; CHECK-SOFT: vadd.f32 [[DEST:s[0-9]+]], [[LHS]], [[RHS]] +; CHECK-SOFT: vmov r0, [[DEST]] +} diff --git a/test/CodeGen/ARM/divmod-eabi.ll b/test/CodeGen/ARM/divmod-eabi.ll index 404cae0..7f72048 100644 --- a/test/CodeGen/ARM/divmod-eabi.ll +++ b/test/CodeGen/ARM/divmod-eabi.ll @@ -1,6 +1,9 @@ ; RUN: llc -mtriple armv7-none-eabi %s -o - | FileCheck %s --check-prefix=EABI +; RUN: llc -mtriple armv7-none-eabihf %s -o - | FileCheck %s --check-prefix=EABI ; RUN: llc -mtriple armv7-linux-gnueabi %s -o - | FileCheck %s --check-prefix=GNU ; RUN: llc -mtriple armv7-apple-darwin %s -o - | FileCheck %s --check-prefix=DARWIN +; FIXME: long-term, we will use "-apple-macho" and won't need this exception: +; RUN: llc -mtriple armv7-apple-darwin-eabi %s -o - | FileCheck %s --check-prefix=DARWIN define signext i16 @f16(i16 signext %a, i16 signext %b) { ; EABI-LABEL: f16: @@ -186,7 +189,7 @@ entry: %div = sdiv i32 %a, %b ; EABI: __aeabi_idivmod ; EABI: mov [[div:r[0-9]+]], r0 -; GNU __aeabi_idiv +; GNU: __aeabi_idiv ; GNU: mov [[sum:r[0-9]+]], r0 ; DARWIN: ___divsi3 ; DARWIN: mov [[sum:r[0-9]+]], r0 diff --git a/test/CodeGen/ARM/dyn-stackalloc.ll b/test/CodeGen/ARM/dyn-stackalloc.ll index de2820e..4ac5b8a 100644 --- a/test/CodeGen/ARM/dyn-stackalloc.ll +++ b/test/CodeGen/ARM/dyn-stackalloc.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm +; RUN: llc -mtriple=arm-eabi %s -o /dev/null %struct.comment = type { i8**, i32*, i32, i8* } %struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* } diff --git a/test/CodeGen/ARM/ehabi-filters.ll b/test/CodeGen/ARM/ehabi-filters.ll index cb5291b..f86b66c 100644 --- a/test/CodeGen/ARM/ehabi-filters.ll +++ b/test/CodeGen/ARM/ehabi-filters.ll @@ -1,4 +1,4 @@ -; RUN: llc -arm-enable-ehabi -arm-enable-ehabi-descriptors < %s | FileCheck %s +; RUN: llc < %s | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" target triple = "armv7-none-linux-gnueabi" diff --git a/test/CodeGen/ARM/ehabi-no-landingpad.ll b/test/CodeGen/ARM/ehabi-no-landingpad.ll index ac0dff4..d5c74c5 100644 --- a/test/CodeGen/ARM/ehabi-no-landingpad.ll +++ b/test/CodeGen/ARM/ehabi-no-landingpad.ll @@ -1,5 +1,4 @@ -; RUN: llc < %s -mtriple=armv7-unknown-linux-gnueabi \ -; RUN: -arm-enable-ehabi -arm-enable-ehabi-descriptors | FileCheck %s +; RUN: llc < %s -mtriple=armv7-unknown-linux-gnueabi | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" target triple = "armv7-unknown-linux-gnueabi" diff --git a/test/CodeGen/ARM/ehabi-unwind.ll b/test/CodeGen/ARM/ehabi-unwind.ll index fd7d0e6..a86f340 100644 --- a/test/CodeGen/ARM/ehabi-unwind.ll +++ b/test/CodeGen/ARM/ehabi-unwind.ll @@ -1,8 +1,7 @@ ; Test that the EHABI unwind instruction generator does not encounter any ; unfamiliar instructions. 
-; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi -disable-fp-elim -; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi -; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi -arm-enable-ehabi-descriptors +; RUN: llc < %s -mtriple=thumbv7 -disable-fp-elim +; RUN: llc < %s -mtriple=thumbv7 define void @_Z1fv() nounwind { entry: diff --git a/test/CodeGen/ARM/ehabi.ll b/test/CodeGen/ARM/ehabi.ll index 6644652..720cc3c 100644 --- a/test/CodeGen/ARM/ehabi.ll +++ b/test/CodeGen/ARM/ehabi.ll @@ -19,22 +19,34 @@ ; (4) armv7 without -disable-fp-elim ; RUN: llc -mtriple arm-unknown-linux-gnueabi \ -; RUN: -arm-enable-ehabi -arm-enable-ehabi-descriptors \ ; RUN: -disable-fp-elim -filetype=asm -o - %s \ ; RUN: | FileCheck %s --check-prefix=CHECK-FP ; RUN: llc -mtriple arm-unknown-linux-gnueabi \ -; RUN: -arm-enable-ehabi -arm-enable-ehabi-descriptors \ ; RUN: -filetype=asm -o - %s \ ; RUN: | FileCheck %s --check-prefix=CHECK-FP-ELIM ; RUN: llc -mtriple armv7-unknown-linux-gnueabi \ -; RUN: -arm-enable-ehabi -arm-enable-ehabi-descriptors \ ; RUN: -disable-fp-elim -filetype=asm -o - %s \ ; RUN: | FileCheck %s --check-prefix=CHECK-V7-FP ; RUN: llc -mtriple armv7-unknown-linux-gnueabi \ -; RUN: -arm-enable-ehabi -arm-enable-ehabi-descriptors \ +; RUN: -filetype=asm -o - %s \ +; RUN: | FileCheck %s --check-prefix=CHECK-V7-FP-ELIM + +; RUN: llc -mtriple arm-unknown-linux-androideabi \ +; RUN: -disable-fp-elim -filetype=asm -o - %s \ +; RUN: | FileCheck %s --check-prefix=CHECK-FP + +; RUN: llc -mtriple arm-unknown-linux-androideabi \ +; RUN: -filetype=asm -o - %s \ +; RUN: | FileCheck %s --check-prefix=CHECK-FP-ELIM + +; RUN: llc -mtriple armv7-unknown-linux-androideabi \ +; RUN: -disable-fp-elim -filetype=asm -o - %s \ +; RUN: | FileCheck %s --check-prefix=CHECK-V7-FP + +; RUN: llc -mtriple armv7-unknown-linux-androideabi \ ; RUN: -filetype=asm -o - %s \ ; RUN: | FileCheck %s --check-prefix=CHECK-V7-FP-ELIM @@ -169,7 +181,7 @@ declare void @throw_exception_2() define void @test2() { entry: - tail call void @throw_exception_2() + call void @throw_exception_2() ret void } diff --git a/test/CodeGen/ARM/extload-knownzero.ll b/test/CodeGen/ARM/extload-knownzero.ll index 8ccf58c..f55b951 100644 --- a/test/CodeGen/ARM/extload-knownzero.ll +++ b/test/CodeGen/ARM/extload-knownzero.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s ; rdar://12771555 define void @foo(i16* %ptr, i32 %a) nounwind { diff --git a/test/CodeGen/ARM/extloadi1.ll b/test/CodeGen/ARM/extloadi1.ll index dc45ce7..2504c6c 100644 --- a/test/CodeGen/ARM/extloadi1.ll +++ b/test/CodeGen/ARM/extloadi1.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=arm +; RUN: llc -mtriple=arm-eabi %s -o /dev/null + @handler_installed.6144.b = external global i1 ; [#uses=1] define void @__mf_sigusr1_respond() { diff --git a/test/CodeGen/ARM/fadds.ll b/test/CodeGen/ARM/fadds.ll index 21219ce..b5d3bda 100644 --- a/test/CodeGen/ARM/fadds.ll +++ b/test/CodeGen/ARM/fadds.ll @@ -1,9 +1,20 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0 -; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 -; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CORTEXA8U -; RUN: llc < %s -mtriple=arm-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8U -; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s 
-check-prefix=CORTEXA9 +; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o - \ +; RUN: | FileCheck %s -check-prefix=VFP2 + +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - \ +; RUN: | FileCheck %s -check-prefix=NFP0 + +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - \ +; RUN: | FileCheck %s -check-prefix=CORTEXA8 + +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math %s -o - \ +; RUN: | FileCheck %s -check-prefix=CORTEXA8U + +; RUN: llc -mtriple=arm-darwin -mcpu=cortex-a8 %s -o - \ +; RUN: | FileCheck %s -check-prefix=CORTEXA8U + +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 %s -o - \ +; RUN: | FileCheck %s -check-prefix=CORTEXA9 define float @test(float %a, float %b) { entry: diff --git a/test/CodeGen/ARM/fast-isel-call.ll b/test/CodeGen/ARM/fast-isel-call.ll index 917a15d..2d7378e 100644 --- a/test/CodeGen/ARM/fast-isel-call.ll +++ b/test/CodeGen/ARM/fast-isel-call.ll @@ -8,8 +8,6 @@ ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -mattr=-vfp2 | FileCheck %s --check-prefix=ARM-NOVFP ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=THUMB-NOVFP -; XFAIL: vg_leak - ; Note that some of these tests assume that relocations are either ; movw/movt or constant pool loads. Different platforms will select ; different approaches. diff --git a/test/CodeGen/ARM/fast-isel-crash2.ll b/test/CodeGen/ARM/fast-isel-crash2.ll index d606877..cccd9eb 100644 --- a/test/CodeGen/ARM/fast-isel-crash2.ll +++ b/test/CodeGen/ARM/fast-isel-crash2.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=thumbv7-apple-darwin -; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=thumbv7-linux-gnueabi +; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=thumbv7-apple-darwin +; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=armv7-linux-gnueabi ; rdar://9515076 ; (Make sure this doesn't crash.) diff --git a/test/CodeGen/ARM/fast-isel-frameaddr.ll b/test/CodeGen/ARM/fast-isel-frameaddr.ll index 8542bb5..93cdbbb 100644 --- a/test/CodeGen/ARM/fast-isel-frameaddr.ll +++ b/test/CodeGen/ARM/fast-isel-frameaddr.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=DARWIN-ARM +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=DARWIN-ARM ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=LINUX-ARM -; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=DARWIN-THUMB2 +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=DARWIN-THUMB2 ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=thumbv7-linux-gnueabi | FileCheck %s --check-prefix=LINUX-THUMB2 define i8* @frameaddr_index0() nounwind { @@ -34,14 +34,12 @@ entry: ; DARWIN-ARM-LABEL: frameaddr_index1: ; DARWIN-ARM: push {r7} ; DARWIN-ARM: mov r7, sp -; DARWIN-ARM: mov r0, r7 -; DARWIN-ARM: ldr r0, [r0] +; DARWIN-ARM: ldr r0, [r7] ; DARWIN-THUMB2-LABEL: frameaddr_index1: ; DARWIN-THUMB2: str r7, [sp, #-4]! 
; DARWIN-THUMB2: mov r7, sp -; DARWIN-THUMB2: mov r0, r7 -; DARWIN-THUMB2: ldr r0, [r0] +; DARWIN-THUMB2: ldr r0, [r7] ; LINUX-ARM-LABEL: frameaddr_index1: ; LINUX-ARM: push {r11} @@ -63,16 +61,14 @@ entry: ; DARWIN-ARM-LABEL: frameaddr_index3: ; DARWIN-ARM: push {r7} ; DARWIN-ARM: mov r7, sp -; DARWIN-ARM: mov r0, r7 -; DARWIN-ARM: ldr r0, [r0] +; DARWIN-ARM: ldr r0, [r7] ; DARWIN-ARM: ldr r0, [r0] ; DARWIN-ARM: ldr r0, [r0] ; DARWIN-THUMB2-LABEL: frameaddr_index3: ; DARWIN-THUMB2: str r7, [sp, #-4]! ; DARWIN-THUMB2: mov r7, sp -; DARWIN-THUMB2: mov r0, r7 -; DARWIN-THUMB2: ldr r0, [r0] +; DARWIN-THUMB2: ldr r0, [r7] ; DARWIN-THUMB2: ldr r0, [r0] ; DARWIN-THUMB2: ldr r0, [r0] diff --git a/test/CodeGen/ARM/fast-isel-intrinsic.ll b/test/CodeGen/ARM/fast-isel-intrinsic.ll index b08b72b..089209e 100644 --- a/test/CodeGen/ARM/fast-isel-intrinsic.ll +++ b/test/CodeGen/ARM/fast-isel-intrinsic.ll @@ -5,8 +5,6 @@ ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -arm-long-calls -verify-machineinstrs | FileCheck %s --check-prefix=ARM-LONG ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -arm-long-calls -verify-machineinstrs | FileCheck %s --check-prefix=THUMB-LONG -; XFAIL: vg_leak - ; Note that some of these tests assume that relocations are either ; movw/movt or constant pool loads. Different platforms will select ; different approaches. @@ -15,7 +13,7 @@ @temp = common global [60 x i8] zeroinitializer, align 1 define void @t1() nounwind ssp { -; ARM: t1 +; ARM-LABEL: t1: ; ARM: {{(movw r0, :lower16:_?message1)|(ldr r0, .LCPI)}} ; ARM: {{(movt r0, :upper16:_?message1)|(ldr r0, \[r0\])}} ; ARM: add r0, r0, #5 @@ -23,12 +21,12 @@ define void @t1() nounwind ssp { ; ARM: movw r2, #10 ; ARM: and r1, r1, #255 ; ARM: bl {{_?}}memset -; ARM-LONG: t1 +; ARM-LONG-LABEL: t1: ; ARM-LONG: {{(movw r3, :lower16:L_memset\$non_lazy_ptr)|(ldr r3, .LCPI)}} ; ARM-LONG: {{(movt r3, :upper16:L_memset\$non_lazy_ptr)?}} ; ARM-LONG: ldr r3, [r3] ; ARM-LONG: blx r3 -; THUMB: t1 +; THUMB-LABEL: t1: ; THUMB: {{(movw r0, :lower16:_?message1)|(ldr.n r0, .LCPI)}} ; THUMB: {{(movt r0, :upper16:_?message1)|(ldr r0, \[r0\])}} ; THUMB: adds r0, #5 @@ -38,7 +36,7 @@ define void @t1() nounwind ssp { ; THUMB: movt r2, #0 ; THUMB: and r1, r1, #255 ; THUMB: bl {{_?}}memset -; THUMB-LONG: t1 +; THUMB-LONG-LABEL: t1: ; THUMB-LONG: movw r3, :lower16:L_memset$non_lazy_ptr ; THUMB-LONG: movt r3, :upper16:L_memset$non_lazy_ptr ; THUMB-LONG: ldr r3, [r3] @@ -50,7 +48,7 @@ define void @t1() nounwind ssp { declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind define void @t2() nounwind ssp { -; ARM: t2 +; ARM-LABEL: t2: ; ARM: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}} ; ARM: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; ARM: ldr r0, [r0] @@ -61,12 +59,12 @@ define void @t2() nounwind ssp { ; ARM: mov r0, r1 ; ARM: ldr r1, [sp[[SLOT]]] @ 4-byte Reload ; ARM: bl {{_?}}memcpy -; ARM-LONG: t2 +; ARM-LONG-LABEL: t2: ; ARM-LONG: {{(movw r3, :lower16:L_memcpy\$non_lazy_ptr)|(ldr r3, .LCPI)}} ; ARM-LONG: {{(movt r3, :upper16:L_memcpy\$non_lazy_ptr)?}} ; ARM-LONG: ldr r3, [r3] ; ARM-LONG: blx r3 -; THUMB: t2 +; THUMB-LABEL: t2: ; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}} ; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; THUMB: ldr r0, [r0] @@ -78,7 +76,7 @@ define void @t2() nounwind ssp { ; THUMB: mov r0, r1 ; THUMB: ldr r1, [sp[[SLOT]]] @ 4-byte Reload ; THUMB: bl {{_?}}memcpy 
-; THUMB-LONG: t2 +; THUMB-LONG-LABEL: t2: ; THUMB-LONG: movw r3, :lower16:L_memcpy$non_lazy_ptr ; THUMB-LONG: movt r3, :upper16:L_memcpy$non_lazy_ptr ; THUMB-LONG: ldr r3, [r3] @@ -90,7 +88,7 @@ define void @t2() nounwind ssp { declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind define void @t3() nounwind ssp { -; ARM: t3 +; ARM-LABEL: t3: ; ARM: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}} ; ARM: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; ARM: ldr r0, [r0] @@ -99,12 +97,12 @@ define void @t3() nounwind ssp { ; ARM: movw r2, #10 ; ARM: mov r0, r1 ; ARM: bl {{_?}}memmove -; ARM-LONG: t3 +; ARM-LONG-LABEL: t3: ; ARM-LONG: {{(movw r3, :lower16:L_memmove\$non_lazy_ptr)|(ldr r3, .LCPI)}} ; ARM-LONG: {{(movt r3, :upper16:L_memmove\$non_lazy_ptr)?}} ; ARM-LONG: ldr r3, [r3] ; ARM-LONG: blx r3 -; THUMB: t3 +; THUMB-LABEL: t3: ; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}} ; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; THUMB: ldr r0, [r0] @@ -116,7 +114,7 @@ define void @t3() nounwind ssp { ; THUMB: mov r0, r1 ; THUMB: ldr r1, [sp[[SLOT]]] @ 4-byte Reload ; THUMB: bl {{_?}}memmove -; THUMB-LONG: t3 +; THUMB-LONG-LABEL: t3: ; THUMB-LONG: movw r3, :lower16:L_memmove$non_lazy_ptr ; THUMB-LONG: movt r3, :upper16:L_memmove$non_lazy_ptr ; THUMB-LONG: ldr r3, [r3] @@ -126,7 +124,7 @@ define void @t3() nounwind ssp { } define void @t4() nounwind ssp { -; ARM: t4 +; ARM-LABEL: t4: ; ARM: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}} ; ARM: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; ARM: ldr r0, [r0] @@ -137,7 +135,7 @@ define void @t4() nounwind ssp { ; ARM: ldrh r1, [r0, #24] ; ARM: strh r1, [r0, #12] ; ARM: bx lr -; THUMB: t4 +; THUMB-LABEL: t4: ; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}} ; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; THUMB: ldr r0, [r0] @@ -155,7 +153,7 @@ define void @t4() nounwind ssp { declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind define void @t5() nounwind ssp { -; ARM: t5 +; ARM-LABEL: t5: ; ARM: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}} ; ARM: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; ARM: ldr r0, [r0] @@ -170,7 +168,7 @@ define void @t5() nounwind ssp { ; ARM: ldrh r1, [r0, #24] ; ARM: strh r1, [r0, #12] ; ARM: bx lr -; THUMB: t5 +; THUMB-LABEL: t5: ; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}} ; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; THUMB: ldr r0, [r0] @@ -190,7 +188,7 @@ define void @t5() nounwind ssp { } define void @t6() nounwind ssp { -; ARM: t6 +; ARM-LABEL: t6: ; ARM: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}} ; ARM: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; ARM: ldr r0, [r0] @@ -215,7 +213,7 @@ define void @t6() nounwind ssp { ; ARM: ldrb r1, [r0, #25] ; ARM: strb r1, [r0, #13] ; ARM: bx lr -; THUMB: t6 +; THUMB-LABEL: t6: ; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}} ; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; THUMB: ldr r0, [r0] @@ -253,9 +251,9 @@ define void @t7() nounwind ssp { define i32 @t8(i32 %x) nounwind { entry: -; ARM: t8 +; ARM-LABEL: t8: ; ARM-NOT: FastISel missed call: %expval = call i32 @llvm.expect.i32(i32 %x, i32 1) -; THUMB: t8 +; THUMB-LABEL: t8: ; THUMB-NOT: FastISel missed call: %expval = call i32 @llvm.expect.i32(i32 %x, i32 1) %expval = call i32 @llvm.expect.i32(i32 %x, i32 1) ret i32 %expval diff --git 
a/test/CodeGen/ARM/fast-isel-static.ll b/test/CodeGen/ARM/fast-isel-static.ll index 93c14a0..9bd0a51 100644 --- a/test/CodeGen/ARM/fast-isel-static.ll +++ b/test/CodeGen/ARM/fast-isel-static.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static -arm-long-calls | FileCheck -check-prefix=CHECK-LONG %s -; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static -arm-long-calls | FileCheck -check-prefix=CHECK-LONG %s -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static | FileCheck -check-prefix=CHECK-NORM %s -; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static | FileCheck -check-prefix=CHECK-NORM %s +; RUN: llc < %s -mtriple=thumbv7-apple-ios -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static -arm-long-calls | FileCheck -check-prefix=CHECK-LONG %s +; RUN: llc < %s -mtriple=armv7-linux-gnueabi -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static -arm-long-calls | FileCheck -check-prefix=CHECK-LONG %s +; RUN: llc < %s -mtriple=thumbv7-apple-ios -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static | FileCheck -check-prefix=CHECK-NORM %s +; RUN: llc < %s -mtriple=armv7-linux-gnueabi -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static | FileCheck -check-prefix=CHECK-NORM %s define void @myadd(float* %sum, float* %addend) nounwind { entry: diff --git a/test/CodeGen/ARM/fast-tail-call.ll b/test/CodeGen/ARM/fast-tail-call.ll index 9fbdc9d..6472016 100644 --- a/test/CodeGen/ARM/fast-tail-call.ll +++ b/test/CodeGen/ARM/fast-tail-call.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=thumbv7-linux-gnueabi -O0 -arm-tail-calls < %s | FileCheck %s +; RUN: llc -mtriple=thumbv7-linux-gnueabi -O0 < %s | FileCheck %s ; Primarily a non-crash test: Thumbv7 Linux does not have FastISel support, ; which led (via a convoluted route) to DAG nodes after a TC_RETURN that diff --git a/test/CodeGen/ARM/fastcc-vfp.ll b/test/CodeGen/ARM/fastcc-vfp.ll new file mode 100644 index 0000000..4c98150 --- /dev/null +++ b/test/CodeGen/ARM/fastcc-vfp.ll @@ -0,0 +1,40 @@ +; RUN: llc < %s -mtriple=armv7-apple-ios -mattr=+vfp2 | FileCheck %s + +define fastcc double @t1(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, float %a, float %b) { +entry: +; CHECK-LABEL: t1: +; CHECK-NOT: vmov +; CHECK: vldr + %add = fadd float %a, %b + %conv = fpext float %add to double + ret double %conv +} + +define fastcc double @t2(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %a, float %b, double %c) { +entry: +; CHECK-LABEL: t2: +; CHECK-NOT: vmov +; CHECK: vldr + %add = fadd double %a, %c + ret double %add +} + +define fastcc float @t3(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, float %a, double %b, float %c) { +entry: +; CHECK-LABEL: t3: +; CHECK: vldr + %add = fadd float %a, %c + ret float %add +} + +define fastcc double @t4(double %a, double %b) #0 { +entry: +; CHECK-LABEL: t4: +; CHECK: vstr + %add = fadd double %a, %b + %sub = fsub double %a, %b + %call = tail call fastcc double @x(double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double %add, float 0.000000e+00, double %sub) #2 + ret double %call +} + +declare fastcc double @x(double, double, 
double, double, double, double, double, float, double) diff --git a/test/CodeGen/ARM/fastisel-thumb-litpool.ll b/test/CodeGen/ARM/fastisel-thumb-litpool.ll new file mode 100644 index 0000000..aa9e726 --- /dev/null +++ b/test/CodeGen/ARM/fastisel-thumb-litpool.ll @@ -0,0 +1,11 @@ +; RUN: llc -mtriple=thumbv7-apple-ios -O0 -o - %s | FileCheck %s + +; We used to accidentally create both an ARM and a Thumb ldr here. It led to an +; assertion failure at the time, but could go all the way through to emission, +; hence the CHECK-NOT. + +define i32 @test_thumb_ldrlit() minsize { +; CHECK: ldr r0, LCPI0_0 +; CHECK-NOT: ldr + ret i32 12345678 +} diff --git a/test/CodeGen/ARM/fdivs.ll b/test/CodeGen/ARM/fdivs.ll index a4fecfe..7cab766 100644 --- a/test/CodeGen/ARM/fdivs.ll +++ b/test/CodeGen/ARM/fdivs.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0 -; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 -; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9 +; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o - | FileCheck %s -check-prefix=VFP2 +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s -check-prefix=NFP0 +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s -check-prefix=CORTEXA8 +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 %s -o - | FileCheck %s -check-prefix=CORTEXA9 define float @test(float %a, float %b) { entry: diff --git a/test/CodeGen/ARM/fixunsdfdi.ll b/test/CodeGen/ARM/fixunsdfdi.ll index 6db2385..f3406cc 100644 --- a/test/CodeGen/ARM/fixunsdfdi.ll +++ b/test/CodeGen/ARM/fixunsdfdi.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 -; RUN: llc < %s -march=arm -mattr=vfp2 | not grep vstr.64 +; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o /dev/null +; RUN: llc -mtriple=arm-eabi -mattr=vfp2 %s -o - | FileCheck %s define hidden i64 @__fixunsdfdi(double %x) nounwind readnone { entry: @@ -27,3 +27,6 @@ bb7: ; preds = %bb3 bb10: ; preds = %entry ret i64 0 } + +; CHECK-NOT: vstr.64 + diff --git a/test/CodeGen/ARM/fmacs.ll b/test/CodeGen/ARM/fmacs.ll index f2486c6..6f8c0fe 100644 --- a/test/CodeGen/ARM/fmacs.ll +++ b/test/CodeGen/ARM/fmacs.ll @@ -1,8 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON -; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8 -; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=A9 -; RUN: llc < %s -mtriple=arm-linux-gnueabi -mcpu=cortex-a9 -float-abi=hard | FileCheck %s -check-prefix=HARD +; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o - | FileCheck %s -check-prefix=VFP2 +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s -check-prefix=NEON +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s -check-prefix=A8 +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 %s -o - | FileCheck %s -check-prefix=A9 +; RUN: llc -mtriple=arm-linux-gnueabi -mcpu=cortex-a9 -float-abi=hard %s -o - | FileCheck %s -check-prefix=HARD define float @t1(float %acc, float %a, float %b) { entry: diff --git a/test/CodeGen/ARM/fmdrr-fmrrd.ll b/test/CodeGen/ARM/fmdrr-fmrrd.ll index eb72faf..a3669b4 100644 --- a/test/CodeGen/ARM/fmdrr-fmrrd.ll +++ b/test/CodeGen/ARM/fmdrr-fmrrd.ll @@ -1,5 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=vfp2 | not grep fmdrr -; RUN: llc < %s -march=arm -mattr=vfp2 | not grep fmrrd +; RUN: 
llc -mtriple=arm-eabi -mattr=vfp2 %s -o - | FileCheck %s ; naive codegen for this is: ; _i: @@ -11,3 +10,8 @@ define i64 @test(double %X) { %Y = bitcast double %X to i64 ret i64 %Y } + +; CHECK-LABEL: test: +; CHECK-NOT: fmdrr +; CHECK-NOT: fmrrd + diff --git a/test/CodeGen/ARM/fmscs.ll b/test/CodeGen/ARM/fmscs.ll index f16ec17..5aff74c 100644 --- a/test/CodeGen/ARM/fmscs.ll +++ b/test/CodeGen/ARM/fmscs.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON -; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8 +; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o - | FileCheck %s -check-prefix=VFP2 +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s -check-prefix=NEON +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s -check-prefix=A8 define float @t1(float %acc, float %a, float %b) { entry: diff --git a/test/CodeGen/ARM/fmuls.ll b/test/CodeGen/ARM/fmuls.ll index d11f6bd..b24d867 100644 --- a/test/CodeGen/ARM/fmuls.ll +++ b/test/CodeGen/ARM/fmuls.ll @@ -1,9 +1,20 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0 -; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 -; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CORTEXA8U -; RUN: llc < %s -mtriple=arm-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8U -; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9 +; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o - \ +; RUN: | FileCheck %s -check-prefix=VFP2 + +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - \ +; RUN: | FileCheck %s -check-prefix=NFP0 + +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - \ +; RUN: | FileCheck %s -check-prefix=CORTEXA8 + +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math %s -o - \ +; RUN: | FileCheck %s -check-prefix=CORTEXA8U + +; RUN: llc -mtriple=arm-darwin -mcpu=cortex-a8 %s -o - \ +; RUN: | FileCheck %s -check-prefix=CORTEXA8U + +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 %s -o - \ +; RUN: | FileCheck %s -check-prefix=CORTEXA9 define float @test(float %a, float %b) { entry: diff --git a/test/CodeGen/ARM/fnegs.ll b/test/CodeGen/ARM/fnegs.ll index dc4c2e3..36af835 100644 --- a/test/CodeGen/ARM/fnegs.ll +++ b/test/CodeGen/ARM/fnegs.ll @@ -1,9 +1,20 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0 -; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 -; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CORTEXA8U -; RUN: llc < %s -mtriple=arm-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8U -; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9 +; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o - \ +; RUN: | FileCheck %s -check-prefix=VFP2 + +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - \ +; RUN: | FileCheck %s -check-prefix=NFP0 + +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - \ +; RUN: | FileCheck %s -check-prefix=CORTEXA8 + +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math %s -o - \ +; RUN: | FileCheck %s -check-prefix=CORTEXA8U + +; RUN: llc -mtriple=arm-darwin -mcpu=cortex-a8 %s -o - \ +; RUN: | 
FileCheck %s -check-prefix=CORTEXA8U + +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 %s -o - \ +; RUN: | FileCheck %s -check-prefix=CORTEXA9 define float @test1(float* %a) { entry: diff --git a/test/CodeGen/ARM/fnmacs.ll b/test/CodeGen/ARM/fnmacs.ll index 825feaa..ab35a97 100644 --- a/test/CodeGen/ARM/fnmacs.ll +++ b/test/CodeGen/ARM/fnmacs.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON -; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8 +; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o - | FileCheck %s -check-prefix=VFP2 +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s -check-prefix=NEON +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s -check-prefix=A8 define float @t1(float %acc, float %a, float %b) { entry: diff --git a/test/CodeGen/ARM/fnmscs.ll b/test/CodeGen/ARM/fnmscs.ll index 78ccb60..5fa6b21 100644 --- a/test/CodeGen/ARM/fnmscs.ll +++ b/test/CodeGen/ARM/fnmscs.ll @@ -1,9 +1,20 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON -; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8 -; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=A8 -; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=A8U -; RUN: llc < %s -mtriple=arm-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8U +; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o - \ +; RUN: | FileCheck %s -check-prefix=VFP2 + +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - \ +; RUN: | FileCheck %s -check-prefix=NEON + +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - \ +; RUN: | FileCheck %s -check-prefix=A8 + +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 -regalloc=basic %s -o - \ +; RUN: | FileCheck %s -check-prefix=A8 + +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math %s -o - \ +; RUN: | FileCheck %s -check-prefix=A8U + +; RUN: llc -mtriple=arm-darwin -mcpu=cortex-a8 %s -o - \ +; RUN: | FileCheck %s -check-prefix=A8U define float @t1(float %acc, float %a, float %b) nounwind { entry: diff --git a/test/CodeGen/ARM/fnmul.ll b/test/CodeGen/ARM/fnmul.ll index 6d7bc05..e14e5ba 100644 --- a/test/CodeGen/ARM/fnmul.ll +++ b/test/CodeGen/ARM/fnmul.ll @@ -1,5 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | grep vnmul.f64 -; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 -enable-sign-dependent-rounding-fp-math | grep vmul.f64 +; RUN: llc -mtriple=arm-eabi -mattr=+v6,+vfp2 %s -o - | FileCheck %s + +; RUN: llc -mtriple=arm-eabi -mattr=+v6,+vfp2 -enable-sign-dependent-rounding-fp-math %s -o - \ +; RUN: | FileCheck %s -check-prefix CHECK-ROUNDING + define double @t1(double %a, double %b) { @@ -9,3 +12,6 @@ entry: ret double %tmp4 } +; CHECK: vnmul.f64 +; CHECK-ROUNDING: vmul.f64 + diff --git a/test/CodeGen/ARM/fnmuls.ll b/test/CodeGen/ARM/fnmuls.ll index 3223885..de3b053 100644 --- a/test/CodeGen/ARM/fnmuls.ll +++ b/test/CodeGen/ARM/fnmuls.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o - | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s +; RUN: 
llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 %s -o - | FileCheck %s define arm_aapcs_vfpcc float @test1(float %a, float %b) nounwind { ; CHECK: vnmul.f32 s0, s0, s1 diff --git a/test/CodeGen/ARM/fold-const.ll b/test/CodeGen/ARM/fold-const.ll index 1ba561d..dc5419f 100644 --- a/test/CodeGen/ARM/fold-const.ll +++ b/test/CodeGen/ARM/fold-const.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+v7 | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+v7 %s -o - | FileCheck %s define i32 @f(i32 %a) nounwind readnone optsize ssp { entry: diff --git a/test/CodeGen/ARM/fold-stack-adjust.ll b/test/CodeGen/ARM/fold-stack-adjust.ll index 67fd129..695a20b 100644 --- a/test/CodeGen/ARM/fold-stack-adjust.ll +++ b/test/CodeGen/ARM/fold-stack-adjust.ll @@ -1,6 +1,7 @@ -; RUN: llc -mtriple=thumbv7-apple-darwin-eabi < %s | FileCheck %s -; RUN: llc -mtriple=thumbv6m-apple-darwin-eabi -disable-fp-elim < %s | FileCheck %s --check-prefix=CHECK-T1 +; RUN: llc -mtriple=thumbv7-apple-none-macho < %s | FileCheck %s +; RUN: llc -mtriple=thumbv6m-apple-none-macho -disable-fp-elim < %s | FileCheck %s --check-prefix=CHECK-T1 ; RUN: llc -mtriple=thumbv7-apple-darwin-ios -disable-fp-elim < %s | FileCheck %s --check-prefix=CHECK-IOS +; RUN: llc -mtriple=thumbv7--linux-gnueabi -disable-fp-elim < %s | FileCheck %s --check-prefix=CHECK-LINUX declare void @bar(i8*) @@ -11,11 +12,11 @@ declare void @bar(i8*) define void @check_simple() minsize { ; CHECK-LABEL: check_simple: -; CHECK: push.w {r7, r8, r9, r10, r11, lr} +; CHECK: push {r3, r4, r5, r6, r7, lr} ; CHECK-NOT: sub sp, sp, ; ... ; CHECK-NOT: add sp, sp, -; CHECK: pop.w {r0, r1, r2, r3, r11, pc} +; CHECK: pop {r0, r1, r2, r3, r7, pc} ; CHECK-T1-LABEL: check_simple: ; CHECK-T1: push {r3, r4, r5, r6, r7, lr} @@ -43,11 +44,11 @@ define void @check_simple() minsize { define void @check_simple_too_big() minsize { ; CHECK-LABEL: check_simple_too_big: -; CHECK: push.w {r11, lr} +; CHECK: push {r7, lr} ; CHECK: sub sp, ; ... ; CHECK: add sp, -; CHECK: pop.w {r11, pc} +; CHECK: pop {r7, pc} %var = alloca i8, i32 64 call void @bar(i8* %var) ret void @@ -92,16 +93,16 @@ define void @check_vfp_fold() minsize { ; folded in except that doing so would clobber the value being returned. define i64 @check_no_return_clobber() minsize { ; CHECK-LABEL: check_no_return_clobber: -; CHECK: push.w {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK: push {r1, r2, r3, r4, r5, r6, r7, lr} ; CHECK-NOT: sub sp, ; ... -; CHECK: add sp, #40 -; CHECK: pop.w {r11, pc} +; CHECK: add sp, #24 +; CHECK: pop {r7, pc} ; Just to keep iOS FileCheck within previous function: ; CHECK-IOS-LABEL: check_no_return_clobber: - %var = alloca i8, i32 40 + %var = alloca i8, i32 20 call void @bar(i8* %var) ret i64 0 } @@ -161,4 +162,57 @@ end: ; We want the epilogue to be the only thing in a basic block so that we hit ; the correct edge-case (first inst in block is correct one to adjust). ret void -} \ No newline at end of file +} + +define void @test_varsize(...) minsize { +; CHECK-T1-LABEL: test_varsize: +; CHECK-T1: sub sp, #16 +; CHECK-T1: push {r2, r3, r4, r5, r7, lr} +; ... +; CHECK-T1: pop {r2, r3, r4, r5, r7} +; CHECK-T1: pop {r3} +; CHECK-T1: add sp, #16 +; CHECK-T1: bx r3 + +; CHECK-LABEL: test_varsize: +; CHECK: sub sp, #16 +; CHECK: push {r5, r6, r7, lr} +; ... 
+; CHECK: pop.w {r2, r3, r7, lr} +; CHECK: add sp, #16 +; CHECK: bx lr + + %var = alloca i8, i32 8 + call void @bar(i8* %var) + ret void +} + +%"MyClass" = type { i8*, i32, i32, float, float, float, [2 x i8], i32, i32* } + +declare float @foo() + +declare void @bar3() + +declare %"MyClass"* @bar2(%"MyClass"* returned, i16*, i32, float, float, i32, i32, i1 zeroext, i1 zeroext, i32) + +define fastcc float @check_vfp_no_return_clobber2(i16* %r, i16* %chars, i32 %length, i1 zeroext %flag) minsize { +entry: +; CHECK-LINUX-LABEL: check_vfp_no_return_clobber2 +; CHECK-LINUX: vpush {d0, d1, d2, d3, d4, d5, d6, d7, d8} +; CHECK-NOT: sub sp, +; ... +; CHECK-LINUX: add sp +; CHECK-LINUX: vpop {d8} + %run = alloca %"MyClass", align 4 + %call = call %"MyClass"* @bar2(%"MyClass"* %run, i16* %chars, i32 %length, float 0.000000e+00, float 0.000000e+00, i32 1, i32 1, i1 zeroext false, i1 zeroext true, i32 3) + %call1 = call float @foo() + %cmp = icmp eq %"MyClass"* %run, null + br i1 %cmp, label %exit, label %if.then + +if.then: ; preds = %entry + call void @bar3() + br label %exit + +exit: ; preds = %if.then, %entry + ret float %call1 +} diff --git a/test/CodeGen/ARM/formal.ll b/test/CodeGen/ARM/formal.ll index 4ac10ba..05a6be1 100644 --- a/test/CodeGen/ARM/formal.ll +++ b/test/CodeGen/ARM/formal.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 +; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o /dev/null declare void @bar(i64 %x, i64 %y) diff --git a/test/CodeGen/ARM/fp-arg-shuffle.ll b/test/CodeGen/ARM/fp-arg-shuffle.ll index ae02b79..4996cc8 100644 --- a/test/CodeGen/ARM/fp-arg-shuffle.ll +++ b/test/CodeGen/ARM/fp-arg-shuffle.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon -float-abi=soft | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon -float-abi=soft %s -o - | FileCheck %s ; CHECK: function1 ; CHECK-NOT: vmov diff --git a/test/CodeGen/ARM/fp-fast.ll b/test/CodeGen/ARM/fp-fast.ll index ec57187..7d95a5e 100644 --- a/test/CodeGen/ARM/fp-fast.ll +++ b/test/CodeGen/ARM/fp-fast.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=arm -mcpu=cortex-a9 -mattr=+vfp4 -enable-unsafe-fp-math < %s | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 -mattr=+vfp4 -enable-unsafe-fp-math %s -o - \ +; RUN: | FileCheck %s ; CHECK: test1 define float @test1(float %x) { diff --git a/test/CodeGen/ARM/fp.ll b/test/CodeGen/ARM/fp.ll index fbf3a4a..7e1f000 100644 --- a/test/CodeGen/ARM/fp.ll +++ b/test/CodeGen/ARM/fp.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s +; RUN: llc -mtriple=arm-eabi -float-abi=soft -mattr=+vfp2 %s -o - | FileCheck %s define float @f(i32 %a) { ;CHECK-LABEL: f: diff --git a/test/CodeGen/ARM/fp16.ll b/test/CodeGen/ARM/fp16.ll index a5c1aed..fba7946 100644 --- a/test/CodeGen/ARM/fp16.ll +++ b/test/CodeGen/ARM/fp16.ll @@ -9,7 +9,7 @@ target triple = "armv7-eabi" define arm_aapcs_vfpcc void @foo() nounwind { ; CHECK-LABEL: foo: -; CHECK-FP6-LABEL: foo: +; CHECK-FP16-LABEL: foo: entry: %0 = load i16* @x, align 2 %1 = load i16* @y, align 2 diff --git a/test/CodeGen/ARM/fp_convert.ll b/test/CodeGen/ARM/fp_convert.ll index f0d9100..6f47075 100644 --- a/test/CodeGen/ARM/fp_convert.ll +++ b/test/CodeGen/ARM/fp_convert.ll @@ -1,9 +1,20 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=VFP2 -; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=VFP2 -; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s 
-check-prefix=NEON -; RUN: llc < %s -mtriple=arm-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=NEON -; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=VFP2 +; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o - \ +; RUN: | FileCheck %s -check-prefix=VFP2 + +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - \ +; RUN: | FileCheck %s -check-prefix=VFP2 + +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - \ +; RUN: | FileCheck %s -check-prefix=VFP2 + +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math %s -o - \ +; RUN: | FileCheck %s -check-prefix=NEON + +; RUN: llc -mtriple=arm-darwin -mcpu=cortex-a8 %s -o - \ +; RUN: | FileCheck %s -check-prefix=NEON + +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 %s -o - \ +; RUN: | FileCheck %s -check-prefix=VFP2 define i32 @test1(float %a, float %b) { ; VFP2-LABEL: test1: diff --git a/test/CodeGen/ARM/fpcmp-opt.ll b/test/CodeGen/ARM/fpcmp-opt.ll index 3a0af16..eab5988 100644 --- a/test/CodeGen/ARM/fpcmp-opt.ll +++ b/test/CodeGen/ARM/fpcmp-opt.ll @@ -1,4 +1,6 @@ -; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math %s -o - \ +; RUN: | FileCheck %s + ; rdar://7461510 ; rdar://10964603 diff --git a/test/CodeGen/ARM/fpcmp.ll b/test/CodeGen/ARM/fpcmp.ll index 916a1ae..e3ffd45 100644 --- a/test/CodeGen/ARM/fpcmp.ll +++ b/test/CodeGen/ARM/fpcmp.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o - | FileCheck %s define i32 @f1(float %a) { ;CHECK-LABEL: f1: diff --git a/test/CodeGen/ARM/fpconsts.ll b/test/CodeGen/ARM/fpconsts.ll index 0679a47..5a45a9b 100644 --- a/test/CodeGen/ARM/fpconsts.ll +++ b/test/CodeGen/ARM/fpconsts.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+vfp3 | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+vfp3 %s -o - | FileCheck %s define float @t1(float %x) nounwind readnone optsize { entry: diff --git a/test/CodeGen/ARM/fpconv.ll b/test/CodeGen/ARM/fpconv.ll index 326e062..eadf9af 100644 --- a/test/CodeGen/ARM/fpconv.ll +++ b/test/CodeGen/ARM/fpconv.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s --check-prefix=CHECK-VFP -; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o - | FileCheck %s --check-prefix=CHECK-VFP +; RUN: llc -mtriple=arm-apple-darwin %s -o - | FileCheck %s define float @f1(double %x) { ;CHECK-VFP-LABEL: f1: diff --git a/test/CodeGen/ARM/fpmem.ll b/test/CodeGen/ARM/fpmem.ll index 8fbd1d8..3a454ed 100644 --- a/test/CodeGen/ARM/fpmem.ll +++ b/test/CodeGen/ARM/fpmem.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s +; RUN: llc -mtriple=arm -float-abi=soft -mattr=+vfp2 %s -o - | FileCheck %s define float @f1(float %a) { ; CHECK-LABEL: f1: diff --git a/test/CodeGen/ARM/fpow.ll b/test/CodeGen/ARM/fpow.ll index 6d48792..3e37724 100644 --- a/test/CodeGen/ARM/fpow.ll +++ b/test/CodeGen/ARM/fpow.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm +; RUN: llc -mtriple=arm-eabi %s -o /dev/null define double @t(double %x, double %y) nounwind optsize { entry: diff --git a/test/CodeGen/ARM/fptoint.ll b/test/CodeGen/ARM/fptoint.ll index 7408687..c721756 100644 --- a/test/CodeGen/ARM/fptoint.ll +++ b/test/CodeGen/ARM/fptoint.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+v6,+vfp2 %s -o - | FileCheck %s @i = weak global i32 0 ; 
[#uses=2] @u = weak global i32 0 ; [#uses=2] diff --git a/test/CodeGen/ARM/fsubs.ll b/test/CodeGen/ARM/fsubs.ll index 617b018..baff34a 100644 --- a/test/CodeGen/ARM/fsubs.ll +++ b/test/CodeGen/ARM/fsubs.ll @@ -1,8 +1,17 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 -; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=NFP1 -; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=NFP1U -; RUN: llc < %s -mtriple=arm-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=NFP1U -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0 +; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o - \ +; RUN: | FileCheck %s -check-prefix=VFP2 + +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - \ +; RUN: | FileCheck %s -check-prefix=NFP1 + +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math %s -o - \ +; RUN: | FileCheck %s -check-prefix=NFP1U + +; RUN: llc -mtriple=arm-darwin -mcpu=cortex-a8 %s -o - \ +; RUN: | FileCheck %s -check-prefix=NFP1U + +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - \ +; RUN: | FileCheck %s -check-prefix=NFP0 define float @test(float %a, float %b) { entry: diff --git a/test/CodeGen/ARM/hello.ll b/test/CodeGen/ARM/hello.ll index 893b426..d268585 100644 --- a/test/CodeGen/ARM/hello.ll +++ b/test/CodeGen/ARM/hello.ll @@ -1,8 +1,11 @@ -; RUN: llc < %s -march=arm -; RUN: llc < %s -mtriple=armv6-linux-gnueabi | grep mov | count 1 -; RUN: llc < %s -mtriple=armv6-linux-gnu --disable-fp-elim | \ -; RUN: grep mov | count 2 -; RUN: llc < %s -mtriple=armv6-apple-ios | grep mov | count 2 +; RUN: llc -mtriple=arm-eabi %s -o /dev/null +; RUN: llc -mtriple=armv6-linux-gnueabi %s -o - | FileCheck %s + +; RUN: llc -mtriple=armv6-linux-gnu --disable-fp-elim %s -o - \ +; RUN: | FileCheck %s -check-prefix CHECK-FP-ELIM + +; RUN: llc -mtriple=armv6-apple-ios %s -o - \ +; RUN: | FileCheck %s -check-prefix CHECK-FP-ELIM @str = internal constant [12 x i8] c"Hello World\00" @@ -12,3 +15,11 @@ define i32 @main() { } declare i32 @puts(i8*) + +; CHECK: mov +; CHECK-NOT: mov + +; CHECK-FP-ELIM: mov +; CHECK-FP-ELIM: mov +; CHECK-FP-ELIM-NOT: mov + diff --git a/test/CodeGen/ARM/iabs.ll b/test/CodeGen/ARM/iabs.ll index 600a8c2..c52caf6 100644 --- a/test/CodeGen/ARM/iabs.ll +++ b/test/CodeGen/ARM/iabs.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+v4t | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+v4t %s -o - | FileCheck %s ;; Integer absolute value, should produce something as good as: ARM: ;; movs r0, r0 diff --git a/test/CodeGen/ARM/ifconv-kills.ll b/test/CodeGen/ARM/ifconv-kills.ll index bf54ba2..de80c92 100644 --- a/test/CodeGen/ARM/ifconv-kills.ll +++ b/test/CodeGen/ARM/ifconv-kills.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march arm -mcpu swift -verify-machineinstrs +; RUN: llc -mtriple arm-eabi -mcpu swift -verify-machineinstrs %s -o /dev/null declare i32 @f(i32 %p0, i32 %p1) diff --git a/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll b/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll new file mode 100644 index 0000000..86ed5b2 --- /dev/null +++ b/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll @@ -0,0 +1,62 @@ +; RUN: llc < %s -mtriple=armv4t--linux-androideabi -print-machineinstrs=if-converter -o /dev/null 2>&1 | FileCheck %s +; Fix a bug triggered in IfConverterTriangle when CvtBB has multiple +; predecessors. 
+; PR18752 + +%classK = type { i8, %classF } +%classF = type { i8 } +%classL = type { %classG, i32, i32 } +%classG = type { %classL* } +%classM2 = type { %classL } + +define zeroext i1 @test(%classK* %this, %classM2* nocapture readnone %p1, %classM2* nocapture readnone %p2) align 2 { +entry: + br i1 undef, label %for.end, label %for.body + +; Before if conversion, we have +; for.body -> lor.lhs.false.i (62) +; -> for.cond.backedge (62) +; lor.lhs.false.i -> for.cond.backedge (1048575) +; -> cond.false.i (1) +; Afer if conversion, we have +; for.body -> for.cond.backedge (130023362) +; -> cond.false.i (62) +; CHECK: BB#1: derived from LLVM BB %for.body +; CHECK: Successors according to CFG: BB#2(130023362) BB#4(62) +for.body: + br i1 undef, label %for.cond.backedge, label %lor.lhs.false.i + +for.cond.backedge: + %tobool = icmp eq %classL* undef, null + br i1 %tobool, label %for.end, label %for.body + +lor.lhs.false.i: + %tobool.i.i7 = icmp eq i32 undef, 0 + br i1 %tobool.i.i7, label %for.cond.backedge, label %cond.false.i + +cond.false.i: + call void @_Z3fn1v() + unreachable + +for.end: + br i1 undef, label %if.else.i.i, label %if.then.i.i + +if.then.i.i: + store %classL* null, %classL** undef, align 4 + br label %_ZN1M6spliceEv.exit + +if.else.i.i: + store %classL* null, %classL** null, align 4 + br label %_ZN1M6spliceEv.exit + +_ZN1M6spliceEv.exit: + %LIS = getelementptr inbounds %classK* %this, i32 0, i32 1 + call void @_ZN1F10handleMoveEb(%classF* %LIS, i1 zeroext false) + unreachable +} + +declare %classL* @_ZN1M1JI1LS1_EcvPS1_Ev(%classM2*) +declare void @_ZN1F10handleMoveEb(%classF*, i1 zeroext) +declare void @_Z3fn1v() + +!0 = metadata !{metadata !"clang version 3.5"} diff --git a/test/CodeGen/ARM/ifcvt-branch-weight.ll b/test/CodeGen/ARM/ifcvt-branch-weight.ll new file mode 100644 index 0000000..cd8a561 --- /dev/null +++ b/test/CodeGen/ARM/ifcvt-branch-weight.ll @@ -0,0 +1,42 @@ +; RUN: llc < %s -mtriple=thumbv8 -print-machineinstrs=if-converter -o /dev/null 2>&1 | FileCheck %s + +%struct.S = type { i8* (i8*)*, [1 x i8] } +define internal zeroext i8 @bar(%struct.S* %x, %struct.S* nocapture %y) nounwind readonly { +entry: + %0 = getelementptr inbounds %struct.S* %x, i32 0, i32 1, i32 0 + %1 = load i8* %0, align 1 + %2 = zext i8 %1 to i32 + %3 = and i32 %2, 112 + %4 = icmp eq i32 %3, 0 + br i1 %4, label %return, label %bb + +bb: + %5 = getelementptr inbounds %struct.S* %y, i32 0, i32 1, i32 0 + %6 = load i8* %5, align 1 + %7 = zext i8 %6 to i32 + %8 = and i32 %7, 112 + %9 = icmp eq i32 %8, 0 + br i1 %9, label %return, label %bb2 + +; CHECK: BB#2: derived from LLVM BB %bb2 +; CHECK: Successors according to CFG: BB#3(192) BB#4(192) + +bb2: + %v10 = icmp eq i32 %3, 16 + br i1 %v10, label %bb4, label %bb3, !prof !0 + +bb3: + %v11 = icmp eq i32 %8, 16 + br i1 %v11, label %bb4, label %return, !prof !1 + +bb4: + %v12 = ptrtoint %struct.S* %x to i32 + %phitmp = trunc i32 %v12 to i8 + ret i8 %phitmp + +return: + ret i8 1 +} + +!0 = metadata !{metadata !"branch_weights", i32 4, i32 12} +!1 = metadata !{metadata !"branch_weights", i32 8, i32 16} diff --git a/test/CodeGen/ARM/ifcvt1.ll b/test/CodeGen/ARM/ifcvt1.ll index 5a55653..cae2399 100644 --- a/test/CodeGen/ARM/ifcvt1.ll +++ b/test/CodeGen/ARM/ifcvt1.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8 -; RUN: llc < %s -march=arm -mcpu=swift | FileCheck %s -check-prefix=SWIFT +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s -check-prefix=A8 +; RUN: llc -mtriple=arm-eabi 
-mcpu=swift %s -o - | FileCheck %s -check-prefix=SWIFT define i32 @t1(i32 %a, i32 %b) { ; A8-LABEL: t1: diff --git a/test/CodeGen/ARM/ifcvt2.ll b/test/CodeGen/ARM/ifcvt2.ll index e34edec..e445416 100644 --- a/test/CodeGen/ARM/ifcvt2.ll +++ b/test/CodeGen/ARM/ifcvt2.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+v4t | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+v4t %s -o - | FileCheck %s define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK-LABEL: t1: diff --git a/test/CodeGen/ARM/ifcvt3.ll b/test/CodeGen/ARM/ifcvt3.ll index fa7d618..5da63dc 100644 --- a/test/CodeGen/ARM/ifcvt3.ll +++ b/test/CodeGen/ARM/ifcvt3.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -; RUN: llc < %s -march=arm -mattr=+v4t | grep cmpne | count 1 -; RUN: llc < %s -march=arm -mattr=+v4t | grep bx | count 2 +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+v4t %s -o - | FileCheck %s -check-prefix CHECK-V4-CMP +; RUN: llc -mtriple=arm-eabi -mattr=+v4t %s -o - | FileCheck %s -check-prefix CHECK-V4-BX define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK-LABEL: t1: @@ -22,3 +22,11 @@ cond_next: %tmp15 = add i32 %b, %a ret i32 %tmp15 } + +; CHECK-V4-CMP: cmpne +; CHECK-V4-CMP-NOT: cmpne + +; CHECK-V4-BX: bx +; CHECK-V4-BX: bx +; CHECK-V4-BX-NOT: bx + diff --git a/test/CodeGen/ARM/ifcvt4.ll b/test/CodeGen/ARM/ifcvt4.ll index 53c789d..8c6825a 100644 --- a/test/CodeGen/ARM/ifcvt4.ll +++ b/test/CodeGen/ARM/ifcvt4.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm | FileCheck %s +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s ; Do not if-convert when branches go to the different loops. ; CHECK-LABEL: t: diff --git a/test/CodeGen/ARM/ifcvt9.ll b/test/CodeGen/ARM/ifcvt9.ll index 05bdc45..1191716 100644 --- a/test/CodeGen/ARM/ifcvt9.ll +++ b/test/CodeGen/ARM/ifcvt9.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm +; RUN: llc -mtriple=arm-eabi %s -o /dev/null define fastcc void @t() nounwind { entry: diff --git a/test/CodeGen/ARM/illegal-vector-bitcast.ll b/test/CodeGen/ARM/illegal-vector-bitcast.ll index febe6f5..7208fff 100644 --- a/test/CodeGen/ARM/illegal-vector-bitcast.ll +++ b/test/CodeGen/ARM/illegal-vector-bitcast.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=arm -; RUN: llc < %s -mtriple=arm-linux +; RUN: llc -mtriple=arm-eabi %s -o /dev/null +; RUN: llc -mtriple=arm-linux %s -o /dev/null define void @foo(<8 x float>* %f, <8 x float>* %g, <4 x i64>* %y) { diff --git a/test/CodeGen/ARM/imm.ll b/test/CodeGen/ARM/imm.ll index 6f25f9d..e7bc0af 100644 --- a/test/CodeGen/ARM/imm.ll +++ b/test/CodeGen/ARM/imm.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm | not grep CPI +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s define i32 @test1(i32 %A) { %B = add i32 %A, -268435441 ; [#uses=1] @@ -14,3 +14,6 @@ define i32 @test3(i32 %A) { ret i32 %B } +; CHECK-NOT: CPI + + diff --git a/test/CodeGen/ARM/indirect-reg-input.ll b/test/CodeGen/ARM/indirect-reg-input.ll index b936455..17f6a9c 100644 --- a/test/CodeGen/ARM/indirect-reg-input.ll +++ b/test/CodeGen/ARM/indirect-reg-input.ll @@ -1,4 +1,4 @@ -; RUN: not llc < %s -march=arm -mcpu=cortex-a8 2>&1 | FileCheck %s +; RUN: not llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - 2>&1 | FileCheck %s ; Check for error message: ; CHECK: error: inline asm not supported yet: don't know how to handle tied indirect register inputs diff --git a/test/CodeGen/ARM/indirectbr.ll b/test/CodeGen/ARM/indirectbr.ll index 1aeeb91..7c49cb3 100644 --- a/test/CodeGen/ARM/indirectbr.ll +++ 
b/test/CodeGen/ARM/indirectbr.ll @@ -11,6 +11,11 @@ define internal i32 @foo(i32 %i) nounwind { ; THUMB-LABEL: foo: ; THUMB2-LABEL: foo: entry: + ; _nextaddr gets CSEed for use later on. +; THUMB: ldr r[[NEXTADDR_REG:[0-9]+]], [[NEXTADDR_CPI:LCPI0_[0-9]+]] +; THUMB: [[NEXTADDR_PCBASE:LPC0_[0-9]]]: +; THUMB: add r[[NEXTADDR_REG]], pc + %0 = load i8** @nextaddr, align 4 ; [#uses=2] %1 = icmp eq i8* %0, null ; [#uses=1] ; indirect branch gets duplicated here @@ -53,12 +58,11 @@ L1: ; preds = %L2, %bb2 ; ARM: ldr [[R1:r[0-9]+]], LCPI ; ARM: add [[R1b:r[0-9]+]], pc, [[R1]] ; ARM: str [[R1b]] + ; THUMB-LABEL: %L1 -; THUMB: ldr -; THUMB: add ; THUMB: ldr [[R2:r[0-9]+]], LCPI ; THUMB: add [[R2]], pc -; THUMB: str [[R2]] +; THUMB: str [[R2]], [r[[NEXTADDR_REG]]] ; THUMB2-LABEL: %L1 ; THUMB2: ldr [[R2:r[0-9]+]], LCPI ; THUMB2-NEXT: str{{(.w)?}} [[R2]] @@ -67,4 +71,5 @@ L1: ; preds = %L2, %bb2 } ; ARM: .long Ltmp0-(LPC{{.*}}+8) ; THUMB: .long Ltmp0-(LPC{{.*}}+4) +; THUMB: .long _nextaddr-([[NEXTADDR_PCBASE]]+4) ; THUMB2: .long Ltmp0 diff --git a/test/CodeGen/ARM/inline-diagnostics.ll b/test/CodeGen/ARM/inline-diagnostics.ll new file mode 100644 index 0000000..7b77da2 --- /dev/null +++ b/test/CodeGen/ARM/inline-diagnostics.ll @@ -0,0 +1,16 @@ +; RUN: not llc < %s -verify-machineinstrs -mtriple=armv7-none-linux-gnu -mattr=+neon 2>&1 | FileCheck %s + +%struct.float4 = type { float, float, float, float } + +; CHECK: error: Don't know how to handle indirect register inputs yet for constraint 'w' +define float @inline_func(float %f1, float %f2) #0 { + %c1 = alloca %struct.float4, align 4 + %c2 = alloca %struct.float4, align 4 + %c3 = alloca %struct.float4, align 4 + call void asm sideeffect "vmul.f32 ${2:q}, ${0:q}, ${1:q}", "=*r,=*r,*w"(%struct.float4* %c1, %struct.float4* %c2, %struct.float4* %c3) #1, !srcloc !1 + %x = getelementptr inbounds %struct.float4* %c3, i32 0, i32 0 + %1 = load float* %x, align 4 + ret float %1 +} + +!1 = metadata !{i32 271, i32 305} diff --git a/test/CodeGen/ARM/inlineasm-64bit.ll b/test/CodeGen/ARM/inlineasm-64bit.ll index 683a0c4..d098a43 100644 --- a/test/CodeGen/ARM/inlineasm-64bit.ll +++ b/test/CodeGen/ARM/inlineasm-64bit.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -O3 -mtriple=arm-linux-gnueabi | FileCheck %s -; RUN: llc -mtriple=thumbv7-none-linux-gnueabi -verify-machineinstrs < %s | FileCheck %s +; RUN: llc < %s -O3 -mtriple=arm-linux-gnueabi -no-integrated-as | FileCheck %s +; RUN: llc -mtriple=thumbv7-none-linux-gnueabi -verify-machineinstrs -no-integrated-as < %s | FileCheck %s ; check if regs are passing correctly define void @i64_write(i64* %p, i64 %val) nounwind { ; CHECK-LABEL: i64_write: diff --git a/test/CodeGen/ARM/inlineasm-imm-arm.ll b/test/CodeGen/ARM/inlineasm-imm-arm.ll index 45dfcf0..603e52d 100644 --- a/test/CodeGen/ARM/inlineasm-imm-arm.ll +++ b/test/CodeGen/ARM/inlineasm-imm-arm.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm +; RUN: llc -mtriple=arm-eabi -no-integrated-as %s -o /dev/null ; Test ARM-mode "I" constraint, for any Data Processing immediate. define i32 @testI(i32 %x) { diff --git a/test/CodeGen/ARM/inlineasm-ldr-pseudo.ll b/test/CodeGen/ARM/inlineasm-ldr-pseudo.ll new file mode 100644 index 0000000..f63e4b0 --- /dev/null +++ b/test/CodeGen/ARM/inlineasm-ldr-pseudo.ll @@ -0,0 +1,17 @@ +; PR18354 +; We actually need to use -filetype=obj in this test because if we output +; assembly, the current code path will bypass the parser and just write the +; raw text out to the Streamer. We need to actually parse the inlineasm to +; demonstrate the bug. 
Going the asm->obj route does not show the issue. +; RUN: llc -mtriple=arm-none-linux < %s -filetype=obj | llvm-objdump -d - | FileCheck %s +; RUN: llc -mtriple=arm-apple-darwin < %s -filetype=obj | llvm-objdump -d - | FileCheck %s +; CHECK-LABEL: foo: +; CHECK: 0: 00 00 9f e5 ldr r0, [pc] +; CHECK: 4: 0e f0 a0 e1 mov pc, lr +; Make sure the constant pool entry comes after the return +; CHECK: 8: 01 00 00 00 +define i32 @foo() nounwind { +entry: + %0 = tail call i32 asm sideeffect "ldr $0,=1", "=r"() nounwind + ret i32 %0 +} diff --git a/test/CodeGen/ARM/inlineasm-switch-mode-oneway-from-arm.ll b/test/CodeGen/ARM/inlineasm-switch-mode-oneway-from-arm.ll new file mode 100644 index 0000000..3be378d --- /dev/null +++ b/test/CodeGen/ARM/inlineasm-switch-mode-oneway-from-arm.ll @@ -0,0 +1,18 @@ +;RUN: llc -mtriple=armv7-linux-gnueabi < %s | llvm-mc -triple=armv7-linux-gnueabi -filetype=obj | llvm-objdump -triple=armv7 -d - | FileCheck %s +;RUN: llc -mtriple=armv7-linux-gnueabi < %s | FileCheck %s -check-prefix=ASM +;RUN: llc -mtriple=armv7-apple-darwin < %s | FileCheck %s -check-prefix=ASM + +define hidden i32 @bah(i8* %start) #0 align 2 { + %1 = ptrtoint i8* %start to i32 + %2 = tail call i32 asm sideeffect "@ Enter THUMB Mode\0A\09adr r3, 2f+1 \0A\09bx r3 \0A\09.code 16 \0A2: push {r7} \0A\09mov r7, $4 \0A\09svc 0x0 \0A\09pop {r7} \0A\09", "={r0},{r0},{r1},{r2},r,~{r3}"(i32 %1, i32 %1, i32 0, i32 983042) #3 + %3 = add i32 %1, 1 + ret i32 %3 +} +; CHECK: $t +; CHECK: $a +; CHECK: 01 00 81 e2 add r0, r1, #1 + +; .code 32 is implicit +; ASM-LABEL: bah: +; ASM: .code 16 +; ASM: .code 32 diff --git a/test/CodeGen/ARM/inlineasm-switch-mode-oneway-from-thumb.ll b/test/CodeGen/ARM/inlineasm-switch-mode-oneway-from-thumb.ll new file mode 100644 index 0000000..b9bd4c2 --- /dev/null +++ b/test/CodeGen/ARM/inlineasm-switch-mode-oneway-from-thumb.ll @@ -0,0 +1,18 @@ +;RUN: llc -mtriple=thumbv7-linux-gnueabi < %s | llvm-mc -triple=thumbv7-linux-gnueabi -filetype=obj | llvm-objdump -triple=thumbv7 -d - | FileCheck %s +;RUN: llc -mtriple=thumbv7-linux-gnueabi < %s | FileCheck %s -check-prefix=ASM +;RUN: llc -mtriple=thumbv7-apple-darwin < %s | FileCheck %s -check-prefix=ASM + +define hidden i32 @bah(i8* %start) #0 align 2 { + %1 = ptrtoint i8* %start to i32 + %2 = tail call i32 asm sideeffect "@ Enter ARM Mode \0A\09adr r3, 1f \0A\09bx r3 \0A\09.align 2 \0A\09.code 32 \0A1: push {r7} \0A\09mov r7, $4 \0A\09svc 0x0 \0A\09pop {r7} \0A\09", "={r0},{r0},{r1},{r2},r,~{r3}"(i32 %1, i32 %1, i32 0, i32 983042) #3 + %3 = add i32 %1, 1 + ret i32 %3 +} +; CHECK: $a +; CHECK: $t +; CHECK: 48 1c adds r0, r1, #1 + +; ASM: .code 16 +; ASM-LABEL: bah: +; ASM: .code 32 +; ASM: .code 16 diff --git a/test/CodeGen/ARM/inlineasm-switch-mode.ll b/test/CodeGen/ARM/inlineasm-switch-mode.ll new file mode 100644 index 0000000..65fea11 --- /dev/null +++ b/test/CodeGen/ARM/inlineasm-switch-mode.ll @@ -0,0 +1,22 @@ +;RUN: llc -mtriple=thumbv7-linux-gnueabi < %s | llvm-mc -triple=thumbv7-linux-gnueabi -filetype=obj > %t +; Two pass decoding needed because llvm-objdump does not respect mapping symbols +;RUN: llvm-objdump -triple=armv7 -d %t | FileCheck %s --check-prefix=ARM +;RUN: llvm-objdump -triple=thumbv7 -d %t | FileCheck %s --check-prefix=THUMB + +define hidden i32 @bah(i8* %start) #0 align 2 { + %1 = ptrtoint i8* %start to i32 + %2 = tail call i32 asm sideeffect "@ Enter ARM Mode \0A\09adr r3, 1f \0A\09bx r3 \0A\09.align 2 \0A\09.code 32 \0A1: push {r7} \0A\09mov r7, $4 \0A\09svc 0x0 \0A\09pop {r7} \0A\09@ Enter THUMB Mode\0A\09adr 
r3, 2f+1 \0A\09bx r3 \0A\09.code 16 \0A2: \0A\09", "={r0},{r0},{r1},{r2},r,~{r3}"(i32 %1, i32 %1, i32 0, i32 983042) #3 + %3 = add i32 %1, 1 + ret i32 %3 +} + +; ARM: $a +; ARM-NEXT: 04 70 2d e5 str r7, [sp, #-4]! +; ARM: $t +; ARM-NEXT: 48 1c + +; THUMB: $a +; THUMB-NEXT: 04 70 +; THUMB-NEXT: 2d e5 +; THUMB: $t +; THUMB-NEXT: 48 1c adds r0, r1, #1 diff --git a/test/CodeGen/ARM/inlineasm.ll b/test/CodeGen/ARM/inlineasm.ll index cca3c69..39962e0 100644 --- a/test/CodeGen/ARM/inlineasm.ll +++ b/test/CodeGen/ARM/inlineasm.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+v6 +; RUN: llc -mtriple=arm-eabi -mattr=+v6 %s -o /dev/null define i32 @test1(i32 %tmp54) { %tmp56 = tail call i32 asm "uxtb16 $0,$1", "=r,r"( i32 %tmp54 ) ; [#uses=1] diff --git a/test/CodeGen/ARM/inlineasm2.ll b/test/CodeGen/ARM/inlineasm2.ll index a99bccf..5918738 100644 --- a/test/CodeGen/ARM/inlineasm2.ll +++ b/test/CodeGen/ARM/inlineasm2.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 +; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o /dev/null define double @__ieee754_sqrt(double %x) { %tmp2 = tail call double asm "fsqrtd ${0:P}, ${1:P}", "=w,w"( double %x ) diff --git a/test/CodeGen/ARM/inlineasm3.ll b/test/CodeGen/ARM/inlineasm3.ll index 390a44e..eb7ba59 100644 --- a/test/CodeGen/ARM/inlineasm3.ll +++ b/test/CodeGen/ARM/inlineasm3.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=arm -mattr=+neon,+v6t2 | FileCheck %s +; RUN: llc -mtriple=arm-eabi -float-abi=soft -mattr=+neon,+v6t2 -no-integrated-as %s -o - \ +; RUN: | FileCheck %s ; Radar 7449043 %struct.int32x4_t = type { <4 x i32> } diff --git a/test/CodeGen/ARM/inlineasm4.ll b/test/CodeGen/ARM/inlineasm4.ll index 4a1bcca..a117cd2 100644 --- a/test/CodeGen/ARM/inlineasm4.ll +++ b/test/CodeGen/ARM/inlineasm4.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm | FileCheck %s +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s define double @f(double %x) { entry: diff --git a/test/CodeGen/ARM/insn-sched1.ll b/test/CodeGen/ARM/insn-sched1.ll index d188fae..2749a8e 100644 --- a/test/CodeGen/ARM/insn-sched1.ll +++ b/test/CodeGen/ARM/insn-sched1.ll @@ -1,6 +1,5 @@ -; RUN: llc < %s -march=arm -mattr=+v6 -; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+v6 |\ -; RUN: grep mov | count 3 +; RUN: llc -mtriple=arm-eabi -mattr=+v6 %s -o /dev/null +; RUN: llc -mtriple=arm-apple-ios -mattr=+v6 %s -o - | FileCheck %s define i32 @test(i32 %x) { %tmp = trunc i32 %x to i16 ; [#uses=1] @@ -9,3 +8,9 @@ define i32 @test(i32 %x) { } declare i32 @f(i32, i16) + +; CHECK: mov +; CHECK: mov +; CHECK: mov +; CHECK-NOT: mov + diff --git a/test/CodeGen/ARM/integer_insertelement.ll b/test/CodeGen/ARM/integer_insertelement.ll index 1d72afe..bf403b9 100644 --- a/test/CodeGen/ARM/integer_insertelement.ll +++ b/test/CodeGen/ARM/integer_insertelement.ll @@ -1,4 +1,4 @@ -; RUN: llc %s -o - -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s ; This test checks that when inserting one (integer) element into a vector, ; the vector is not spuriously copied. 
"vorr dX, dY, dY" is the way of moving diff --git a/test/CodeGen/ARM/interrupt-attr.ll b/test/CodeGen/ARM/interrupt-attr.ll index 217fd69..9b7b41b 100644 --- a/test/CodeGen/ARM/interrupt-attr.ll +++ b/test/CodeGen/ARM/interrupt-attr.ll @@ -1,6 +1,6 @@ ; RUN: llc -mtriple=arm-none-none-eabi -mcpu=cortex-a15 -o - %s | FileCheck --check-prefix=CHECK-A %s ; RUN: llc -mtriple=thumb-none-none-eabi -mcpu=cortex-a15 -o - %s | FileCheck --check-prefix=CHECK-A-THUMB %s -; RUN: llc -mtriple=thumb-apple-darwin -mcpu=cortex-m3 -o - %s | FileCheck --check-prefix=CHECK-M %s +; RUN: llc -mtriple=thumb-apple-none-macho -mcpu=cortex-m3 -o - %s | FileCheck --check-prefix=CHECK-M %s declare arm_aapcscc void @bar() @@ -12,32 +12,33 @@ define arm_aapcscc void @irq_fn() alignstack(8) "interrupt"="IRQ" { ; Also need special function return setting pc and CPSR simultaneously. ; CHECK-A-LABEL: irq_fn: -; CHECK-A: push {r0, r1, r2, r3, r11, lr} +; CHECK-A: push {r0, r1, r2, r3, r11, r12, lr} ; CHECK-A: add r11, sp, #16 ; CHECK-A: sub sp, sp, #{{[0-9]+}} ; CHECK-A: bic sp, sp, #7 ; CHECK-A: bl bar ; CHECK-A: sub sp, r11, #16 -; CHECK-A: pop {r0, r1, r2, r3, r11, lr} +; CHECK-A: pop {r0, r1, r2, r3, r11, r12, lr} ; CHECK-A: subs pc, lr, #4 ; CHECK-A-THUMB-LABEL: irq_fn: -; CHECK-A-THUMB: push {r0, r1, r2, r3, r4, r7, lr} -; CHECK-A-THUMB: mov r4, sp +; CHECK-A-THUMB: push.w {r0, r1, r2, r3, r4, r7, r12, lr} ; CHECK-A-THUMB: add r7, sp, #20 +; CHECK-A-THUMB: mov r4, sp ; CHECK-A-THUMB: bic r4, r4, #7 ; CHECK-A-THUMB: bl bar ; CHECK-A-THUMB: sub.w r4, r7, #20 ; CHECK-A-THUMB: mov sp, r4 -; CHECK-A-THUMB: pop.w {r0, r1, r2, r3, r4, r7, lr} +; CHECK-A-THUMB: pop.w {r0, r1, r2, r3, r4, r7, r12, lr} ; CHECK-A-THUMB: subs pc, lr, #4 ; Normal AAPCS function (r0-r3 pushed onto stack by hardware, lr set to ; appropriate sentinel so no special return needed). +; CHECK-M-LABEL: irq_fn: ; CHECK-M: push {r4, r7, lr} ; CHECK-M: add r7, sp, #4 -; CHECK-M: sub sp, #4 ; CHECK-M: mov r4, sp +; CHECK-M: bic r4, r4, #7 ; CHECK-M: mov sp, r4 ; CHECK-M: blx _bar ; CHECK-M: subs r4, r7, #4 @@ -48,6 +49,7 @@ define arm_aapcscc void @irq_fn() alignstack(8) "interrupt"="IRQ" { ret void } +; We don't push/pop r12, as it is banked for FIQ define arm_aapcscc void @fiq_fn() alignstack(8) "interrupt"="FIQ" { ; CHECK-A-LABEL: fiq_fn: ; CHECK-A: push {r0, r1, r2, r3, r4, r5, r6, r7, r11, lr} @@ -61,6 +63,8 @@ define arm_aapcscc void @fiq_fn() alignstack(8) "interrupt"="FIQ" { ; CHECK-A: pop {r0, r1, r2, r3, r4, r5, r6, r7, r11, lr} ; CHECK-A: subs pc, lr, #4 +; CHECK-A-THUMB-LABEL: fiq_fn: +; CHECK-M-LABEL: fiq_fn: %val = load volatile [16 x i32]* @bigvar store volatile [16 x i32] %val, [16 x i32]* @bigvar ret void @@ -68,13 +72,13 @@ define arm_aapcscc void @fiq_fn() alignstack(8) "interrupt"="FIQ" { define arm_aapcscc void @swi_fn() alignstack(8) "interrupt"="SWI" { ; CHECK-A-LABEL: swi_fn: -; CHECK-A: push {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-A: push {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, lr} ; CHECK-A: add r11, sp, #44 ; CHECK-A: sub sp, sp, #{{[0-9]+}} ; CHECK-A: bic sp, sp, #7 ; [...] 
; CHECK-A: sub sp, r11, #44 -; CHECK-A: pop {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-A: pop {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, lr} ; CHECK-A: subs pc, lr, #0 %val = load volatile [16 x i32]* @bigvar @@ -84,13 +88,13 @@ define arm_aapcscc void @swi_fn() alignstack(8) "interrupt"="SWI" { define arm_aapcscc void @undef_fn() alignstack(8) "interrupt"="UNDEF" { ; CHECK-A-LABEL: undef_fn: -; CHECK-A: push {r0, r1, r2, r3, r11, lr} +; CHECK-A: push {r0, r1, r2, r3, r11, r12, lr} ; CHECK-A: add r11, sp, #16 ; CHECK-A: sub sp, sp, #{{[0-9]+}} ; CHECK-A: bic sp, sp, #7 ; [...] ; CHECK-A: sub sp, r11, #16 -; CHECK-A: pop {r0, r1, r2, r3, r11, lr} +; CHECK-A: pop {r0, r1, r2, r3, r11, r12, lr} ; CHECK-A: subs pc, lr, #0 call void @bar() @@ -99,13 +103,13 @@ define arm_aapcscc void @undef_fn() alignstack(8) "interrupt"="UNDEF" { define arm_aapcscc void @abort_fn() alignstack(8) "interrupt"="ABORT" { ; CHECK-A-LABEL: abort_fn: -; CHECK-A: push {r0, r1, r2, r3, r11, lr} +; CHECK-A: push {r0, r1, r2, r3, r11, r12, lr} ; CHECK-A: add r11, sp, #16 ; CHECK-A: sub sp, sp, #{{[0-9]+}} ; CHECK-A: bic sp, sp, #7 ; [...] ; CHECK-A: sub sp, r11, #16 -; CHECK-A: pop {r0, r1, r2, r3, r11, lr} +; CHECK-A: pop {r0, r1, r2, r3, r11, r12, lr} ; CHECK-A: subs pc, lr, #4 call void @bar() diff --git a/test/CodeGen/ARM/intrinsics-crypto.ll b/test/CodeGen/ARM/intrinsics-crypto.ll index c038fe6..96413d3 100644 --- a/test/CodeGen/ARM/intrinsics-crypto.ll +++ b/test/CodeGen/ARM/intrinsics-crypto.ll @@ -3,13 +3,13 @@ define arm_aapcs_vfpcc <16 x i8> @test_aesde(<16 x i8>* %a, <16 x i8> *%b) { %tmp = load <16 x i8>* %a %tmp2 = load <16 x i8>* %b - %tmp3 = call <16 x i8> @llvm.arm.neon.aesd.v16i8(<16 x i8> %tmp, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %tmp, <16 x i8> %tmp2) ; CHECK: aesd.8 q{{[0-9]+}}, q{{[0-9]+}} - %tmp4 = call <16 x i8> @llvm.arm.neon.aese.v16i8(<16 x i8> %tmp3, <16 x i8> %tmp2) + %tmp4 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %tmp3, <16 x i8> %tmp2) ; CHECK: aese.8 q{{[0-9]+}}, q{{[0-9]+}} - %tmp5 = call <16 x i8> @llvm.arm.neon.aesimc.v16i8(<16 x i8> %tmp4) + %tmp5 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %tmp4) ; CHECK: aesimc.8 q{{[0-9]+}}, q{{[0-9]+}} - %tmp6 = call <16 x i8> @llvm.arm.neon.aesmc.v16i8(<16 x i8> %tmp5) + %tmp6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %tmp5) ; CHECK: aesmc.8 q{{[0-9]+}}, q{{[0-9]+}} ret <16 x i8> %tmp6 } @@ -18,40 +18,42 @@ define arm_aapcs_vfpcc <4 x i32> @test_sha(<4 x i32> *%a, <4 x i32> *%b, <4 x i3 %tmp = load <4 x i32>* %a %tmp2 = load <4 x i32>* %b %tmp3 = load <4 x i32>* %c - %res1 = call <4 x i32> @llvm.arm.neon.sha1h.v4i32(<4 x i32> %tmp) + %scalar = extractelement <4 x i32> %tmp, i32 0 + %resscalar = call i32 @llvm.arm.neon.sha1h(i32 %scalar) + %res1 = insertelement <4 x i32> undef, i32 %resscalar, i32 0 ; CHECK: sha1h.32 q{{[0-9]+}}, q{{[0-9]+}} - %res2 = call <4 x i32> @llvm.arm.neon.sha1c.v4i32(<4 x i32> %tmp2, <4 x i32> %tmp3, <4 x i32> %res1) + %res2 = call <4 x i32> @llvm.arm.neon.sha1c(<4 x i32> %tmp2, i32 %scalar, <4 x i32> %res1) ; CHECK: sha1c.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} - %res3 = call <4 x i32> @llvm.arm.neon.sha1m.v4i32(<4 x i32> %res2, <4 x i32> %tmp3, <4 x i32> %res1) + %res3 = call <4 x i32> @llvm.arm.neon.sha1m(<4 x i32> %res2, i32 %scalar, <4 x i32> %res1) ; CHECK: sha1m.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} - %res4 = call <4 x i32> @llvm.arm.neon.sha1p.v4i32(<4 x i32> %res3, <4 x i32> %tmp3, <4 x i32> %res1) + %res4 = call <4 x i32> 
@llvm.arm.neon.sha1p(<4 x i32> %res3, i32 %scalar, <4 x i32> %res1) ; CHECK: sha1p.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} - %res5 = call <4 x i32> @llvm.arm.neon.sha1su0.v4i32(<4 x i32> %res4, <4 x i32> %tmp3, <4 x i32> %res1) + %res5 = call <4 x i32> @llvm.arm.neon.sha1su0(<4 x i32> %res4, <4 x i32> %tmp3, <4 x i32> %res1) ; CHECK: sha1su0.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} - %res6 = call <4 x i32> @llvm.arm.neon.sha1su1.v4i32(<4 x i32> %res5, <4 x i32> %res1) + %res6 = call <4 x i32> @llvm.arm.neon.sha1su1(<4 x i32> %res5, <4 x i32> %res1) ; CHECK: sha1su1.32 q{{[0-9]+}}, q{{[0-9]+}} - %res7 = call <4 x i32> @llvm.arm.neon.sha256h.v4i32(<4 x i32> %res6, <4 x i32> %tmp3, <4 x i32> %res1) + %res7 = call <4 x i32> @llvm.arm.neon.sha256h(<4 x i32> %res6, <4 x i32> %tmp3, <4 x i32> %res1) ; CHECK: sha256h.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} - %res8 = call <4 x i32> @llvm.arm.neon.sha256h2.v4i32(<4 x i32> %res7, <4 x i32> %tmp3, <4 x i32> %res1) + %res8 = call <4 x i32> @llvm.arm.neon.sha256h2(<4 x i32> %res7, <4 x i32> %tmp3, <4 x i32> %res1) ; CHECK: sha256h2.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} - %res9 = call <4 x i32> @llvm.arm.neon.sha256su1.v4i32(<4 x i32> %res8, <4 x i32> %tmp3, <4 x i32> %res1) + %res9 = call <4 x i32> @llvm.arm.neon.sha256su1(<4 x i32> %res8, <4 x i32> %tmp3, <4 x i32> %res1) ; CHECK: sha256su1.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} - %res10 = call <4 x i32> @llvm.arm.neon.sha256su0.v4i32(<4 x i32> %res9, <4 x i32> %tmp3) + %res10 = call <4 x i32> @llvm.arm.neon.sha256su0(<4 x i32> %res9, <4 x i32> %tmp3) ; CHECK: sha256su0.32 q{{[0-9]+}}, q{{[0-9]+}} ret <4 x i32> %res10 } -declare <16 x i8> @llvm.arm.neon.aesd.v16i8(<16 x i8>, <16 x i8>) -declare <16 x i8> @llvm.arm.neon.aese.v16i8(<16 x i8>, <16 x i8>) -declare <16 x i8> @llvm.arm.neon.aesimc.v16i8(<16 x i8>) -declare <16 x i8> @llvm.arm.neon.aesmc.v16i8(<16 x i8>) -declare <4 x i32> @llvm.arm.neon.sha1h.v4i32(<4 x i32>) -declare <4 x i32> @llvm.arm.neon.sha1c.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.arm.neon.sha1m.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.arm.neon.sha1p.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.arm.neon.sha1su0.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.arm.neon.sha256h.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.arm.neon.sha256h2.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.arm.neon.sha256su1.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.arm.neon.sha256su0.v4i32(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.arm.neon.sha1su1.v4i32(<4 x i32>, <4 x i32>) +declare <16 x i8> @llvm.arm.neon.aesd(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.arm.neon.aese(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.arm.neon.aesimc(<16 x i8>) +declare <16 x i8> @llvm.arm.neon.aesmc(<16 x i8>) +declare i32 @llvm.arm.neon.sha1h(i32) +declare <4 x i32> @llvm.arm.neon.sha1c(<4 x i32>, i32, <4 x i32>) +declare <4 x i32> @llvm.arm.neon.sha1m(<4 x i32>, i32, <4 x i32>) +declare <4 x i32> @llvm.arm.neon.sha1p(<4 x i32>, i32, <4 x i32>) +declare <4 x i32> @llvm.arm.neon.sha1su0(<4 x i32>, <4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.arm.neon.sha256h(<4 x i32>, <4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.arm.neon.sha256h2(<4 x i32>, <4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.arm.neon.sha256su1(<4 x i32>, <4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.arm.neon.sha256su0(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.arm.neon.sha1su1(<4 x i32>, <4 x i32>) 
diff --git a/test/CodeGen/ARM/ispositive.ll b/test/CodeGen/ARM/ispositive.ll index 2f1a2cf..3086d79 100644 --- a/test/CodeGen/ARM/ispositive.ll +++ b/test/CodeGen/ARM/ispositive.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm | FileCheck %s +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s define i32 @test1(i32 %X) { ; CHECK: lsr{{.*}}#31 diff --git a/test/CodeGen/ARM/large-stack.ll b/test/CodeGen/ARM/large-stack.ll index ddf0f0e..1a9a1fa 100644 --- a/test/CodeGen/ARM/large-stack.ll +++ b/test/CodeGen/ARM/large-stack.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm +; RUN: llc -mtriple=arm-eabi %s -o /dev/null define void @test1() { %tmp = alloca [ 64 x i32 ] , align 4 diff --git a/test/CodeGen/ARM/ldaex-stlex.ll b/test/CodeGen/ARM/ldaex-stlex.ll new file mode 100644 index 0000000..bfdfea3 --- /dev/null +++ b/test/CodeGen/ARM/ldaex-stlex.ll @@ -0,0 +1,92 @@ +; RUN: llc < %s -mtriple=armv8-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=thumbv8-apple-darwin | FileCheck %s + +%0 = type { i32, i32 } + +; CHECK-LABEL: f0: +; CHECK: ldaexd +define i64 @f0(i8* %p) nounwind readonly { +entry: + %ldaexd = tail call %0 @llvm.arm.ldaexd(i8* %p) + %0 = extractvalue %0 %ldaexd, 1 + %1 = extractvalue %0 %ldaexd, 0 + %2 = zext i32 %0 to i64 + %3 = zext i32 %1 to i64 + %shl = shl nuw i64 %2, 32 + %4 = or i64 %shl, %3 + ret i64 %4 +} + +; CHECK-LABEL: f1: +; CHECK: stlexd +define i32 @f1(i8* %ptr, i64 %val) nounwind { +entry: + %tmp4 = trunc i64 %val to i32 + %tmp6 = lshr i64 %val, 32 + %tmp7 = trunc i64 %tmp6 to i32 + %stlexd = tail call i32 @llvm.arm.stlexd(i32 %tmp4, i32 %tmp7, i8* %ptr) + ret i32 %stlexd +} + +declare %0 @llvm.arm.ldaexd(i8*) nounwind readonly +declare i32 @llvm.arm.stlexd(i32, i32, i8*) nounwind + +; CHECK-LABEL: test_load_i8: +; CHECK: ldaexb r0, [r0] +; CHECK-NOT: uxtb +; CHECK-NOT: and +define zeroext i8 @test_load_i8(i8* %addr) { + %val = call i32 @llvm.arm.ldaex.p0i8(i8* %addr) + %val8 = trunc i32 %val to i8 + ret i8 %val8 +} + +; CHECK-LABEL: test_load_i16: +; CHECK: ldaexh r0, [r0] +; CHECK-NOT: uxth +; CHECK-NOT: and +define zeroext i16 @test_load_i16(i16* %addr) { + %val = call i32 @llvm.arm.ldaex.p0i16(i16* %addr) + %val16 = trunc i32 %val to i16 + ret i16 %val16 +} + +; CHECK-LABEL: test_load_i32: +; CHECK: ldaex r0, [r0] +define i32 @test_load_i32(i32* %addr) { + %val = call i32 @llvm.arm.ldaex.p0i32(i32* %addr) + ret i32 %val +} + +declare i32 @llvm.arm.ldaex.p0i8(i8*) nounwind readonly +declare i32 @llvm.arm.ldaex.p0i16(i16*) nounwind readonly +declare i32 @llvm.arm.ldaex.p0i32(i32*) nounwind readonly + +; CHECK-LABEL: test_store_i8: +; CHECK-NOT: uxtb +; CHECK: stlexb r0, r1, [r2] +define i32 @test_store_i8(i32, i8 %val, i8* %addr) { + %extval = zext i8 %val to i32 + %res = call i32 @llvm.arm.stlex.p0i8(i32 %extval, i8* %addr) + ret i32 %res +} + +; CHECK-LABEL: test_store_i16: +; CHECK-NOT: uxth +; CHECK: stlexh r0, r1, [r2] +define i32 @test_store_i16(i32, i16 %val, i16* %addr) { + %extval = zext i16 %val to i32 + %res = call i32 @llvm.arm.stlex.p0i16(i32 %extval, i16* %addr) + ret i32 %res +} + +; CHECK-LABEL: test_store_i32: +; CHECK: stlex r0, r1, [r2] +define i32 @test_store_i32(i32, i32 %val, i32* %addr) { + %res = call i32 @llvm.arm.stlex.p0i32(i32 %val, i32* %addr) + ret i32 %res +} + +declare i32 @llvm.arm.stlex.p0i8(i32, i8*) nounwind +declare i32 @llvm.arm.stlex.p0i16(i32, i16*) nounwind +declare i32 @llvm.arm.stlex.p0i32(i32, i32*) nounwind diff --git a/test/CodeGen/ARM/ldm.ll b/test/CodeGen/ARM/ldm.ll index d5b805c..3977da6 100644 --- 
a/test/CodeGen/ARM/ldm.ll +++ b/test/CodeGen/ARM/ldm.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s -; RUN: llc < %s -mtriple=armv4t-apple-darwin | FileCheck %s -check-prefix=V4T +; RUN: llc < %s -mtriple=armv7-apple-ios3.0 | FileCheck %s +; RUN: llc < %s -mtriple=armv4t-apple-ios3.0 | FileCheck %s -check-prefix=V4T @X = external global [0 x i32] ; <[0 x i32]*> [#uses=5] diff --git a/test/CodeGen/ARM/ldr.ll b/test/CodeGen/ARM/ldr.ll index e4c695b..57e9977 100644 --- a/test/CodeGen/ARM/ldr.ll +++ b/test/CodeGen/ARM/ldr.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm | FileCheck %s +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s define i32 @f1(i32* %v) { ; CHECK-LABEL: f1: diff --git a/test/CodeGen/ARM/ldr_ext.ll b/test/CodeGen/ARM/ldr_ext.ll index d29eb02..31aaba5 100644 --- a/test/CodeGen/ARM/ldr_ext.ll +++ b/test/CodeGen/ARM/ldr_ext.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm | FileCheck %s +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s define i32 @test1(i8* %t1) nounwind { ; CHECK: ldrb diff --git a/test/CodeGen/ARM/ldr_frame.ll b/test/CodeGen/ARM/ldr_frame.ll index f071b89..ed964ec 100644 --- a/test/CodeGen/ARM/ldr_frame.ll +++ b/test/CodeGen/ARM/ldr_frame.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+v4t | not grep mov +; RUN: llc -mtriple=arm-eabi -mattr=+v4t %s -o - | FileCheck %s define i32 @f1() { %buf = alloca [32 x i32], align 4 @@ -29,3 +29,6 @@ define i32 @f4() { %tmp2 = zext i8 %tmp1 to i32 ret i32 %tmp2 } + +; CHECK-NOT: mov + diff --git a/test/CodeGen/ARM/ldr_post.ll b/test/CodeGen/ARM/ldr_post.ll index f5ff7dd..2558b16 100644 --- a/test/CodeGen/ARM/ldr_post.ll +++ b/test/CodeGen/ARM/ldr_post.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=arm | FileCheck %s -; RUN: llc < %s -march=arm -mcpu=swift | FileCheck %s +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mcpu=swift %s -o - | FileCheck %s ; CHECK-LABEL: test1: ; CHECK: ldr {{.*, \[.*]}}, -r2 diff --git a/test/CodeGen/ARM/ldr_pre.ll b/test/CodeGen/ARM/ldr_pre.ll index 8281827..a97927a 100644 --- a/test/CodeGen/ARM/ldr_pre.ll +++ b/test/CodeGen/ARM/ldr_pre.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=arm | FileCheck %s -; RUN: llc < %s -march=arm -mcpu=swift | FileCheck %s +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mcpu=swift %s -o - | FileCheck %s ; CHECK-LABEL: test1: ; CHECK: ldr {{.*!}} diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll index 864d18a..caef2e7 100644 --- a/test/CodeGen/ARM/ldrd.ll +++ b/test/CodeGen/ARM/ldrd.ll @@ -1,8 +1,8 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=fast -optimize-regalloc=0 | FileCheck %s -check-prefix=A8 -; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-m3 -regalloc=fast -optimize-regalloc=0 | FileCheck %s -check-prefix=M3 +; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=fast -optimize-regalloc=0 | FileCheck %s -check-prefix=A8 -check-prefix=CHECK +; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-m3 -regalloc=fast -optimize-regalloc=0 | FileCheck %s -check-prefix=M3 -check-prefix=CHECK ; rdar://6949835 -; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=BASIC -; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=greedy | FileCheck %s -check-prefix=GREEDY +; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=BASIC -check-prefix=CHECK +; RUN: llc < %s 
-mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=greedy | FileCheck %s -check-prefix=GREEDY -check-prefix=CHECK ; Magic ARM pair hints works best with linearscan / fast. diff --git a/test/CodeGen/ARM/ldstrex.ll b/test/CodeGen/ARM/ldstrex.ll index 5eaae53..a40e255 100644 --- a/test/CodeGen/ARM/ldstrex.ll +++ b/test/CodeGen/ARM/ldstrex.ll @@ -36,17 +36,21 @@ declare i32 @llvm.arm.strexd(i32, i32, i8*) nounwind ; CHECK-LABEL: test_load_i8: ; CHECK: ldrexb r0, [r0] ; CHECK-NOT: uxtb -define i32 @test_load_i8(i8* %addr) { +; CHECK-NOT: and +define zeroext i8 @test_load_i8(i8* %addr) { %val = call i32 @llvm.arm.ldrex.p0i8(i8* %addr) - ret i32 %val + %val8 = trunc i32 %val to i8 + ret i8 %val8 } ; CHECK-LABEL: test_load_i16: ; CHECK: ldrexh r0, [r0] ; CHECK-NOT: uxth -define i32 @test_load_i16(i16* %addr) { +; CHECK-NOT: and +define zeroext i16 @test_load_i16(i16* %addr) { %val = call i32 @llvm.arm.ldrex.p0i16(i16* %addr) - ret i32 %val + %val16 = trunc i32 %val to i16 + ret i16 %val16 } ; CHECK-LABEL: test_load_i32: @@ -137,3 +141,19 @@ define void @excl_addrmode() { ret void } + +; LLVM should know, even across basic blocks, that ldrex is setting the high +; bits of its i32 to 0. There should be no zero-extend operation. +define zeroext i8 @test_cross_block_zext_i8(i1 %tst, i8* %addr) { +; CHECK: test_cross_block_zext_i8: +; CHECK-NOT: uxtb +; CHECK-NOT: and +; CHECK: bx lr + %val = call i32 @llvm.arm.ldrex.p0i8(i8* %addr) + br i1 %tst, label %end, label %mid +mid: + ret i8 42 +end: + %val8 = trunc i32 %val to i8 + ret i8 %val8 +} diff --git a/test/CodeGen/ARM/load.ll b/test/CodeGen/ARM/load.ll index 253b0e1..ca16adc 100644 --- a/test/CodeGen/ARM/load.ll +++ b/test/CodeGen/ARM/load.ll @@ -1,9 +1,4 @@ -; RUN: llc < %s -march=arm > %t -; RUN: grep ldrsb %t -; RUN: grep ldrb %t -; RUN: grep ldrsh %t -; RUN: grep ldrh %t - +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s define i32 @f1(i8* %p) { entry: @@ -32,3 +27,9 @@ entry: %tmp4 = zext i16 %tmp to i32 ; [#uses=1] ret i32 %tmp4 } + +; CHECK: ldrsb +; CHECK: ldrb +; CHECK: ldrsh +; CHECK: ldrh + diff --git a/test/CodeGen/ARM/long-setcc.ll b/test/CodeGen/ARM/long-setcc.ll index c76a5e4..f09167e 100644 --- a/test/CodeGen/ARM/long-setcc.ll +++ b/test/CodeGen/ARM/long-setcc.ll @@ -1,5 +1,4 @@ -; RUN: llc < %s -march=arm | grep cmp | count 1 - +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s define i1 @t1(i64 %x) { %B = icmp slt i64 %x, 0 @@ -15,3 +14,7 @@ define i1 @t3(i32 %x) { %tmp = icmp ugt i32 %x, -1 ret i1 %tmp } + +; CHECK: cmp +; CHECK-NOT: cmp + diff --git a/test/CodeGen/ARM/long.ll b/test/CodeGen/ARM/long.ll index 7fffc81..d0bff4a 100644 --- a/test/CodeGen/ARM/long.ll +++ b/test/CodeGen/ARM/long.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm | FileCheck %s +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s define i64 @f1() { ; CHECK-LABEL: f1: diff --git a/test/CodeGen/ARM/longMAC.ll b/test/CodeGen/ARM/longMAC.ll index 2cf91c3..5636a12 100644 --- a/test/CodeGen/ARM/longMAC.ll +++ b/test/CodeGen/ARM/longMAC.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=arm | FileCheck %s +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s +; RUN: llc -mtriple=armv7-eabi %s -o - | FileCheck %s --check-prefix=CHECK-V7 ; Check generated signed and unsigned multiply accumulate long. define i64 @MACLongTest1(i32 %a, i32 %b, i64 %c) { @@ -42,3 +43,28 @@ define i64 @MACLongTest4(i32 %a, i32 %b, i32 %c) { %add = add nsw i64 %mul, %conv2 ret i64 %add } + +; Two things to check here: the @earlyclobber constraint (on <= v5) and the "$Rd = $R" ones. 
+; + Without @earlyclobber the v7 code is natural. With it, the first two +; registers must be distinct from the third. +; + Without "$Rd = $R", this can be satisfied without a mov before the umlal +; by trying to use 6 different registers in the MachineInstr. The natural +; evolution of this attempt currently leaves only two movs in the final +; function, both after the umlal. With it, *some* move has to happen +; before the umlal. +define i64 @MACLongTest5(i64 %c, i32 %a, i32 %b) { +; CHECK-V7-LABEL: MACLongTest5: +; CHECK-V7-LABEL: umlal r0, r1, r0, r0 + +; CHECK-LABEL: MACLongTest5: +; CHECK: mov [[RDLO:r[0-9]+]], r0 +; CHECK: umlal [[RDLO]], r1, r0, r0 +; CHECK: mov r0, [[RDLO]] + + %conv.trunc = trunc i64 %c to i32 + %conv = zext i32 %conv.trunc to i64 + %conv1 = zext i32 %b to i64 + %mul = mul i64 %conv, %conv + %add = add i64 %mul, %c + ret i64 %add +} diff --git a/test/CodeGen/ARM/long_shift.ll b/test/CodeGen/ARM/long_shift.ll index 3e986d80..48b0ba7 100644 --- a/test/CodeGen/ARM/long_shift.ll +++ b/test/CodeGen/ARM/long_shift.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm | FileCheck %s +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s define i64 @f0(i64 %A, i64 %B) { ; CHECK-LABEL: f0: diff --git a/test/CodeGen/ARM/lsr-scale-addr-mode.ll b/test/CodeGen/ARM/lsr-scale-addr-mode.ll index 0c8d387..9480241 100644 --- a/test/CodeGen/ARM/lsr-scale-addr-mode.ll +++ b/test/CodeGen/ARM/lsr-scale-addr-mode.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm | grep lsl | grep -F "lsl #2]" +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s ; Should use scaled addressing mode. define void @sintzero(i32* %a) nounwind { @@ -17,3 +17,6 @@ cond_next: ; preds = %cond_next, %entry return: ; preds = %cond_next ret void } + +; CHECK: lsl{{.*}}#2] + diff --git a/test/CodeGen/ARM/lsr-unfolded-offset.ll b/test/CodeGen/ARM/lsr-unfolded-offset.ll index 26d4be2..1dafa00 100644 --- a/test/CodeGen/ARM/lsr-unfolded-offset.ll +++ b/test/CodeGen/ARM/lsr-unfolded-offset.ll @@ -4,7 +4,7 @@ ; register pressure and therefore spilling. There is more room for improvement ; here. -; CHECK: sub sp, #{{40|32|28|24}} +; CHECK: sub sp, #{{40|36|32|28|24}} ; CHECK: %for.inc ; CHECK-NOT: ldr diff --git a/test/CodeGen/ARM/machine-licm.ll b/test/CodeGen/ARM/machine-licm.ll index fc9b226..ca65501 100644 --- a/test/CodeGen/ARM/machine-licm.ll +++ b/test/CodeGen/ARM/machine-licm.ll @@ -5,20 +5,12 @@ ; rdar://7354376 ; rdar://8887598 -; The generated code is no where near ideal. It's not recognizing the two -; constantpool entries being loaded can be merged into one. - @GV = external global i32 ; [#uses=2] define void @t(i32* nocapture %vals, i32 %c) nounwind { entry: ; ARM-LABEL: t: ; ARM: ldr [[REGISTER_1:r[0-9]+]], LCPI0_0 -; Unfortunately currently ARM codegen doesn't cse the ldr from constantpool. -; The issue is it can be read by an "add pc" or a "ldr [pc]" so it's messy -; to add the pseudo instructions to make sure they are CSE'ed at the same -; time as the "ldr cp". 
-; ARM: ldr r{{[0-9]+}}, LCPI0_1 ; ARM: LPC0_0: ; ARM: ldr r{{[0-9]+}}, [pc, [[REGISTER_1]]] ; ARM: ldr r{{[0-9]+}}, [r{{[0-9]+}}] @@ -36,7 +28,7 @@ entry: bb.nph: ; preds = %entry ; ARM: LCPI0_0: -; ARM: LCPI0_1: +; ARM-NOT: LCPI0_1: ; ARM: .section ; THUMB: BB#1 diff --git a/test/CodeGen/ARM/mature-mc-support.ll b/test/CodeGen/ARM/mature-mc-support.ll new file mode 100644 index 0000000..0a7e5b9 --- /dev/null +++ b/test/CodeGen/ARM/mature-mc-support.ll @@ -0,0 +1,12 @@ +; Test that inline assembly is parsed by the MC layer when MC support is mature +; (even when the output is assembly). + +; RUN: not llc -mtriple=arm-pc-linux < %s > /dev/null 2> %t1 +; RUN: FileCheck %s < %t1 + +; RUN: not llc -mtriple=arm-pc-linux -filetype=obj < %s > /dev/null 2> %t2 +; RUN: FileCheck %s < %t2 + +module asm " .this_directive_is_very_unlikely_to_exist" + +; CHECK: LLVM ERROR: Error parsing inline asm diff --git a/test/CodeGen/ARM/mem.ll b/test/CodeGen/ARM/mem.ll index f46c7a5..3c9cd91 100644 --- a/test/CodeGen/ARM/mem.ll +++ b/test/CodeGen/ARM/mem.ll @@ -1,5 +1,4 @@ -; RUN: llc < %s -march=arm | grep strb -; RUN: llc < %s -march=arm | grep strh +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s define void @f1() { entry: @@ -7,8 +6,13 @@ entry: ret void } +; CHECK: strb + define void @f2() { entry: store i16 0, i16* null ret void } + +; CHECK: strh + diff --git a/test/CodeGen/ARM/memcpy-inline.ll b/test/CodeGen/ARM/memcpy-inline.ll index 946c63e..14d84de 100644 --- a/test/CodeGen/ARM/memcpy-inline.ll +++ b/test/CodeGen/ARM/memcpy-inline.ll @@ -38,7 +38,8 @@ entry: define void @t2(i8* nocapture %C) nounwind { entry: ; CHECK-LABEL: t2: -; CHECK: ldr [[REG2:r[0-9]+]], [r1, #32] +; CHECK: movw [[REG2:r[0-9]+]], #16716 +; CHECK: movt [[REG2:r[0-9]+]], #72 ; CHECK: str [[REG2]], [r0, #32] ; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] ; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0] @@ -79,7 +80,8 @@ entry: ; CHECK: strb [[REG5]], [r0, #6] ; CHECK: movw [[REG6:r[0-9]+]], #21587 ; CHECK: strh [[REG6]], [r0, #4] -; CHECK: ldr [[REG7:r[0-9]+]], +; CHECK: movw [[REG7:r[0-9]+]], #18500 +; CHECK: movt [[REG7:r[0-9]+]], #22866 ; CHECK: str [[REG7]] tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8]* @.str5, i64 0, i64 0), i64 7, i32 1, i1 false) ret void diff --git a/test/CodeGen/ARM/memfunc.ll b/test/CodeGen/ARM/memfunc.ll index fe0056c..8d3800b 100644 --- a/test/CodeGen/ARM/memfunc.ll +++ b/test/CodeGen/ARM/memfunc.ll @@ -1,6 +1,7 @@ ; RUN: llc < %s -mtriple=armv7-apple-ios -o - | FileCheck %s -; RUN: llc < %s -mtriple=thumbv7m-darwin-eabi -o - | FileCheck %s --check-prefix=DARWIN +; RUN: llc < %s -mtriple=thumbv7m-none-macho -o - | FileCheck %s --check-prefix=DARWIN ; RUN: llc < %s -mtriple=arm-none-eabi -o - | FileCheck --check-prefix=EABI %s +; RUN: llc < %s -mtriple=arm-none-eabihf -o - | FileCheck --check-prefix=EABI %s @from = common global [500 x i32] zeroinitializer, align 4 @to = common global [500 x i32] zeroinitializer, align 4 diff --git a/test/CodeGen/ARM/minsize-imms.ll b/test/CodeGen/ARM/minsize-imms.ll new file mode 100644 index 0000000..4c8ff39 --- /dev/null +++ b/test/CodeGen/ARM/minsize-imms.ll @@ -0,0 +1,57 @@ +; RUN: llc -mtriple=thumbv7m-macho -o - -show-mc-encoding %s | FileCheck %s +; RUN: llc -mtriple=thumbv6m-macho -o - -show-mc-encoding %s | FileCheck %s --check-prefix=CHECK-V6M +; RUN: llc -mtriple=armv6-macho -o - -show-mc-encoding %s | FileCheck %s --check-prefix=CHECK-ARM +define i32 @test_mov() minsize { +; CHECK-LABEL: test_mov: +; CHECK: movs 
r0, #255 @ encoding: [0xff,0x20] + + ret i32 255 +} + +define i32 @test_mov_mvn() minsize { +; CHECK-LABEL: test_mov_mvn: +; CHECK: mvn r0, #203 @ encoding: [0x6f,0xf0,0xcb,0x00] + +; CHECK-V6M-LABEL: test_mov_mvn: +; CHECK-V6M: movs [[TMP:r[0-7]]], #203 @ encoding: [0xcb,0x20] +; CHECK-V6M: mvns r0, [[TMP]] @ encoding: [0xc0,0x43] + +; CHECK-ARM-LABEL: test_mov_mvn: +; CHECK-ARM: mvn r0, #203 @ encoding: [0xcb,0x00,0xe0,0xe3] + ret i32 4294967092 +} + +define i32 @test_mov_lsl() minsize { +; CHECK-LABEL: test_mov_lsl: +; CHECK: mov.w r0, #589824 @ encoding: [0x4f,0xf4,0x10,0x20] + +; CHECK-V6M-LABEL: test_mov_lsl: +; CHECK-V6M: movs [[TMP:r[0-7]]], #9 @ encoding: [0x09,0x20] +; CHECK-V6M: lsls r0, [[TMP]], #16 @ encoding: [0x00,0x04] + +; CHECK-ARM-LABEL: test_mov_lsl: +; CHECK-ARM: mov r0, #589824 @ encoding: [0x09,0x08,0xa0,0xe3] + ret i32 589824 +} + +define i32 @test_movw() minsize { +; CHECK-LABEL: test_movw: +; CHECK: movw r0, #65535 + +; CHECK-V6M-LABEL: test_movw: +; CHECK-V6M: ldr r0, [[CONSTPOOL:LCPI[0-9]+_[0-9]+]] @ encoding: [A,0x48] +; CHECK-V6M: [[CONSTPOOL]]: +; CHECK-V6M-NEXT: .long 65535 + +; CHECK-ARM-LABEL: test_movw: +; CHECK-ARM: mov r0, #255 @ encoding: [0xff,0x00,0xa0,0xe3] +; CHECK-ARM: orr r0, r0, #65280 @ encoding: [0xff,0x0c,0x80,0xe3] + ret i32 65535 +} + +define i32 @test_regress1() { +; CHECK-ARM-LABEL: test_regress1: +; CHECK-ARM: mov r0, #248 @ encoding: [0xf8,0x00,0xa0,0xe3] +; CHECK-ARM: orr r0, r0, #16252928 @ encoding: [0x3e,0x07,0x80,0xe3] + ret i32 16253176 +} diff --git a/test/CodeGen/ARM/minsize-litpools.ll b/test/CodeGen/ARM/minsize-litpools.ll new file mode 100644 index 0000000..d5cd2a9 --- /dev/null +++ b/test/CodeGen/ARM/minsize-litpools.ll @@ -0,0 +1,26 @@ +; RUN: llc -mtriple=thumbv7s %s -o - | FileCheck %s +; RUN: llc -mtriple=armv7s %s -o - | FileCheck %s + +; CodeGen should be able to set and reset the MinSize subtarget-feature, and +; make use of it in deciding whether to use MOVW/MOVT for global variables or a +; lit-pool load (saving roughly 2 bytes of code). 
+ +@var = global i32 0 + +define i32 @small_global() minsize { +; CHECK-LABEL: small_global: +; CHECK: ldr r[[GLOBDEST:[0-9]+]], {{.?LCPI0_0}} +; CHECK: ldr r0, [r[[GLOBDEST]]] + + %val = load i32* @var + ret i32 %val +} + +define i32 @big_global() { +; CHECK-LABEL: big_global: +; CHECK: movw [[GLOBDEST:r[0-9]+]], :lower16:var +; CHECK: movt [[GLOBDEST]], :upper16:var + + %val = load i32* @var + ret i32 %val +} diff --git a/test/CodeGen/ARM/mls.ll b/test/CodeGen/ARM/mls.ll index 8f0d3a8..6776e63 100644 --- a/test/CodeGen/ARM/mls.ll +++ b/test/CodeGen/ARM/mls.ll @@ -1,5 +1,6 @@ -; RUN: llc < %s -march=arm -mattr=+v6t2 | FileCheck %s -; RUN: llc < %s -march=arm -mattr=+v6t2 -arm-use-mulops=false | FileCheck %s -check-prefix=NO_MULOPS +; RUN: llc -mtriple=arm-eabi -mattr=+v6t2 %s -o - | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+v6t2 -arm-use-mulops=false %s -o - \ +; RUN: | FileCheck %s -check-prefix=NO_MULOPS define i32 @f1(i32 %a, i32 %b, i32 %c) { %tmp1 = mul i32 %a, %b diff --git a/test/CodeGen/ARM/movt-movw-global.ll b/test/CodeGen/ARM/movt-movw-global.ll index bbedea1..1e10af1 100644 --- a/test/CodeGen/ARM/movt-movw-global.ll +++ b/test/CodeGen/ARM/movt-movw-global.ll @@ -16,8 +16,8 @@ entry: ; IOS-PIC: movw r0, :lower16:(L_foo$non_lazy_ptr-(LPC0_0+8)) ; IOS-PIC-NEXT: movt r0, :upper16:(L_foo$non_lazy_ptr-(LPC0_0+8)) -; IOS-STATIC-NOT: movw r0, :lower16:_foo -; IOS-STATIC-NOT: movt r0, :upper16:_foo +; IOS-STATIC: movw r0, :lower16:_foo +; IOS-STATIC-NEXT: movt r0, :upper16:_foo ret i32* @foo } @@ -32,8 +32,8 @@ entry: ; IOS-PIC: movw r1, :lower16:(L_foo$non_lazy_ptr-(LPC1_0+8)) ; IOS-PIC-NEXT: movt r1, :upper16:(L_foo$non_lazy_ptr-(LPC1_0+8)) -; IOS-STATIC-NOT: movw r1, :lower16:_foo -; IOS-STATIC-NOT: movt r1, :upper16:_foo +; IOS-STATIC: movw r1, :lower16:_foo +; IOS-STATIC-NEXT: movt r1, :upper16:_foo store i32 %baz, i32* @foo, align 4 ret void } diff --git a/test/CodeGen/ARM/movt.ll b/test/CodeGen/ARM/movt.ll index 25c1bfe..735d949 100644 --- a/test/CodeGen/ARM/movt.ll +++ b/test/CodeGen/ARM/movt.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+thumb2 | FileCheck %s +; RUN: llc < %s -march=arm -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s ; rdar://7317664 define i32 @t(i32 %X) nounwind { diff --git a/test/CodeGen/ARM/mul_const.ll b/test/CodeGen/ARM/mul_const.ll index 482d8f2..ada3d4e 100644 --- a/test/CodeGen/ARM/mul_const.ll +++ b/test/CodeGen/ARM/mul_const.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm | FileCheck %s +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s define i32 @t9(i32 %v) nounwind readnone { entry: diff --git a/test/CodeGen/ARM/mulhi.ll b/test/CodeGen/ARM/mulhi.ll index 63705c5..c66a804 100644 --- a/test/CodeGen/ARM/mulhi.ll +++ b/test/CodeGen/ARM/mulhi.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -march=arm -mattr=+v6 | FileCheck %s -check-prefix=V6 -; RUN: llc < %s -march=arm | FileCheck %s -check-prefix=V4 -; RUN: llc < %s -march=thumb -mcpu=cortex-m3 | FileCheck %s -check-prefix=M3 +; RUN: llc -mtriple=arm-eabi -mattr=+v6 %s -o - | FileCheck %s -check-prefix=V6 +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s -check-prefix=V4 +; RUN: llc -mtriple=thumb-eabi -mcpu=cortex-m3 %s -o - | FileCheck %s -check-prefix=M3 define i32 @smulhi(i32 %x, i32 %y) nounwind { ; V6-LABEL: smulhi: diff --git a/test/CodeGen/ARM/mult-alt-generic-arm.ll b/test/CodeGen/ARM/mult-alt-generic-arm.ll index a8104db..05e9b0f 100644 --- a/test/CodeGen/ARM/mult-alt-generic-arm.ll +++ b/test/CodeGen/ARM/mult-alt-generic-arm.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm +; RUN: 
llc < %s -march=arm -no-integrated-as ; ModuleID = 'mult-alt-generic.c' target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32" target triple = "arm" diff --git a/test/CodeGen/ARM/mvn.ll b/test/CodeGen/ARM/mvn.ll index 2c5ccd7..489f247 100644 --- a/test/CodeGen/ARM/mvn.ll +++ b/test/CodeGen/ARM/mvn.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm | grep mvn | count 9 +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s define i32 @f1() { entry: @@ -72,3 +72,16 @@ entry: %tmp102 = icmp eq i32 -2, %a ; [#uses=1] ret i1 %tmp102 } + +; CHECK-LABEL: f1 +; CHECK: mvn +; CHECK: mvn +; CHECK: mvn +; CHECK: mvn +; CHECK: mvn +; CHECK: mvn +; CHECK: mvn +; CHECK: mvn +; CHECK: mvn +; CHECK-NOT: mvn + diff --git a/test/CodeGen/ARM/neon_arith1.ll b/test/CodeGen/ARM/neon_arith1.ll index 5892737..42e7d82 100644 --- a/test/CodeGen/ARM/neon_arith1.ll +++ b/test/CodeGen/ARM/neon_arith1.ll @@ -1,7 +1,10 @@ -; RUN: llc < %s -march=arm -mattr=+neon | grep vadd +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define <8 x i8> @t_i8x8(<8 x i8> %a, <8 x i8> %b) nounwind { entry: %0 = add <8 x i8> %a, %b ret <8 x i8> %0 } + +; CHECK: vadd + diff --git a/test/CodeGen/ARM/neon_cmp.ll b/test/CodeGen/ARM/neon_cmp.ll index 046b5da..e1662c4 100644 --- a/test/CodeGen/ARM/neon_cmp.ll +++ b/test/CodeGen/ARM/neon_cmp.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 %s -o - | FileCheck %s + ; bug 15283 ; radar://13191881 ; CHECK: vfcmp diff --git a/test/CodeGen/ARM/neon_div.ll b/test/CodeGen/ARM/neon_div.ll index 4a82c36..4f1607e 100644 --- a/test/CodeGen/ARM/neon_div.ll +++ b/test/CodeGen/ARM/neon_div.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=arm -mattr=+neon -pre-RA-sched=source -disable-post-ra | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon -pre-RA-sched=source -disable-post-ra %s -o - \ +; RUN: | FileCheck %s define <8 x i8> @sdivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vrecpe.f32 diff --git a/test/CodeGen/ARM/neon_fpconv.ll b/test/CodeGen/ARM/neon_fpconv.ll index 149f4c7..8e37ce7 100644 --- a/test/CodeGen/ARM/neon_fpconv.ll +++ b/test/CodeGen/ARM/neon_fpconv.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s ; PR12540: ARM backend lowering of FP_ROUND v2f64 to v2f32. 
define <2 x float> @vtrunc(<2 x double> %a) { diff --git a/test/CodeGen/ARM/neon_ld1.ll b/test/CodeGen/ARM/neon_ld1.ll index b892d2d..9fd3fc5 100644 --- a/test/CodeGen/ARM/neon_ld1.ll +++ b/test/CodeGen/ARM/neon_ld1.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -float-abi=soft -mattr=+neon %s -o - | FileCheck %s ; CHECK: t1 ; CHECK: vldr d diff --git a/test/CodeGen/ARM/neon_ld2.ll b/test/CodeGen/ARM/neon_ld2.ll index 25a670b..571a16a 100644 --- a/test/CodeGen/ARM/neon_ld2.ll +++ b/test/CodeGen/ARM/neon_ld2.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -; RUN: llc < %s -march=arm -mcpu=swift | FileCheck %s --check-prefix=SWIFT +; RUN: llc -mtriple=arm-eabi -float-abi=soft -mattr=+neon %s -o - | FileCheck %s +; RUN: llc -mtriple=arm-eabi -float-abi=soft -mcpu=swift %s -o - | FileCheck %s --check-prefix=SWIFT ; CHECK: t1 ; CHECK: vld1.64 diff --git a/test/CodeGen/ARM/neon_minmax.ll b/test/CodeGen/ARM/neon_minmax.ll index 2e45919..84e4b30 100644 --- a/test/CodeGen/ARM/neon_minmax.ll +++ b/test/CodeGen/ARM/neon_minmax.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mcpu=swift | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mcpu=swift %s -o - | FileCheck %s define float @fmin_ole(float %x) nounwind { ;CHECK-LABEL: fmin_ole: diff --git a/test/CodeGen/ARM/neon_shift.ll b/test/CodeGen/ARM/neon_shift.ll index 340f220..3c09358 100644 --- a/test/CodeGen/ARM/neon_shift.ll +++ b/test/CodeGen/ARM/neon_shift.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s ; define <4 x i16> @t1(<4 x i32> %a) nounwind { diff --git a/test/CodeGen/ARM/neon_vabs.ll b/test/CodeGen/ARM/neon_vabs.ll index 76b6044..7a02512 100644 --- a/test/CodeGen/ARM/neon_vabs.ll +++ b/test/CodeGen/ARM/neon_vabs.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define <4 x i32> @test1(<4 x i32> %a) nounwind { ; CHECK-LABEL: test1: diff --git a/test/CodeGen/ARM/none-macho.ll b/test/CodeGen/ARM/none-macho.ll new file mode 100644 index 0000000..2795b8c --- /dev/null +++ b/test/CodeGen/ARM/none-macho.ll @@ -0,0 +1,101 @@ +; RUN: llc -mtriple=thumbv7m-none-macho %s -o - -relocation-model=pic -disable-fp-elim | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NON-FAST +; RUN: llc -mtriple=thumbv7m-none-macho -O0 %s -o - -relocation-model=pic -disable-fp-elim | FileCheck %s +; RUN: llc -mtriple=thumbv7m-none-macho -filetype=obj %s -o /dev/null + + ; Bare-metal should probably "declare" segments just like normal MachO +; CHECK: __picsymbolstub4 +; CHECK: __StaticInit +; CHECK: __text + +@var = external global i32 + +define i32 @test_litpool() minsize { +; CHECK-LABEL: test_litpool: + %val = load i32* @var + ret i32 %val + + ; Lit-pool entries need to produce a "$non_lazy_ptr" version of the symbol. 
+; CHECK: LCPI0_0: +; CHECK-NEXT: .long L_var$non_lazy_ptr-(LPC0_0+4) +} + +define i32 @test_movw_movt() { +; CHECK-LABEL: test_movw_movt: + %val = load i32* @var + ret i32 %val + + ; movw/movt should also address their symbols MachO-style +; CHECK: movw [[RTMP:r[0-9]+]], :lower16:(L_var$non_lazy_ptr-(LPC1_0+4)) +; CHECK: movt [[RTMP]], :upper16:(L_var$non_lazy_ptr-(LPC1_0+4)) +; CHECK: LPC1_0: +; CHECK: add [[RTMP]], pc +} + +declare void @llvm.trap() + +define void @test_trap() { +; CHECK-LABEL: test_trap: + + ; Bare-metal MachO gets compiled on top of normal MachO toolchain which + ; understands trap natively. + call void @llvm.trap() +; CHECK: trap + + ret void +} + +define i32 @test_frame_ptr() { +; CHECK-LABEL: test_frame_ptr: + call void @test_trap() + + ; Frame pointer is r7 as for Darwin +; CHECK: mov r7, sp + ret i32 42 +} + +%big_arr = type [8 x i32] +define void @test_two_areas(%big_arr* %addr) { +; CHECK-LABEL: test_two_areas: + %val = load %big_arr* %addr + call void @test_trap() + store %big_arr %val, %big_arr* %addr + + ; This goes with the choice of r7 as FP (largely). FP and LR have to be stored + ; consecutively on the stack for the frame record to be valid, which means we + ; need the 2 register-save areas employed by iOS. +; CHECK-NON-FAST: push {r4, r5, r6, r7, lr} +; CHECK-NON-FAST: push.w {r8, r9, r10, r11} +; ... +; CHECK-NON-FAST: pop.w {r8, r9, r10, r11} +; CHECK-NON-FAST: pop {r4, r5, r6, r7, pc} + ret void +} + +define void @test_tail_call() { +; CHECK-LABEL: test_tail_call: + tail call void @test_trap() + + ; Tail calls should be available and use Thumb2 branch. +; CHECK: b.w _test_trap + ret void +} + +define float @test_softfloat_calls(float %in) { +; CHECK-LABEL: test_softfloat_calls: + %sum = fadd float %in, %in + + ; Soft-float calls should be GNU-style rather than RTABI and should not be the + ; *vfp variants used for ARMv6 iOS. +; CHECK: blx ___addsf3{{$}} + ret float %sum +} + + ; Even bare-metal PIC needs GOT-like behaviour, in principle. Depends a bit on + ; the use-case of course, but LLVM doesn't know what that is. +; CHECK: non_lazy_symbol_pointers +; CHECK: L_var$non_lazy_ptr: +; CHECK-NEXT: .indirect_symbol _var + + ; All MachO objects should have this to give the linker leeway in removing + ; dead code. 
+; CHECK: .subsections_via_symbols diff --git a/test/CodeGen/ARM/noreturn.ll b/test/CodeGen/ARM/noreturn.ll index 4c876ce..edc3333 100644 --- a/test/CodeGen/ARM/noreturn.ll +++ b/test/CodeGen/ARM/noreturn.ll @@ -43,6 +43,23 @@ entry: unreachable } +; Test case for uwtable +define i32 @test4() uwtable { +; CHECK-LABEL: @test4 +; CHECK: push +entry: + tail call void @overflow() #0 + unreachable +} + +define i32 @test5() uwtable { +; CHECK-LABEL: @test5 +; CHECK: push +entry: + tail call void @overflow_with_unwind() #1 + unreachable +} + ; Function Attrs: noreturn declare void @overflow_with_unwind() #1 diff --git a/test/CodeGen/ARM/optimize-dmbs-v7.ll b/test/CodeGen/ARM/optimize-dmbs-v7.ll new file mode 100644 index 0000000..64f5e20 --- /dev/null +++ b/test/CodeGen/ARM/optimize-dmbs-v7.ll @@ -0,0 +1,74 @@ +; RUN: llc < %s -mtriple=armv7 -mattr=+db | FileCheck %s + +@x1 = global i32 0, align 4 +@x2 = global i32 0, align 4 + +define void @test() { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.013 = phi i32 [ 1, %entry ], [ %inc6, %for.body ] + store atomic i32 %i.013, i32* @x1 seq_cst, align 4 + store atomic i32 %i.013, i32* @x1 seq_cst, align 4 + store atomic i32 %i.013, i32* @x2 seq_cst, align 4 + %inc6 = add nsw i32 %i.013, 1 + %exitcond = icmp eq i32 %inc6, 2 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void + +; The for.body contains 3 seq_cst stores. +; Hence it should have 3 dmb;str;dmb sequences with the middle dmbs collapsed +; CHECK: %for.body +; CHECK-NOT: str +; CHECK: dmb +; CHECK-NOT: dmb +; CHECK: str + +; CHECK-NOT: str +; CHECK: dmb +; CHECK-NOT: dmb +; CHECK: str + +; CHECK-NOT: str +; CHECK: dmb +; CHECK-NOT: dmb +; CHECK: str + +; CHECK-NOT: str +; CHECK: dmb +; CHECK-NOT: dmb +; CHECK-NOT: str +; CHECK: %for.end +} + +define void @test2() { + call void @llvm.arm.dmb(i32 11) + tail call void @test() + call void @llvm.arm.dmb(i32 11) + ret void +; the call should prevent the two dmbs from collapsing +; CHECK: test2: +; CHECK: dmb +; CHECK-NEXT: bl +; CHECK-NEXT: dmb +} + +define void @test3() { + call void @llvm.arm.dmb(i32 11) + call void @llvm.arm.dsb(i32 9) + call void @llvm.arm.dmb(i32 11) + ret void +; the call should prevent the two dmbs from collapsing +; CHECK: test3: +; CHECK: dmb +; CHECK-NEXT: dsb +; CHECK-NEXT: dmb + +} + + +declare void @llvm.arm.dmb(i32) +declare void @llvm.arm.dsb(i32) diff --git a/test/CodeGen/ARM/optselect-regclass.ll b/test/CodeGen/ARM/optselect-regclass.ll index 1aa4520..0acb2f2 100644 --- a/test/CodeGen/ARM/optselect-regclass.ll +++ b/test/CodeGen/ARM/optselect-regclass.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=arm -mcpu=swift -verify-machineinstrs +; RUN: llc -mtriple=arm-eabi -mcpu=swift -verify-machineinstrs %s -o /dev/null + %union.opcode.0.2.5.8.15.28 = type { i32 } @opcode = external global %union.opcode.0.2.5.8.15.28, align 4 diff --git a/test/CodeGen/ARM/pack.ll b/test/CodeGen/ARM/pack.ll index fbc1155..89abe28 100644 --- a/test/CodeGen/ARM/pack.ll +++ b/test/CodeGen/ARM/pack.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+v6 | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+v6 %s -o - | FileCheck %s ; CHECK: test1 ; CHECK: pkhbt r0, r0, r1, lsl #16 diff --git a/test/CodeGen/ARM/phi.ll b/test/CodeGen/ARM/phi.ll index dc1a95b..94bced5 100644 --- a/test/CodeGen/ARM/phi.ll +++ b/test/CodeGen/ARM/phi.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=arm -mattr=+v4t < %s | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+v4t %s -o - | FileCheck %s + ; define i32 
@test1(i1 %a, i32* %b) { diff --git a/test/CodeGen/ARM/popcnt.ll b/test/CodeGen/ARM/popcnt.ll index bdf793d..7ace640 100644 --- a/test/CodeGen/ARM/popcnt.ll +++ b/test/CodeGen/ARM/popcnt.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s ; Implement ctpop with vcnt define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind { diff --git a/test/CodeGen/ARM/prefetch-thumb.ll b/test/CodeGen/ARM/prefetch-thumb.ll deleted file mode 100644 index e6f6ae8..0000000 --- a/test/CodeGen/ARM/prefetch-thumb.ll +++ /dev/null @@ -1,22 +0,0 @@ -; RUN: llc < %s -march=thumb -mattr=+v7 | FileCheck %s -check-prefix=THUMB2 -; TODO: This test case will be merged back into prefetch.ll when ARM mode issue is solved. - -declare void @llvm.prefetch(i8*, i32, i32, i32) nounwind - -define void @t6() { -entry: -;ARM: t6: -;ARM: pld [sp] -;ARM: pld [sp, #50] - -;THUMB2: t6: -;THUMB2: pld [sp] -;THUMB2: pld [sp, #50] - -%red = alloca [100 x i8], align 1 -%0 = getelementptr inbounds [100 x i8]* %red, i32 0, i32 0 -%1 = getelementptr inbounds [100 x i8]* %red, i32 0, i32 50 -call void @llvm.prefetch(i8* %0, i32 0, i32 3, i32 1) -call void @llvm.prefetch(i8* %1, i32 0, i32 3, i32 1) -ret void -} diff --git a/test/CodeGen/ARM/prefetch.ll b/test/CodeGen/ARM/prefetch.ll index 5badb31..7350e0a 100644 --- a/test/CodeGen/ARM/prefetch.ll +++ b/test/CodeGen/ARM/prefetch.ll @@ -1,9 +1,11 @@ -; RUN: llc < %s -march=thumb -mattr=-thumb2 | not grep pld -; RUN: llc < %s -march=thumb -mattr=+v7 | FileCheck %s -check-prefix=THUMB2 -; RUN: llc < %s -march=arm -mattr=+v7 | FileCheck %s -check-prefix=ARM -; RUN: llc < %s -march=arm -mcpu=cortex-a9-mp | FileCheck %s -check-prefix=ARM-MP +; RUN: llc -mtriple=thumb-eabi -mattr=-thumb2 %s -o - | FileCheck %s -check-prefix CHECK-T1 +; RUN: llc -mtriple=thumb-eabi -mattr=+v7 %s -o - | FileCheck %s -check-prefix=THUMB2 +; RUN: llc -mtriple=arm-eabi -mattr=+v7 %s -o - | FileCheck %s -check-prefix=ARM +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9-mp %s -o - | FileCheck %s -check-prefix=ARM-MP ; rdar://8601536 +; CHECK-T1-NOT: pld + define void @t1(i8* %ptr) nounwind { entry: ; ARM-LABEL: t1: @@ -75,3 +77,21 @@ entry: tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 0 ) ret void } + +define void @t6() { +entry: +;ARM-LABEL: t6: +;ARM: pld [sp] +;ARM: pld [sp, #50] + +;THUMB2-LABEL: t6: +;THUMB2: pld [sp] +;THUMB2: pld [sp, #50] + +%red = alloca [100 x i8], align 1 +%0 = getelementptr inbounds [100 x i8]* %red, i32 0, i32 0 +%1 = getelementptr inbounds [100 x i8]* %red, i32 0, i32 50 +call void @llvm.prefetch(i8* %0, i32 0, i32 3, i32 1) +call void @llvm.prefetch(i8* %1, i32 0, i32 3, i32 1) +ret void +} diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll index 25484f4..b245674 100644 --- a/test/CodeGen/ARM/reg_sequence.ll +++ b/test/CodeGen/ARM/reg_sequence.ll @@ -34,9 +34,11 @@ entry: %12 = sext <4 x i16> %11 to <4 x i32> ; <<4 x i32>> [#uses=1] %13 = mul <4 x i32> %1, %9 ; <<4 x i32>> [#uses=1] %14 = mul <4 x i32> %3, %12 ; <<4 x i32>> [#uses=1] - %15 = tail call <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32> %13, <4 x i32> ) ; <<4 x i16>> [#uses=1] - %16 = tail call <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32> %14, <4 x i32> ) ; <<4 x i16>> [#uses=1] - %17 = shufflevector <4 x i16> %15, <4 x i16> %16, <8 x i32> ; <<8 x i16>> [#uses=1] + %15 = lshr <4 x i32> %13, + %trunc_15 = trunc <4 x i32> %15 to <4 x i16> + %16 = lshr <4 x i32> %14, + %trunc_16 = trunc <4 x i32> %16 to <4 x 
i16> + %17 = shufflevector <4 x i16> %trunc_15, <4 x i16> %trunc_16, <8 x i32> ; <<8 x i16>> [#uses=1] %18 = bitcast i16* %o_ptr to i8* ; [#uses=1] tail call void @llvm.arm.neon.vst1.v8i16(i8* %18, <8 x i16> %17, i32 1) ret void diff --git a/test/CodeGen/ARM/ret0.ll b/test/CodeGen/ARM/ret0.ll index 5c312eb..e51067b 100644 --- a/test/CodeGen/ARM/ret0.ll +++ b/test/CodeGen/ARM/ret0.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm +; RUN: llc -mtriple=arm-eabi %s -o /dev/null define i32 @test() { ret i32 0 diff --git a/test/CodeGen/ARM/ret_arg1.ll b/test/CodeGen/ARM/ret_arg1.ll index 1ab947b..b7eef20 100644 --- a/test/CodeGen/ARM/ret_arg1.ll +++ b/test/CodeGen/ARM/ret_arg1.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm +; RUN: llc -mtriple=arm-eabi %s -o /dev/null define i32 @test(i32 %a1) { ret i32 %a1 diff --git a/test/CodeGen/ARM/ret_arg2.ll b/test/CodeGen/ARM/ret_arg2.ll index 84477d0..bcb379b 100644 --- a/test/CodeGen/ARM/ret_arg2.ll +++ b/test/CodeGen/ARM/ret_arg2.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm +; RUN: llc -mtriple=arm-eabi %s -o /dev/null define i32 @test(i32 %a1, i32 %a2) { ret i32 %a2 diff --git a/test/CodeGen/ARM/ret_arg3.ll b/test/CodeGen/ARM/ret_arg3.ll index f7f9057..625162f 100644 --- a/test/CodeGen/ARM/ret_arg3.ll +++ b/test/CodeGen/ARM/ret_arg3.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=arm +; RUN: llc -mtriple=arm-eabi %s -o /dev/null + define i32 @test(i32 %a1, i32 %a2, i32 %a3) { ret i32 %a3 } diff --git a/test/CodeGen/ARM/ret_arg4.ll b/test/CodeGen/ARM/ret_arg4.ll index f7b3e4a..81b55fe 100644 --- a/test/CodeGen/ARM/ret_arg4.ll +++ b/test/CodeGen/ARM/ret_arg4.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm +; RUN: llc -mtriple=arm-eabi %s -o /dev/null define i32 @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { ret i32 %a4 diff --git a/test/CodeGen/ARM/ret_arg5.ll b/test/CodeGen/ARM/ret_arg5.ll index c4f9fb5..680e89f 100644 --- a/test/CodeGen/ARM/ret_arg5.ll +++ b/test/CodeGen/ARM/ret_arg5.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm +; RUN: llc -mtriple=arm-eabi %s -o /dev/null define i32 @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5) { ret i32 %a5 diff --git a/test/CodeGen/ARM/ret_f32_arg2.ll b/test/CodeGen/ARM/ret_f32_arg2.ll index 2bafea6..0caee0b 100644 --- a/test/CodeGen/ARM/ret_f32_arg2.ll +++ b/test/CodeGen/ARM/ret_f32_arg2.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 +; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o /dev/null define float @test_f32(float %a1, float %a2) { ret float %a2 diff --git a/test/CodeGen/ARM/ret_f32_arg5.ll b/test/CodeGen/ARM/ret_f32_arg5.ll index c6ce60e..d39dc7e 100644 --- a/test/CodeGen/ARM/ret_f32_arg5.ll +++ b/test/CodeGen/ARM/ret_f32_arg5.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 +; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o /dev/null define float @test_f32_arg5(float %a1, float %a2, float %a3, float %a4, float %a5) { ret float %a5 diff --git a/test/CodeGen/ARM/ret_f64_arg2.ll b/test/CodeGen/ARM/ret_f64_arg2.ll index 386e85f..c4519ff 100644 --- a/test/CodeGen/ARM/ret_f64_arg2.ll +++ b/test/CodeGen/ARM/ret_f64_arg2.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 +; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o /dev/null define double @test_f64(double %a1, double %a2) { ret double %a2 diff --git a/test/CodeGen/ARM/ret_f64_arg_reg_split.ll b/test/CodeGen/ARM/ret_f64_arg_reg_split.ll index bdb0a60..ef11250 100644 --- a/test/CodeGen/ARM/ret_f64_arg_reg_split.ll +++ b/test/CodeGen/ARM/ret_f64_arg_reg_split.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mcpu=arm8 -mattr=+vfp2 +; 
RUN: llc -mtriple=arm-eabi -mcpu=arm8 -mattr=+vfp2 %s -o /dev/null define double @test_double_arg_reg_split(i32 %a1, double %a2) { ret double %a2 diff --git a/test/CodeGen/ARM/ret_f64_arg_split.ll b/test/CodeGen/ARM/ret_f64_arg_split.ll index 4f841a3..1130920 100644 --- a/test/CodeGen/ARM/ret_f64_arg_split.ll +++ b/test/CodeGen/ARM/ret_f64_arg_split.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 +; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o /dev/null define double @test_double_arg_split(i64 %a1, i32 %a2, double %a3) { ret double %a3 diff --git a/test/CodeGen/ARM/ret_f64_arg_stack.ll b/test/CodeGen/ARM/ret_f64_arg_stack.ll index 2144317..f45923e 100644 --- a/test/CodeGen/ARM/ret_f64_arg_stack.ll +++ b/test/CodeGen/ARM/ret_f64_arg_stack.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 +; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o /dev/null define double @test_double_arg_stack(i64 %a1, i32 %a2, i32 %a3, double %a4) { ret double %a4 diff --git a/test/CodeGen/ARM/ret_i128_arg2.ll b/test/CodeGen/ARM/ret_i128_arg2.ll index 908c34f..a87f3f2 100644 --- a/test/CodeGen/ARM/ret_i128_arg2.ll +++ b/test/CodeGen/ARM/ret_i128_arg2.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 +; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o /dev/null define i128 @test_i128(i128 %a1, i128 %a2, i128 %a3) { ret i128 %a3 diff --git a/test/CodeGen/ARM/ret_i64_arg2.ll b/test/CodeGen/ARM/ret_i64_arg2.ll index b1a1024..c51d2b8 100644 --- a/test/CodeGen/ARM/ret_i64_arg2.ll +++ b/test/CodeGen/ARM/ret_i64_arg2.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 +; RUN: llc -march=arm -mattr=+vfp2 %s -o /dev/null define i64 @test_i64(i64 %a1, i64 %a2) { ret i64 %a2 diff --git a/test/CodeGen/ARM/ret_i64_arg3.ll b/test/CodeGen/ARM/ret_i64_arg3.ll index ffc1d2f..602997e 100644 --- a/test/CodeGen/ARM/ret_i64_arg3.ll +++ b/test/CodeGen/ARM/ret_i64_arg3.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 +; RUN: llc -march=arm -mattr=+vfp2 %s -o /dev/null define i64 @test_i64_arg3(i64 %a1, i64 %a2, i64 %a3) { ret i64 %a3 diff --git a/test/CodeGen/ARM/ret_i64_arg_split.ll b/test/CodeGen/ARM/ret_i64_arg_split.ll index 956bce5..0583b27 100644 --- a/test/CodeGen/ARM/ret_i64_arg_split.ll +++ b/test/CodeGen/ARM/ret_i64_arg_split.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 +; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o /dev/null define i64 @test_i64_arg_split(i64 %a1, i32 %a2, i64 %a3) { ret i64 %a3 diff --git a/test/CodeGen/ARM/ret_void.ll b/test/CodeGen/ARM/ret_void.ll index 2b7ae05..93dc5c1 100644 --- a/test/CodeGen/ARM/ret_void.ll +++ b/test/CodeGen/ARM/ret_void.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm +; RUN: llc -mtriple=arm-eabi %s -o /dev/null define void @test() { ret void diff --git a/test/CodeGen/ARM/returned-ext.ll b/test/CodeGen/ARM/returned-ext.ll index d2cdeb0..925e9e7 100644 --- a/test/CodeGen/ARM/returned-ext.ll +++ b/test/CodeGen/ARM/returned-ext.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=armv6-linux-gnueabi -arm-tail-calls | FileCheck %s -check-prefix=CHECKELF -; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-tail-calls | FileCheck %s -check-prefix=CHECKT2D +; RUN: llc < %s -mtriple=armv6-linux-gnueabi | FileCheck %s -check-prefix=CHECKELF +; RUN: llc < %s -mtriple=thumbv7-apple-ios5.0 | FileCheck %s -check-prefix=CHECKT2D declare i16 @identity16(i16 returned %x) declare i32 @identity32(i32 returned %x) diff --git a/test/CodeGen/ARM/returned-trunc-tail-calls.ll b/test/CodeGen/ARM/returned-trunc-tail-calls.ll index 5946727..6051a83 100644 --- 
a/test/CodeGen/ARM/returned-trunc-tail-calls.ll +++ b/test/CodeGen/ARM/returned-trunc-tail-calls.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=armv7 -arm-tail-calls | FileCheck %s +; RUN: llc < %s -mtriple=armv7 | FileCheck %s declare i16 @ret16(i16 returned) declare i32 @ret32(i32 returned) diff --git a/test/CodeGen/ARM/rev.ll b/test/CodeGen/ARM/rev.ll index 6c380ae..f95f971 100644 --- a/test/CodeGen/ARM/rev.ll +++ b/test/CodeGen/ARM/rev.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+v6 | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+v6 %s -o - | FileCheck %s define i32 @test1(i32 %X) nounwind { ; CHECK: test1 diff --git a/test/CodeGen/ARM/saxpy10-a9.ll b/test/CodeGen/ARM/saxpy10-a9.ll new file mode 100644 index 0000000..f8f5e18 --- /dev/null +++ b/test/CodeGen/ARM/saxpy10-a9.ll @@ -0,0 +1,135 @@ +; RUN: llc < %s -march=arm -mtriple=thumbv7-apple-ios7.0.0 -float-abi=hard -mcpu=cortex-a9 -misched-postra -misched-bench -scheditins=false | FileCheck %s +; +; Test MI-Sched suppory latency based stalls on in in-order pipeline +; using the new machine model. + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" + +; Don't be too strict with the top of the schedule, but most of it +; should be nicely pipelined. +; +; CHECK: saxpy10: +; CHECK: vldr +; CHECK: vldr +; CHECK: vldr +; CHECK: vldr +; CHECK: vldr +; CHECK: vldr +; CHECK-NEXT: vadd +; CHECK-NEXT: vadd +; CHECK-NEXT: vldr +; CHECK-NEXT: vldr +; CHECK-NEXT: vldr +; CHECK-NEXT: vadd +; CHECK-NEXT: vmul +; CHECK-NEXT: vldr +; CHECK-NEXT: vadd +; CHECK-NEXT: vadd +; CHECK-NEXT: vmul +; CHECK-NEXT: vldr +; CHECK-NEXT: vadd +; CHECK-NEXT: vadd +; CHECK-NEXT: vldr +; CHECK-NEXT: vmul +; CHECK-NEXT: vadd +; CHECK-NEXT: vldr +; CHECK-NEXT: vadd +; CHECK-NEXT: vldr +; CHECK-NEXT: vmul +; CHECK-NEXT: vadd +; CHECK-NEXT: vldr +; CHECK-NEXT: vadd +; CHECK-NEXT: vldr +; CHECK-NEXT: vmul +; CHECK-NEXT: vadd +; CHECK-NEXT: vldr +; CHECK-NEXT: vadd +; CHECK-NEXT: vldr +; CHECK-NEXT: vmul +; CHECK-NEXT: vadd +; CHECK-NEXT: vldr +; CHECK-NEXT: vmul +; CHECK-NEXT: vadd +; CHECK-NEXT: vldr +; CHECK-NEXT: vmul +; CHECK-NEXT: vadd +; CHECK-NEXT: vldr +; CHECK-NEXT: vadd +; CHECK-NEXT: vadd +; CHECK-NEXT: vadd +; CHECK-NEXT: vmov +; CHECK-NEXT: bx +; +; This accumulates a sum rather than storing each result. 
+define float @saxpy10(float* nocapture readonly %data1, float* nocapture readonly %data2, float %a) { +entry: + %0 = load float* %data1, align 4 + %mul = fmul float %0, %a + %1 = load float* %data2, align 4 + %add = fadd float %mul, %1 + %add2 = fadd float %add, 0.000000e+00 + %arrayidx.1 = getelementptr inbounds float* %data1, i32 1 + %2 = load float* %arrayidx.1, align 4 + %mul.1 = fmul float %2, %a + %arrayidx1.1 = getelementptr inbounds float* %data2, i32 1 + %3 = load float* %arrayidx1.1, align 4 + %add.1 = fadd float %mul.1, %3 + %add2.1 = fadd float %add2, %add.1 + %arrayidx.2 = getelementptr inbounds float* %data1, i32 2 + %4 = load float* %arrayidx.2, align 4 + %mul.2 = fmul float %4, %a + %arrayidx1.2 = getelementptr inbounds float* %data2, i32 2 + %5 = load float* %arrayidx1.2, align 4 + %add.2 = fadd float %mul.2, %5 + %add2.2 = fadd float %add2.1, %add.2 + %arrayidx.3 = getelementptr inbounds float* %data1, i32 3 + %6 = load float* %arrayidx.3, align 4 + %mul.3 = fmul float %6, %a + %arrayidx1.3 = getelementptr inbounds float* %data2, i32 3 + %7 = load float* %arrayidx1.3, align 4 + %add.3 = fadd float %mul.3, %7 + %add2.3 = fadd float %add2.2, %add.3 + %arrayidx.4 = getelementptr inbounds float* %data1, i32 4 + %8 = load float* %arrayidx.4, align 4 + %mul.4 = fmul float %8, %a + %arrayidx1.4 = getelementptr inbounds float* %data2, i32 4 + %9 = load float* %arrayidx1.4, align 4 + %add.4 = fadd float %mul.4, %9 + %add2.4 = fadd float %add2.3, %add.4 + %arrayidx.5 = getelementptr inbounds float* %data1, i32 5 + %10 = load float* %arrayidx.5, align 4 + %mul.5 = fmul float %10, %a + %arrayidx1.5 = getelementptr inbounds float* %data2, i32 5 + %11 = load float* %arrayidx1.5, align 4 + %add.5 = fadd float %mul.5, %11 + %add2.5 = fadd float %add2.4, %add.5 + %arrayidx.6 = getelementptr inbounds float* %data1, i32 6 + %12 = load float* %arrayidx.6, align 4 + %mul.6 = fmul float %12, %a + %arrayidx1.6 = getelementptr inbounds float* %data2, i32 6 + %13 = load float* %arrayidx1.6, align 4 + %add.6 = fadd float %mul.6, %13 + %add2.6 = fadd float %add2.5, %add.6 + %arrayidx.7 = getelementptr inbounds float* %data1, i32 7 + %14 = load float* %arrayidx.7, align 4 + %mul.7 = fmul float %14, %a + %arrayidx1.7 = getelementptr inbounds float* %data2, i32 7 + %15 = load float* %arrayidx1.7, align 4 + %add.7 = fadd float %mul.7, %15 + %add2.7 = fadd float %add2.6, %add.7 + %arrayidx.8 = getelementptr inbounds float* %data1, i32 8 + %16 = load float* %arrayidx.8, align 4 + %mul.8 = fmul float %16, %a + %arrayidx1.8 = getelementptr inbounds float* %data2, i32 8 + %17 = load float* %arrayidx1.8, align 4 + %add.8 = fadd float %mul.8, %17 + %add2.8 = fadd float %add2.7, %add.8 + %arrayidx.9 = getelementptr inbounds float* %data1, i32 9 + %18 = load float* %arrayidx.9, align 4 + %mul.9 = fmul float %18, %a + %arrayidx1.9 = getelementptr inbounds float* %data2, i32 9 + %19 = load float* %arrayidx1.9, align 4 + %add.9 = fadd float %mul.9, %19 + %add2.9 = fadd float %add2.8, %add.9 + ret float %add2.9 +} diff --git a/test/CodeGen/ARM/sbfx.ll b/test/CodeGen/ARM/sbfx.ll index 36fbd19..3c25edc 100644 --- a/test/CodeGen/ARM/sbfx.ll +++ b/test/CodeGen/ARM/sbfx.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+v6t2 | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+v6t2 %s -o - | FileCheck %s define i32 @f1(i32 %a) { entry: diff --git a/test/CodeGen/ARM/segmented-stacks-dynamic.ll b/test/CodeGen/ARM/segmented-stacks-dynamic.ll new file mode 100644 index 0000000..13b5bcf --- /dev/null +++ 
b/test/CodeGen/ARM/segmented-stacks-dynamic.ll @@ -0,0 +1,62 @@ +; RUN: llc < %s -mtriple=arm-linux-androideabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=ARM-android +; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=ARM-linux +; RUN: llc < %s -mtriple=arm-linux-androideabi -segmented-stacks -filetype=obj +; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -segmented-stacks -filetype=obj + +; Just to prevent the alloca from being optimized away +declare void @dummy_use(i32*, i32) + +define i32 @test_basic(i32 %l) { + %mem = alloca i32, i32 %l + call void @dummy_use (i32* %mem, i32 %l) + %terminate = icmp eq i32 %l, 0 + br i1 %terminate, label %true, label %false + +true: + ret i32 0 + +false: + %newlen = sub i32 %l, 1 + %retvalue = call i32 @test_basic(i32 %newlen) + ret i32 %retvalue + +; ARM-linux: test_basic: + +; ARM-linux: push {r4, r5} +; ARM-linux-NEXT: mrc p15, #0, r4, c13, c0, #3 +; ARM-linux-NEXT: mov r5, sp +; ARM-linux-NEXT: ldr r4, [r4, #4] +; ARM-linux-NEXT: cmp r4, r5 +; ARM-linux-NEXT: blo .LBB0_2 + +; ARM-linux: mov r4, #24 +; ARM-linux-NEXT: mov r5, #0 +; ARM-linux-NEXT: stmdb sp!, {lr} +; ARM-linux-NEXT: bl __morestack +; ARM-linux-NEXT: ldm sp!, {lr} +; ARM-linux-NEXT: pop {r4, r5} +; ARM-linux-NEXT: bx lr + +; ARM-linux: pop {r4, r5} + + +; ARM-android: test_basic: + +; ARM-android: push {r4, r5} +; ARM-android-NEXT: mrc p15, #0, r4, c13, c0, #3 +; ARM-android-NEXT: mov r5, sp +; ARM-android-NEXT: ldr r4, [r4, #252] +; ARM-android-NEXT: cmp r4, r5 +; ARM-android-NEXT: blo .LBB0_2 + +; ARM-android: mov r4, #24 +; ARM-android-NEXT: mov r5, #0 +; ARM-android-NEXT: stmdb sp!, {lr} +; ARM-android-NEXT: bl __morestack +; ARM-android-NEXT: ldm sp!, {lr} +; ARM-android-NEXT: pop {r4, r5} +; ARM-android-NEXT: bx lr + +; ARM-android: pop {r4, r5} + +} diff --git a/test/CodeGen/ARM/segmented-stacks.ll b/test/CodeGen/ARM/segmented-stacks.ll new file mode 100644 index 0000000..5eff633 --- /dev/null +++ b/test/CodeGen/ARM/segmented-stacks.ll @@ -0,0 +1,235 @@ +; RUN: llc < %s -mtriple=arm-linux-androideabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=ARM-android +; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=ARM-linux + +; We used to crash with filetype=obj +; RUN: llc < %s -mtriple=arm-linux-androideabi -segmented-stacks -filetype=obj +; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -segmented-stacks -filetype=obj + + +; Just to prevent the alloca from being optimized away +declare void @dummy_use(i32*, i32) + +define void @test_basic() { + %mem = alloca i32, i32 10 + call void @dummy_use (i32* %mem, i32 10) + ret void + +; ARM-linux: test_basic: + +; ARM-linux: push {r4, r5} +; ARM-linux-NEXT: mrc p15, #0, r4, c13, c0, #3 +; ARM-linux-NEXT: mov r5, sp +; ARM-linux-NEXT: ldr r4, [r4, #4] +; ARM-linux-NEXT: cmp r4, r5 +; ARM-linux-NEXT: blo .LBB0_2 + +; ARM-linux: mov r4, #48 +; ARM-linux-NEXT: mov r5, #0 +; ARM-linux-NEXT: stmdb sp!, {lr} +; ARM-linux-NEXT: bl __morestack +; ARM-linux-NEXT: ldm sp!, {lr} +; ARM-linux-NEXT: pop {r4, r5} +; ARM-linux-NEXT: bx lr + +; ARM-linux: pop {r4, r5} + +; ARM-android: test_basic: + +; ARM-android: push {r4, r5} +; ARM-android-NEXT: mrc p15, #0, r4, c13, c0, #3 +; ARM-android-NEXT: mov r5, sp +; ARM-android-NEXT: ldr r4, [r4, #252] +; ARM-android-NEXT: cmp r4, r5 +; ARM-android-NEXT: blo .LBB0_2 + +; ARM-android: mov r4, #48 +; ARM-android-NEXT: mov 
r5, #0 +; ARM-android-NEXT: stmdb sp!, {lr} +; ARM-android-NEXT: bl __morestack +; ARM-android-NEXT: ldm sp!, {lr} +; ARM-android-NEXT: pop {r4, r5} +; ARM-android-NEXT: bx lr + +; ARM-android: pop {r4, r5} + +} + +define i32 @test_nested(i32 * nest %closure, i32 %other) { + %addend = load i32 * %closure + %result = add i32 %other, %addend + ret i32 %result + +; ARM-linux: test_nested: + +; ARM-linux: push {r4, r5} +; ARM-linux-NEXT: mrc p15, #0, r4, c13, c0, #3 +; ARM-linux-NEXT: mov r5, sp +; ARM-linux-NEXT: ldr r4, [r4, #4] +; ARM-linux-NEXT: cmp r4, r5 +; ARM-linux-NEXT: blo .LBB1_2 + +; ARM-linux: mov r4, #0 +; ARM-linux-NEXT: mov r5, #0 +; ARM-linux-NEXT: stmdb sp!, {lr} +; ARM-linux-NEXT: bl __morestack +; ARM-linux-NEXT: ldm sp!, {lr} +; ARM-linux-NEXT: pop {r4, r5} +; ARM-linux-NEXT: bx lr + +; ARM-linux: pop {r4, r5} + +; ARM-android: test_nested: + +; ARM-android: push {r4, r5} +; ARM-android-NEXT: mrc p15, #0, r4, c13, c0, #3 +; ARM-android-NEXT: mov r5, sp +; ARM-android-NEXT: ldr r4, [r4, #252] +; ARM-android-NEXT: cmp r4, r5 +; ARM-android-NEXT: blo .LBB1_2 + +; ARM-android: mov r4, #0 +; ARM-android-NEXT: mov r5, #0 +; ARM-android-NEXT: stmdb sp!, {lr} +; ARM-android-NEXT: bl __morestack +; ARM-android-NEXT: ldm sp!, {lr} +; ARM-android-NEXT: pop {r4, r5} +; ARM-android-NEXT: bx lr + +; ARM-android: pop {r4, r5} + +} + +define void @test_large() { + %mem = alloca i32, i32 10000 + call void @dummy_use (i32* %mem, i32 0) + ret void + +; ARM-linux: test_large: + +; ARM-linux: push {r4, r5} +; ARM-linux-NEXT: mrc p15, #0, r4, c13, c0, #3 +; ARM-linux-NEXT: sub r5, sp, #40192 +; ARM-linux-NEXT: ldr r4, [r4, #4] +; ARM-linux-NEXT: cmp r4, r5 +; ARM-linux-NEXT: blo .LBB2_2 + +; ARM-linux: mov r4, #40192 +; ARM-linux-NEXT: mov r5, #0 +; ARM-linux-NEXT: stmdb sp!, {lr} +; ARM-linux-NEXT: bl __morestack +; ARM-linux-NEXT: ldm sp!, {lr} +; ARM-linux-NEXT: pop {r4, r5} +; ARM-linux-NEXT: bx lr + +; ARM-linux: pop {r4, r5} + +; ARM-android: test_large: + +; ARM-android: push {r4, r5} +; ARM-android-NEXT: mrc p15, #0, r4, c13, c0, #3 +; ARM-android-NEXT: sub r5, sp, #40192 +; ARM-android-NEXT: ldr r4, [r4, #252] +; ARM-android-NEXT: cmp r4, r5 +; ARM-android-NEXT: blo .LBB2_2 + +; ARM-android: mov r4, #40192 +; ARM-android-NEXT: mov r5, #0 +; ARM-android-NEXT: stmdb sp!, {lr} +; ARM-android-NEXT: bl __morestack +; ARM-android-NEXT: ldm sp!, {lr} +; ARM-android-NEXT: pop {r4, r5} +; ARM-android-NEXT: bx lr + +; ARM-android: pop {r4, r5} + +} + +define fastcc void @test_fastcc() { + %mem = alloca i32, i32 10 + call void @dummy_use (i32* %mem, i32 10) + ret void + +; ARM-linux: test_fastcc: + +; ARM-linux: push {r4, r5} +; ARM-linux-NEXT: mrc p15, #0, r4, c13, c0, #3 +; ARM-linux-NEXT: mov r5, sp +; ARM-linux-NEXT: ldr r4, [r4, #4] +; ARM-linux-NEXT: cmp r4, r5 +; ARM-linux-NEXT: blo .LBB3_2 + +; ARM-linux: mov r4, #48 +; ARM-linux-NEXT: mov r5, #0 +; ARM-linux-NEXT: stmdb sp!, {lr} +; ARM-linux-NEXT: bl __morestack +; ARM-linux-NEXT: ldm sp!, {lr} +; ARM-linux-NEXT: pop {r4, r5} +; ARM-linux-NEXT: bx lr + +; ARM-linux: pop {r4, r5} + +; ARM-android: test_fastcc: + +; ARM-android: push {r4, r5} +; ARM-android-NEXT: mrc p15, #0, r4, c13, c0, #3 +; ARM-android-NEXT: mov r5, sp +; ARM-android-NEXT: ldr r4, [r4, #252] +; ARM-android-NEXT: cmp r4, r5 +; ARM-android-NEXT: blo .LBB3_2 + +; ARM-android: mov r4, #48 +; ARM-android-NEXT: mov r5, #0 +; ARM-android-NEXT: stmdb sp!, {lr} +; ARM-android-NEXT: bl __morestack +; ARM-android-NEXT: ldm sp!, {lr} +; ARM-android-NEXT: pop {r4, r5} +; 
ARM-android-NEXT: bx lr + +; ARM-android: pop {r4, r5} + +} + +define fastcc void @test_fastcc_large() { + %mem = alloca i32, i32 10000 + call void @dummy_use (i32* %mem, i32 0) + ret void + +; ARM-linux: test_fastcc_large: + +; ARM-linux: push {r4, r5} +; ARM-linux-NEXT: mrc p15, #0, r4, c13, c0, #3 +; ARM-linux-NEXT: sub r5, sp, #40192 +; ARM-linux-NEXT: ldr r4, [r4, #4] +; ARM-linux-NEXT: cmp r4, r5 +; ARM-linux-NEXT: blo .LBB4_2 + +; ARM-linux: mov r4, #40192 +; ARM-linux-NEXT: mov r5, #0 +; ARM-linux-NEXT: stmdb sp!, {lr} +; ARM-linux-NEXT: bl __morestack +; ARM-linux-NEXT: ldm sp!, {lr} +; ARM-linux-NEXT: pop {r4, r5} +; ARM-linux-NEXT: bx lr + +; ARM-linux: pop {r4, r5} + +; ARM-android: test_fastcc_large: + +; ARM-android: push {r4, r5} +; ARM-android-NEXT: mrc p15, #0, r4, c13, c0, #3 +; ARM-android-NEXT: sub r5, sp, #40192 +; ARM-android-NEXT: ldr r4, [r4, #252] +; ARM-android-NEXT: cmp r4, r5 +; ARM-android-NEXT: blo .LBB4_2 + +; ARM-android: mov r4, #40192 +; ARM-android-NEXT: mov r5, #0 +; ARM-android-NEXT: stmdb sp!, {lr} +; ARM-android-NEXT: bl __morestack +; ARM-android-NEXT: ldm sp!, {lr} +; ARM-android-NEXT: pop {r4, r5} +; ARM-android-NEXT: bx lr + +; ARM-android: pop {r4, r5} + +} diff --git a/test/CodeGen/ARM/select-imm.ll b/test/CodeGen/ARM/select-imm.ll index 6f4bfb8..e2dc554 100644 --- a/test/CodeGen/ARM/select-imm.ll +++ b/test/CodeGen/ARM/select-imm.ll @@ -1,6 +1,10 @@ -; RUN: llc < %s -march=arm | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -march=arm -mattr=+thumb2 | FileCheck %s --check-prefix=ARMT2 -; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s --check-prefix=THUMB2 +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s --check-prefix=ARM + +; RUN: llc -mtriple=arm-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - \ +; RUN: | FileCheck %s --check-prefix=ARMT2 + +; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - \ +; RUN: | FileCheck %s --check-prefix=THUMB2 define i32 @t1(i32 %c) nounwind readnone { entry: diff --git a/test/CodeGen/ARM/select-undef.ll b/test/CodeGen/ARM/select-undef.ll index 23f7eb8..bae4d40 100644 --- a/test/CodeGen/ARM/select-undef.ll +++ b/test/CodeGen/ARM/select-undef.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=arm -mcpu=swift -verify-machineinstrs +; RUN: llc -mtriple=arm-eabi -mcpu=swift -verify-machineinstrs %s -o /dev/null + define i32 @func(i32 %arg0, i32 %arg1) { entry: %cmp = icmp slt i32 %arg0, 10 diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll index ed006d6..e9394a7 100644 --- a/test/CodeGen/ARM/select.ll +++ b/test/CodeGen/ARM/select.ll @@ -1,6 +1,10 @@ -; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s --check-prefix=CHECK-VFP -; RUN: llc < %s -mattr=+neon,+thumb2 -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=CHECK-NEON +; RUN: llc -mtriple=arm-apple-darwin %s -o - | FileCheck %s + +; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o - \ +; RUN: | FileCheck %s --check-prefix=CHECK-VFP + +; RUN: llc -mtriple=thumbv7-apple-darwin -mattr=+neon,+thumb2 %s -o - \ +; RUN: | FileCheck %s --check-prefix=CHECK-NEON define i32 @f1(i32 %a.s) { ;CHECK-LABEL: f1: diff --git a/test/CodeGen/ARM/setcc-sentinals.ll b/test/CodeGen/ARM/setcc-sentinals.ll index 8878f9b..dc45e0e 100644 --- a/test/CodeGen/ARM/setcc-sentinals.ll +++ b/test/CodeGen/ARM/setcc-sentinals.ll @@ -1,8 +1,8 @@ -; RUN: llc < %s -mcpu=cortex-a8 -march=arm -asm-verbose=false | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 -asm-verbose=false 
%s -o - | FileCheck %s define zeroext i1 @test0(i32 %x) nounwind { ; CHECK-LABEL: test0: -; CHECK-NEXT: add [[REG:(r[0-9]+)|(lr)]], r0, #1 +; CHECK: add [[REG:(r[0-9]+)|(lr)]], r0, #1 ; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: cmp [[REG]], #1 ; CHECK-NEXT: movwhi r0, #1 diff --git a/test/CodeGen/ARM/smul.ll b/test/CodeGen/ARM/smul.ll index 686d791..b7ddd10 100644 --- a/test/CodeGen/ARM/smul.ll +++ b/test/CodeGen/ARM/smul.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=arm -mcpu=generic -; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mcpu=generic %s -o /dev/null +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s @x = weak global i16 0 ; [#uses=1] @y = weak global i16 0 ; [#uses=0] diff --git a/test/CodeGen/ARM/ssp-data-layout.ll b/test/CodeGen/ARM/ssp-data-layout.ll new file mode 100644 index 0000000..e7dafac --- /dev/null +++ b/test/CodeGen/ARM/ssp-data-layout.ll @@ -0,0 +1,528 @@ +; RUN: llc < %s -disable-fp-elim -march=arm -mcpu=cortex-a8 -mtriple arm-linux-gnu -o - | FileCheck %s +; This test is fairly fragile. The goal is to ensure that "large" stack +; objects are allocated closest to the stack protector (i.e., farthest away +; from the Stack Pointer.) In standard SSP mode this means that large (>= +; ssp-buffer-size) arrays and structures containing such arrays are +; closet to the protector. With sspstrong and sspreq this means large +; arrays/structures-with-arrays are closest, followed by small (< ssp-buffer-size) +; arrays/structures-with-arrays, and then addr-taken variables. +; +; Ideally, we only want verify that the objects appear in the correct groups +; and that the groups have the correct relative stack offset. The ordering +; within a group is not relevant to this test. Unfortunately, there is not +; an elegant way to do this, so just match the offset for each object. + +%struct.struct_large_char = type { [8 x i8] } +%struct.struct_large_char2 = type { [2 x i8], [8 x i8] } +%struct.struct_small_char = type { [2 x i8] } +%struct.struct_large_nonchar = type { [8 x i32] } +%struct.struct_small_nonchar = type { [2 x i16] } + +define void @layout_ssp() ssp { +entry: +; Expected stack layout for ssp is +; 180 large_char . Group 1, nested arrays, arrays >= ssp-buffer-size +; 172 struct_large_char . 
+; 168 scalar1 | Everything else +; 164 scalar2 +; 160 scalar3 +; 156 addr-of +; 152 small_nonchar (84+68) +; 112 large_nonchar +; 110 small_char +; 108 struct_small_char +; 72 struct_large_nonchar +; 68 struct_small_nonchar + +; CHECK: layout_ssp: +; r[[SP]] is used as an offset into the stack later +; CHECK: add r[[SP:[0-9]+]], sp, #68 + +; CHECK: bl get_scalar1 +; CHECK: str r0, [sp, #168] +; CHECK: bl end_scalar1 + +; CHECK: bl get_scalar2 +; CHECK: str r0, [sp, #164] +; CHECK: bl end_scalar2 + +; CHECK: bl get_scalar3 +; CHECK: str r0, [sp, #160] +; CHECK: bl end_scalar3 + +; CHECK: bl get_addrof +; CHECK: str r0, [sp, #156] +; CHECK: bl end_addrof + +; CHECK: get_small_nonchar +; CHECK: strh r0, [r[[SP]], #84] +; CHECK: bl end_small_nonchar + +; CHECK: bl get_large_nonchar +; CHECK: str r0, [sp, #112] +; CHECK: bl end_large_nonchar + +; CHECK: bl get_small_char +; CHECK: strb r0, [sp, #110] +; CHECK: bl end_small_char + +; CHECK: bl get_large_char +; CHECK: strb r0, [sp, #180] +; CHECK: bl end_large_char + +; CHECK: bl get_struct_large_char +; CHECK: strb r0, [sp, #172] +; CHECK: bl end_struct_large_char + +; CHECK: bl get_struct_small_char +; CHECK: strb r0, [sp, #108] +; CHECK: bl end_struct_small_char + +; CHECK: bl get_struct_large_nonchar +; CHECK:str r0, [sp, #72] +; CHECK: bl end_struct_large_nonchar + +; CHECK: bl get_struct_small_nonchar +; CHECK: strh r0, [r[[SP]]] +; CHECK: bl end_struct_small_nonchar + %x = alloca i32, align 4 + %y = alloca i32, align 4 + %z = alloca i32, align 4 + %ptr = alloca i32, align 4 + %small2 = alloca [2 x i16], align 2 + %large2 = alloca [8 x i32], align 16 + %small = alloca [2 x i8], align 1 + %large = alloca [8 x i8], align 1 + %a = alloca %struct.struct_large_char, align 1 + %b = alloca %struct.struct_small_char, align 1 + %c = alloca %struct.struct_large_nonchar, align 8 + %d = alloca %struct.struct_small_nonchar, align 2 + %call = call i32 @get_scalar1() + store i32 %call, i32* %x, align 4 + call void @end_scalar1() + %call1 = call i32 @get_scalar2() + store i32 %call1, i32* %y, align 4 + call void @end_scalar2() + %call2 = call i32 @get_scalar3() + store i32 %call2, i32* %z, align 4 + call void @end_scalar3() + %call3 = call i32 @get_addrof() + store i32 %call3, i32* %ptr, align 4 + call void @end_addrof() + %call4 = call signext i16 @get_small_nonchar() + %arrayidx = getelementptr inbounds [2 x i16]* %small2, i32 0, i64 0 + store i16 %call4, i16* %arrayidx, align 2 + call void @end_small_nonchar() + %call5 = call i32 @get_large_nonchar() + %arrayidx6 = getelementptr inbounds [8 x i32]* %large2, i32 0, i64 0 + store i32 %call5, i32* %arrayidx6, align 4 + call void @end_large_nonchar() + %call7 = call signext i8 @get_small_char() + %arrayidx8 = getelementptr inbounds [2 x i8]* %small, i32 0, i64 0 + store i8 %call7, i8* %arrayidx8, align 1 + call void @end_small_char() + %call9 = call signext i8 @get_large_char() + %arrayidx10 = getelementptr inbounds [8 x i8]* %large, i32 0, i64 0 + store i8 %call9, i8* %arrayidx10, align 1 + call void @end_large_char() + %call11 = call signext i8 @get_struct_large_char() + %foo = getelementptr inbounds %struct.struct_large_char* %a, i32 0, i32 0 + %arrayidx12 = getelementptr inbounds [8 x i8]* %foo, i32 0, i64 0 + store i8 %call11, i8* %arrayidx12, align 1 + call void @end_struct_large_char() + %call13 = call signext i8 @get_struct_small_char() + %foo14 = getelementptr inbounds %struct.struct_small_char* %b, i32 0, i32 0 + %arrayidx15 = getelementptr inbounds [2 x i8]* %foo14, i32 0, i64 0 + store i8 
%call13, i8* %arrayidx15, align 1 + call void @end_struct_small_char() + %call16 = call i32 @get_struct_large_nonchar() + %foo17 = getelementptr inbounds %struct.struct_large_nonchar* %c, i32 0, i32 0 + %arrayidx18 = getelementptr inbounds [8 x i32]* %foo17, i32 0, i64 0 + store i32 %call16, i32* %arrayidx18, align 4 + call void @end_struct_large_nonchar() + %call19 = call signext i16 @get_struct_small_nonchar() + %foo20 = getelementptr inbounds %struct.struct_small_nonchar* %d, i32 0, i32 0 + %arrayidx21 = getelementptr inbounds [2 x i16]* %foo20, i32 0, i64 0 + store i16 %call19, i16* %arrayidx21, align 2 + call void @end_struct_small_nonchar() + %arraydecay = getelementptr inbounds [8 x i8]* %large, i32 0, i32 0 + %arraydecay22 = getelementptr inbounds [2 x i8]* %small, i32 0, i32 0 + %arraydecay23 = getelementptr inbounds [8 x i32]* %large2, i32 0, i32 0 + %arraydecay24 = getelementptr inbounds [2 x i16]* %small2, i32 0, i32 0 + %0 = load i32* %x, align 4 + %1 = load i32* %y, align 4 + %2 = load i32* %z, align 4 + %coerce.dive = getelementptr %struct.struct_large_char* %a, i32 0, i32 0 + %3 = bitcast [8 x i8]* %coerce.dive to i64* + %4 = load i64* %3, align 1 + %coerce.dive25 = getelementptr %struct.struct_small_char* %b, i32 0, i32 0 + %5 = bitcast [2 x i8]* %coerce.dive25 to i16* + %6 = load i16* %5, align 1 + %coerce.dive26 = getelementptr %struct.struct_small_nonchar* %d, i32 0, i32 0 + %7 = bitcast [2 x i16]* %coerce.dive26 to i32* + %8 = load i32* %7, align 1 + call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 8 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2) + ret void +} + +define void @layout_sspstrong() sspstrong { +entry: +; Expected stack layout for sspstrong is +; 144 large_nonchar . Group 1, nested arrays, +; 136 large_char . arrays >= ssp-buffer-size +; 128 struct_large_char . +; 96 struct_large_nonchar . 
+; 84+8 small_non_char | Group 2, nested arrays, +; 90 small_char | arrays < ssp-buffer-size +; 88 struct_small_char | +; 84 struct_small_nonchar | +; 80 addrof * Group 3, addr-of local +; 76 scalar1 + Group 4, everything else +; 72 scalar2 + +; 68 scalar3 + +; +; CHECK: layout_sspstrong: +; r[[SP]] is used as an offset into the stack later +; CHECK: add r[[SP:[0-9]+]], sp, #84 + +; CHECK: bl get_scalar1 +; CHECK: str r0, [sp, #76] +; CHECK: bl end_scalar1 + +; CHECK: bl get_scalar2 +; CHECK: str r0, [sp, #72] +; CHECK: bl end_scalar2 + +; CHECK: bl get_scalar3 +; CHECK: str r0, [sp, #68] +; CHECK: bl end_scalar3 + +; CHECK: bl get_addrof +; CHECK: str r0, [sp, #80] +; CHECK: bl end_addrof + +; CHECK: get_small_nonchar +; CHECK: strh r0, [r[[SP]], #8] +; CHECK: bl end_small_nonchar + +; CHECK: bl get_large_nonchar +; CHECK: str r0, [sp, #144] +; CHECK: bl end_large_nonchar + +; CHECK: bl get_small_char +; CHECK: strb r0, [sp, #90] +; CHECK: bl end_small_char + +; CHECK: bl get_large_char +; CHECK: strb r0, [sp, #136] +; CHECK: bl end_large_char + +; CHECK: bl get_struct_large_char +; CHECK: strb r0, [sp, #128] +; CHECK: bl end_struct_large_char + +; CHECK: bl get_struct_small_char +; CHECK: strb r0, [sp, #88] +; CHECK: bl end_struct_small_char + +; CHECK: bl get_struct_large_nonchar +; CHECK: str r0, [sp, #96] +; CHECK: bl end_struct_large_nonchar + +; CHECK: bl get_struct_small_nonchar +; CHECK: strh r0, [r[[SP]]] +; CHECK: bl end_struct_small_nonchar + %x = alloca i32, align 4 + %y = alloca i32, align 4 + %z = alloca i32, align 4 + %ptr = alloca i32, align 4 + %small2 = alloca [2 x i16], align 2 + %large2 = alloca [8 x i32], align 16 + %small = alloca [2 x i8], align 1 + %large = alloca [8 x i8], align 1 + %a = alloca %struct.struct_large_char, align 1 + %b = alloca %struct.struct_small_char, align 1 + %c = alloca %struct.struct_large_nonchar, align 8 + %d = alloca %struct.struct_small_nonchar, align 2 + %call = call i32 @get_scalar1() + store i32 %call, i32* %x, align 4 + call void @end_scalar1() + %call1 = call i32 @get_scalar2() + store i32 %call1, i32* %y, align 4 + call void @end_scalar2() + %call2 = call i32 @get_scalar3() + store i32 %call2, i32* %z, align 4 + call void @end_scalar3() + %call3 = call i32 @get_addrof() + store i32 %call3, i32* %ptr, align 4 + call void @end_addrof() + %call4 = call signext i16 @get_small_nonchar() + %arrayidx = getelementptr inbounds [2 x i16]* %small2, i32 0, i64 0 + store i16 %call4, i16* %arrayidx, align 2 + call void @end_small_nonchar() + %call5 = call i32 @get_large_nonchar() + %arrayidx6 = getelementptr inbounds [8 x i32]* %large2, i32 0, i64 0 + store i32 %call5, i32* %arrayidx6, align 4 + call void @end_large_nonchar() + %call7 = call signext i8 @get_small_char() + %arrayidx8 = getelementptr inbounds [2 x i8]* %small, i32 0, i64 0 + store i8 %call7, i8* %arrayidx8, align 1 + call void @end_small_char() + %call9 = call signext i8 @get_large_char() + %arrayidx10 = getelementptr inbounds [8 x i8]* %large, i32 0, i64 0 + store i8 %call9, i8* %arrayidx10, align 1 + call void @end_large_char() + %call11 = call signext i8 @get_struct_large_char() + %foo = getelementptr inbounds %struct.struct_large_char* %a, i32 0, i32 0 + %arrayidx12 = getelementptr inbounds [8 x i8]* %foo, i32 0, i64 0 + store i8 %call11, i8* %arrayidx12, align 1 + call void @end_struct_large_char() + %call13 = call signext i8 @get_struct_small_char() + %foo14 = getelementptr inbounds %struct.struct_small_char* %b, i32 0, i32 0 + %arrayidx15 = getelementptr inbounds [2 x i8]* 
%foo14, i32 0, i64 0 + store i8 %call13, i8* %arrayidx15, align 1 + call void @end_struct_small_char() + %call16 = call i32 @get_struct_large_nonchar() + %foo17 = getelementptr inbounds %struct.struct_large_nonchar* %c, i32 0, i32 0 + %arrayidx18 = getelementptr inbounds [8 x i32]* %foo17, i32 0, i64 0 + store i32 %call16, i32* %arrayidx18, align 4 + call void @end_struct_large_nonchar() + %call19 = call signext i16 @get_struct_small_nonchar() + %foo20 = getelementptr inbounds %struct.struct_small_nonchar* %d, i32 0, i32 0 + %arrayidx21 = getelementptr inbounds [2 x i16]* %foo20, i32 0, i64 0 + store i16 %call19, i16* %arrayidx21, align 2 + call void @end_struct_small_nonchar() + %arraydecay = getelementptr inbounds [8 x i8]* %large, i32 0, i32 0 + %arraydecay22 = getelementptr inbounds [2 x i8]* %small, i32 0, i32 0 + %arraydecay23 = getelementptr inbounds [8 x i32]* %large2, i32 0, i32 0 + %arraydecay24 = getelementptr inbounds [2 x i16]* %small2, i32 0, i32 0 + %0 = load i32* %x, align 4 + %1 = load i32* %y, align 4 + %2 = load i32* %z, align 4 + %coerce.dive = getelementptr %struct.struct_large_char* %a, i32 0, i32 0 + %3 = bitcast [8 x i8]* %coerce.dive to i64* + %4 = load i64* %3, align 1 + %coerce.dive25 = getelementptr %struct.struct_small_char* %b, i32 0, i32 0 + %5 = bitcast [2 x i8]* %coerce.dive25 to i16* + %6 = load i16* %5, align 1 + %coerce.dive26 = getelementptr %struct.struct_small_nonchar* %d, i32 0, i32 0 + %7 = bitcast [2 x i16]* %coerce.dive26 to i32* + %8 = load i32* %7, align 1 + call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 8 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2) + ret void +} + +define void @layout_sspreq() sspreq { +entry: +; Expected stack layout for sspreq is the same as sspstrong +; +; CHECK: layout_sspreq: +; r[[SP]] is used as an offset into the stack later +; CHECK: add r[[SP:[0-9]+]], sp, #84 + +; CHECK: bl get_scalar1 +; CHECK: str r0, [sp, #76] +; CHECK: bl end_scalar1 + +; CHECK: bl get_scalar2 +; CHECK: str r0, [sp, #72] +; CHECK: bl end_scalar2 + +; CHECK: bl get_scalar3 +; CHECK: str r0, [sp, #68] +; CHECK: bl end_scalar3 + +; CHECK: bl get_addrof +; CHECK: str r0, [sp, #80] +; CHECK: bl end_addrof + +; CHECK: get_small_nonchar +; CHECK: strh r0, [r[[SP]], #8] +; CHECK: bl end_small_nonchar + +; CHECK: bl get_large_nonchar +; CHECK: str r0, [sp, #144] +; CHECK: bl end_large_nonchar + +; CHECK: bl get_small_char +; CHECK: strb r0, [sp, #90] +; CHECK: bl end_small_char + +; CHECK: bl get_large_char +; CHECK: strb r0, [sp, #136] +; CHECK: bl end_large_char + +; CHECK: bl get_struct_large_char +; CHECK: strb r0, [sp, #128] +; CHECK: bl end_struct_large_char + +; CHECK: bl get_struct_small_char +; CHECK: strb r0, [sp, #88] +; CHECK: bl end_struct_small_char + +; CHECK: bl get_struct_large_nonchar +; CHECK: str r0, [sp, #96] +; CHECK: bl end_struct_large_nonchar + +; CHECK: bl get_struct_small_nonchar +; CHECK: strh r0, [r[[SP]]] +; CHECK: bl end_struct_small_nonchar + %x = alloca i32, align 4 + %y = alloca i32, align 4 + %z = alloca i32, align 4 + %ptr = alloca i32, align 4 + %small2 = alloca [2 x i16], align 2 + %large2 = alloca [8 x i32], align 16 + %small = alloca [2 x i8], align 1 + %large = alloca [8 x i8], align 1 + %a = alloca %struct.struct_large_char, align 1 + %b = alloca %struct.struct_small_char, align 1 + %c = alloca %struct.struct_large_nonchar, align 8 + %d = alloca %struct.struct_small_nonchar, align 2 + %call = call i32 
@get_scalar1() + store i32 %call, i32* %x, align 4 + call void @end_scalar1() + %call1 = call i32 @get_scalar2() + store i32 %call1, i32* %y, align 4 + call void @end_scalar2() + %call2 = call i32 @get_scalar3() + store i32 %call2, i32* %z, align 4 + call void @end_scalar3() + %call3 = call i32 @get_addrof() + store i32 %call3, i32* %ptr, align 4 + call void @end_addrof() + %call4 = call signext i16 @get_small_nonchar() + %arrayidx = getelementptr inbounds [2 x i16]* %small2, i32 0, i64 0 + store i16 %call4, i16* %arrayidx, align 2 + call void @end_small_nonchar() + %call5 = call i32 @get_large_nonchar() + %arrayidx6 = getelementptr inbounds [8 x i32]* %large2, i32 0, i64 0 + store i32 %call5, i32* %arrayidx6, align 4 + call void @end_large_nonchar() + %call7 = call signext i8 @get_small_char() + %arrayidx8 = getelementptr inbounds [2 x i8]* %small, i32 0, i64 0 + store i8 %call7, i8* %arrayidx8, align 1 + call void @end_small_char() + %call9 = call signext i8 @get_large_char() + %arrayidx10 = getelementptr inbounds [8 x i8]* %large, i32 0, i64 0 + store i8 %call9, i8* %arrayidx10, align 1 + call void @end_large_char() + %call11 = call signext i8 @get_struct_large_char() + %foo = getelementptr inbounds %struct.struct_large_char* %a, i32 0, i32 0 + %arrayidx12 = getelementptr inbounds [8 x i8]* %foo, i32 0, i64 0 + store i8 %call11, i8* %arrayidx12, align 1 + call void @end_struct_large_char() + %call13 = call signext i8 @get_struct_small_char() + %foo14 = getelementptr inbounds %struct.struct_small_char* %b, i32 0, i32 0 + %arrayidx15 = getelementptr inbounds [2 x i8]* %foo14, i32 0, i64 0 + store i8 %call13, i8* %arrayidx15, align 1 + call void @end_struct_small_char() + %call16 = call i32 @get_struct_large_nonchar() + %foo17 = getelementptr inbounds %struct.struct_large_nonchar* %c, i32 0, i32 0 + %arrayidx18 = getelementptr inbounds [8 x i32]* %foo17, i32 0, i64 0 + store i32 %call16, i32* %arrayidx18, align 4 + call void @end_struct_large_nonchar() + %call19 = call signext i16 @get_struct_small_nonchar() + %foo20 = getelementptr inbounds %struct.struct_small_nonchar* %d, i32 0, i32 0 + %arrayidx21 = getelementptr inbounds [2 x i16]* %foo20, i32 0, i64 0 + store i16 %call19, i16* %arrayidx21, align 2 + call void @end_struct_small_nonchar() + %arraydecay = getelementptr inbounds [8 x i8]* %large, i32 0, i32 0 + %arraydecay22 = getelementptr inbounds [2 x i8]* %small, i32 0, i32 0 + %arraydecay23 = getelementptr inbounds [8 x i32]* %large2, i32 0, i32 0 + %arraydecay24 = getelementptr inbounds [2 x i16]* %small2, i32 0, i32 0 + %0 = load i32* %x, align 4 + %1 = load i32* %y, align 4 + %2 = load i32* %z, align 4 + %coerce.dive = getelementptr %struct.struct_large_char* %a, i32 0, i32 0 + %3 = bitcast [8 x i8]* %coerce.dive to i64* + %4 = load i64* %3, align 1 + %coerce.dive25 = getelementptr %struct.struct_small_char* %b, i32 0, i32 0 + %5 = bitcast [2 x i8]* %coerce.dive25 to i16* + %6 = load i16* %5, align 1 + %coerce.dive26 = getelementptr %struct.struct_small_nonchar* %d, i32 0, i32 0 + %7 = bitcast [2 x i16]* %coerce.dive26 to i32* + %8 = load i32* %7, align 1 + call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 8 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2) + ret void +} + +define void @struct_with_protectable_arrays() sspstrong { +entry: +; Check to ensure that a structure which contains a small array followed by a +; large array is assigned to the stack properly as a large 
object. +; CHECK: struct_with_protectable_arrays: +; CHECK: bl get_struct_small_char +; CHECK: strb r0, [sp, #68] +; CHECK: bl end_struct_small_char +; CHECK: bl get_struct_large_char2 +; CHECK: strb r0, [sp, #106] +; CHECK: bl end_struct_large_char2 + %a = alloca %struct.struct_small_char, align 1 + %b = alloca %struct.struct_large_char2, align 1 + %d1 = alloca %struct.struct_large_nonchar, align 8 + %d2 = alloca %struct.struct_small_nonchar, align 2 + %call = call signext i8 @get_struct_small_char() + %foo = getelementptr inbounds %struct.struct_small_char* %a, i32 0, i32 0 + %arrayidx = getelementptr inbounds [2 x i8]* %foo, i32 0, i64 0 + store i8 %call, i8* %arrayidx, align 1 + call void @end_struct_small_char() + %call1 = call signext i8 @get_struct_large_char2() + %foo2 = getelementptr inbounds %struct.struct_large_char2* %b, i32 0, i32 1 + %arrayidx3 = getelementptr inbounds [8 x i8]* %foo2, i32 0, i64 0 + store i8 %call1, i8* %arrayidx3, align 1 + call void @end_struct_large_char2() + %0 = bitcast %struct.struct_large_char2* %b to %struct.struct_large_char* + %coerce.dive = getelementptr %struct.struct_large_char* %0, i32 0, i32 0 + %1 = bitcast [8 x i8]* %coerce.dive to i64* + %2 = load i64* %1, align 1 + %coerce.dive4 = getelementptr %struct.struct_small_char* %a, i32 0, i32 0 + %3 = bitcast [2 x i8]* %coerce.dive4 to i16* + %4 = load i16* %3, align 1 + %coerce.dive5 = getelementptr %struct.struct_small_nonchar* %d2, i32 0, i32 0 + %5 = bitcast [2 x i16]* %coerce.dive5 to i32* + %6 = load i32* %5, align 1 + call void @takes_all(i64 %2, i16 %4, %struct.struct_large_nonchar* byval align 8 %d1, i32 %6, i8* null, i8* null, i32* null, i16* null, i32* null, i32 0, i32 0, i32 0) + ret void +} + +declare i32 @get_scalar1() +declare void @end_scalar1() + +declare i32 @get_scalar2() +declare void @end_scalar2() + +declare i32 @get_scalar3() +declare void @end_scalar3() + +declare i32 @get_addrof() +declare void @end_addrof() + +declare signext i16 @get_small_nonchar() +declare void @end_small_nonchar() + +declare i32 @get_large_nonchar() +declare void @end_large_nonchar() + +declare signext i8 @get_small_char() +declare void @end_small_char() + +declare signext i8 @get_large_char() +declare void @end_large_char() + +declare signext i8 @get_struct_large_char() +declare void @end_struct_large_char() + +declare signext i8 @get_struct_large_char2() +declare void @end_struct_large_char2() + +declare signext i8 @get_struct_small_char() +declare void @end_struct_small_char() + +declare i32 @get_struct_large_nonchar() +declare void @end_struct_large_nonchar() + +declare signext i16 @get_struct_small_nonchar() +declare void @end_struct_small_nonchar() + +declare void @takes_all(i64, i16, %struct.struct_large_nonchar* byval align 8, i32, i8*, i8*, i32*, i16*, i32*, i32, i32, i32) diff --git a/test/CodeGen/ARM/stack-frame.ll b/test/CodeGen/ARM/stack-frame.ll index 1dd57dd..a419074 100644 --- a/test/CodeGen/ARM/stack-frame.ll +++ b/test/CodeGen/ARM/stack-frame.ll @@ -1,5 +1,4 @@ -; RUN: llc < %s -march=arm -; RUN: llc < %s -march=arm | grep add | count 1 +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s define void @f1() { %c = alloca i8, align 1 @@ -10,4 +9,6 @@ define i32 @f2() { ret i32 1 } +; CHECK: add +; CHECK-NOT: add diff --git a/test/CodeGen/ARM/str_post.ll b/test/CodeGen/ARM/str_post.ll index 32e3b85..a4f8640 100644 --- a/test/CodeGen/ARM/str_post.ll +++ b/test/CodeGen/ARM/str_post.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm | FileCheck %s +; RUN: llc -mtriple=arm-eabi %s -o - | 
FileCheck %s define i16 @test1(i32* %X, i16* %A) { ; CHECK-LABEL: test1: diff --git a/test/CodeGen/ARM/str_pre.ll b/test/CodeGen/ARM/str_pre.ll index d8b3f0e..60e6e9ec 100644 --- a/test/CodeGen/ARM/str_pre.ll +++ b/test/CodeGen/ARM/str_pre.ll @@ -1,5 +1,4 @@ -; RUN: llc < %s -march=arm | \ -; RUN: grep "str.*\!" | count 2 +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s define void @test1(i32* %X, i32* %A, i32** %dest) { %B = load i32* %A ; [#uses=1] @@ -16,3 +15,8 @@ define i16* @test2(i16* %X, i32* %A) { store i16 %tmp, i16* %Y ret i16* %Y } + +; CHECK: str{{.*}}! +; CHECK: str{{.*}}! +; CHECK-NOT: str{{.*}}! + diff --git a/test/CodeGen/ARM/str_trunc.ll b/test/CodeGen/ARM/str_trunc.ll index 2f1166b..6739684 100644 --- a/test/CodeGen/ARM/str_trunc.ll +++ b/test/CodeGen/ARM/str_trunc.ll @@ -1,7 +1,4 @@ -; RUN: llc < %s -march=arm | \ -; RUN: grep strb | count 1 -; RUN: llc < %s -march=arm | \ -; RUN: grep strh | count 1 +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s define void @test1(i32 %v, i16* %ptr) { %tmp = trunc i32 %v to i16 ; [#uses=1] @@ -14,3 +11,10 @@ define void @test2(i32 %v, i8* %ptr) { store i8 %tmp, i8* %ptr ret void } + +; CHECK: strh +; CHECK-NOT: strh + +; CHECK: strb +; CHECK-NOT: strb + diff --git a/test/CodeGen/ARM/struct_byval_arm_t1_t2.ll b/test/CodeGen/ARM/struct_byval_arm_t1_t2.ll index 1899269..0a9bc3c 100644 --- a/test/CodeGen/ARM/struct_byval_arm_t1_t2.ll +++ b/test/CodeGen/ARM/struct_byval_arm_t1_t2.ll @@ -13,7 +13,7 @@ ;structs at varying alignments. Each test is run for arm, thumb2 and thumb1. ;We check for the strings in the generated object code using llvm-objdump ;because it provides better assurance that we are generating instructions -;for the correct architecture. Otherwise we could accidently generate an +;for the correct architecture. Otherwise we could accidentally generate an ;ARM instruction for THUMB1 and wouldn't detect it because the assembly ;code representation is the same, but the object code would be generated ;incorrectly. 
For each test we check for the label, a load instruction of the diff --git a/test/CodeGen/ARM/sub.ll b/test/CodeGen/ARM/sub.ll index 7f82ca7..67bde2a 100644 --- a/test/CodeGen/ARM/sub.ll +++ b/test/CodeGen/ARM/sub.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=arm -mcpu=cortex-a8 < %s | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s ; 171 = 0x000000ab define i64 @f1(i64 %a) { diff --git a/test/CodeGen/ARM/subreg-remat.ll b/test/CodeGen/ARM/subreg-remat.ll index 1bc0315..d5abfc0 100644 --- a/test/CodeGen/ARM/subreg-remat.ll +++ b/test/CodeGen/ARM/subreg-remat.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 -pre-RA-sched=source | FileCheck %s +; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 -pre-RA-sched=source -no-integrated-as | FileCheck %s target triple = "thumbv7-apple-ios" ; ; diff --git a/test/CodeGen/ARM/sxt_rot.ll b/test/CodeGen/ARM/sxt_rot.ll index 656cd93..5ddea2e 100644 --- a/test/CodeGen/ARM/sxt_rot.ll +++ b/test/CodeGen/ARM/sxt_rot.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+v6 | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+v6 %s -o - | FileCheck %s define i32 @test0(i8 %A) { ; CHECK: test0 diff --git a/test/CodeGen/ARM/t2-imm.ll b/test/CodeGen/ARM/t2-imm.ll index 8b41459..dd75cd1 100644 --- a/test/CodeGen/ARM/t2-imm.ll +++ b/test/CodeGen/ARM/t2-imm.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s +; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s define i32 @f6(i32 %a) { ; CHECK:f6 diff --git a/test/CodeGen/ARM/tail-call.ll b/test/CodeGen/ARM/tail-call.ll new file mode 100644 index 0000000..7711586 --- /dev/null +++ b/test/CodeGen/ARM/tail-call.ll @@ -0,0 +1,21 @@ +; RUN: llc -mtriple armv7 -O0 -o - < %s | FileCheck %s -check-prefix CHECK-TAIL +; RUN: llc -mtriple armv7 -O0 -disable-tail-calls -o - < %s \ +; RUN: | FileCheck %s -check-prefix CHECK-NO-TAIL + +declare i32 @callee(i32 %i) + +define i32 @caller(i32 %i) { +entry: + %r = tail call i32 @callee(i32 %i) + ret i32 %r +} + +; CHECK-TAIL-LABEL: caller +; CHECK-TAIL: b callee + +; CHECK-NO-TAIL-LABEL: caller +; CHECK-NO-TAIL: push {lr} +; CHECK-NO-TAIL: bl callee +; CHECK-NO-TAIL: pop {lr} +; CHECK-NO-TAIL: bx lr + diff --git a/test/CodeGen/ARM/taildup-branch-weight.ll b/test/CodeGen/ARM/taildup-branch-weight.ll new file mode 100644 index 0000000..0a16071 --- /dev/null +++ b/test/CodeGen/ARM/taildup-branch-weight.ll @@ -0,0 +1,54 @@ +; RUN: llc -mtriple=arm-eabi -print-machineinstrs=tailduplication -tail-dup-size=100 \ +; RUN: -enable-tail-merge=false -disable-cgp %s -o /dev/null 2>&1 \ +; RUN: | FileCheck %s + +; CHECK: Machine code for function test0: +; CHECK: Successors according to CFG: BB#1(4) BB#2(124) + +define void @test0(i32 %a, i32 %b, i32* %c, i32* %d) { +entry: + store i32 3, i32* %d + br label %B1 + +B2: + store i32 2, i32* %c + br label %B4 + +B3: + store i32 2, i32* %c + br label %B4 + +B1: + store i32 1, i32* %d + %test0 = icmp slt i32 %a, %b + br i1 %test0, label %B2, label %B3, !prof !0 + +B4: + ret void +} + +!0 = metadata !{metadata !"branch_weights", i32 4, i32 124} + +; CHECK: Machine code for function test1: +; CHECK: Successors according to CFG: BB#1(8) BB#2(248) + +@g0 = common global i32 0, align 4 + +define void @test1(i32 %a, i32 %b, i32* %c, i32* %d, i32* %e) { + + %test0 = icmp slt i32 %a, %b + br i1 %test0, label %B1, label %B2, !prof !1 + +B1: + br label %B3 + +B2: + store i32 2, i32* %c + br label %B3 + +B3: + store i32 3, 
i32* %e + ret void +} + +!1 = metadata !{metadata !"branch_weights", i32 248, i32 8} diff --git a/test/CodeGen/ARM/this-return.ll b/test/CodeGen/ARM/this-return.ll index cb42de6..c681a1c 100644 --- a/test/CodeGen/ARM/this-return.ll +++ b/test/CodeGen/ARM/this-return.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=armv6-linux-gnueabi -arm-tail-calls | FileCheck %s -check-prefix=CHECKELF -; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-tail-calls | FileCheck %s -check-prefix=CHECKT2D +; RUN: llc < %s -mtriple=armv6-linux-gnueabi | FileCheck %s -check-prefix=CHECKELF +; RUN: llc < %s -mtriple=thumbv7-apple-ios5.0 | FileCheck %s -check-prefix=CHECKT2D %struct.A = type { i8 } %struct.B = type { i32 } diff --git a/test/CodeGen/ARM/thumb-litpool.ll b/test/CodeGen/ARM/thumb-litpool.ll new file mode 100644 index 0000000..f68fdb6 --- /dev/null +++ b/test/CodeGen/ARM/thumb-litpool.ll @@ -0,0 +1,15 @@ +; RUN: llc -mtriple=thumbv6m-apple-macho %s -relocation-model=static -o - | FileCheck %s +; RUN: llc -mtriple=thumbv6m-apple-macho %s -relocation-model=pic -o - | FileCheck %s + +@var = global i8 zeroinitializer + +declare void @callee(i8*) + +define void @foo() minsize { +; CHECK-LABEL: foo: +; CHECK: ldr {{r[0-7]}}, LCPI0_0 + call void @callee(i8* @var) + call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7}"() + call void @callee(i8* @var) + ret void +} \ No newline at end of file diff --git a/test/CodeGen/ARM/thumb2-it-block.ll b/test/CodeGen/ARM/thumb2-it-block.ll index 47c5dcc..d954760 100644 --- a/test/CodeGen/ARM/thumb2-it-block.ll +++ b/test/CodeGen/ARM/thumb2-it-block.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s +; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s ; RUN: llc < %s -mtriple=thumbv8 | FileCheck %s ; PR11107 diff --git a/test/CodeGen/ARM/tls-models.ll b/test/CodeGen/ARM/tls-models.ll index ccc9032..42c1ba9 100644 --- a/test/CodeGen/ARM/tls-models.ll +++ b/test/CodeGen/ARM/tls-models.ll @@ -22,9 +22,9 @@ entry: ; Non-PIC code can use initial-exec, PIC code has to use general dynamic. ; CHECK-NONPIC-LABEL: f1: - ; CHECK-NONPIC: external_gd(gottpoff) + ; CHECK-NONPIC: external_gd(GOTTPOFF) ; CHECK-PIC-LABEL: f1: - ; CHECK-PIC: external_gd(tlsgd) + ; CHECK-PIC: external_gd(TLSGD) } define i32* @f2() { @@ -34,9 +34,9 @@ entry: ; Non-PIC code can use local exec, PIC code can use local dynamic, ; but that is not implemented, so falls back to general dynamic. ; CHECK-NONPIC-LABEL: f2: - ; CHECK-NONPIC: internal_gd(tpoff) + ; CHECK-NONPIC: internal_gd(TPOFF) ; CHECK-PIC-LABEL: f2: - ; CHECK-PIC: internal_gd(tlsgd) + ; CHECK-PIC: internal_gd(TLSGD) } @@ -49,9 +49,9 @@ entry: ; Non-PIC code can use initial exec, PIC should use local dynamic, ; but that is not implemented, so falls back to general dynamic. ; CHECK-NONPIC-LABEL: f3: - ; CHECK-NONPIC: external_ld(gottpoff) + ; CHECK-NONPIC: external_ld(GOTTPOFF) ; CHECK-PIC-LABEL: f3: - ; CHECK-PIC: external_ld(tlsgd) + ; CHECK-PIC: external_ld(TLSGD) } define i32* @f4() { @@ -61,9 +61,9 @@ entry: ; Non-PIC code can use local exec, PIC code can use local dynamic, ; but that is not implemented, so it falls back to general dynamic. ; CHECK-NONPIC-LABEL: f4: - ; CHECK-NONPIC: internal_ld(tpoff) + ; CHECK-NONPIC: internal_ld(TPOFF) ; CHECK-PIC-LABEL: f4: - ; CHECK-PIC: internal_ld(tlsgd) + ; CHECK-PIC: internal_ld(TLSGD) } @@ -75,9 +75,9 @@ entry: ; Non-PIC and PIC code will use initial exec as specified. 
; CHECK-NONPIC-LABEL: f5: - ; CHECK-NONPIC: external_ie(gottpoff) + ; CHECK-NONPIC: external_ie(GOTTPOFF) ; CHECK-PIC-LABEL: f5: - ; CHECK-PIC: external_ie(gottpoff) + ; CHECK-PIC: external_ie(GOTTPOFF) } define i32* @f6() { @@ -86,9 +86,9 @@ entry: ; Non-PIC code can use local exec, PIC code use initial exec as specified. ; CHECK-NONPIC-LABEL: f6: - ; CHECK-NONPIC: internal_ie(tpoff) + ; CHECK-NONPIC: internal_ie(TPOFF) ; CHECK-PIC-LABEL: f6: - ; CHECK-PIC: internal_ie(gottpoff) + ; CHECK-PIC: internal_ie(GOTTPOFF) } @@ -100,9 +100,9 @@ entry: ; Non-PIC and PIC code will use local exec as specified. ; CHECK-NONPIC-LABEL: f7: - ; CHECK-NONPIC: external_le(tpoff) + ; CHECK-NONPIC: external_le(TPOFF) ; CHECK-PIC-LABEL: f7: - ; CHECK-PIC: external_le(tpoff) + ; CHECK-PIC: external_le(TPOFF) } define i32* @f8() { @@ -111,7 +111,7 @@ entry: ; Non-PIC and PIC code will use local exec as specified. ; CHECK-NONPIC-LABEL: f8: - ; CHECK-NONPIC: internal_le(tpoff) + ; CHECK-NONPIC: internal_le(TPOFF) ; CHECK-PIC-LABEL: f8: - ; CHECK-PIC: internal_le(tpoff) + ; CHECK-PIC: internal_le(TPOFF) } diff --git a/test/CodeGen/ARM/tls1.ll b/test/CodeGen/ARM/tls1.ll index ec4278c..a1ca0b7 100644 --- a/test/CodeGen/ARM/tls1.ll +++ b/test/CodeGen/ARM/tls1.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \ -; RUN: grep "i(tpoff)" +; RUN: grep "i(TPOFF)" ; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \ ; RUN: grep "__aeabi_read_tp" ; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi \ diff --git a/test/CodeGen/ARM/tls2.ll b/test/CodeGen/ARM/tls2.ll index f048125..24b4794 100644 --- a/test/CodeGen/ARM/tls2.ll +++ b/test/CodeGen/ARM/tls2.ll @@ -8,7 +8,7 @@ define i32 @f() { ; CHECK-NONPIC-LABEL: f: ; CHECK-NONPIC: ldr {{r.}}, [pc, {{r.}}] -; CHECK-NONPIC: i(gottpoff) +; CHECK-NONPIC: i(GOTTPOFF) ; CHECK-PIC-LABEL: f: ; CHECK-PIC: __tls_get_addr entry: @@ -19,7 +19,7 @@ entry: define i32* @g() { ; CHECK-NONPIC-LABEL: g: ; CHECK-NONPIC: ldr {{r.}}, [pc, {{r.}}] -; CHECK-NONPIC: i(gottpoff) +; CHECK-NONPIC: i(GOTTPOFF) ; CHECK-PIC-LABEL: g: ; CHECK-PIC: __tls_get_addr entry: diff --git a/test/CodeGen/ARM/trunc_ldr.ll b/test/CodeGen/ARM/trunc_ldr.ll index 3033c2b..2ce9b89 100644 --- a/test/CodeGen/ARM/trunc_ldr.ll +++ b/test/CodeGen/ARM/trunc_ldr.ll @@ -1,5 +1,4 @@ -; RUN: llc < %s -march=arm | grep ldrb.*7 | count 1 -; RUN: llc < %s -march=arm | grep ldrsb.*7 | count 1 +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s %struct.A = type { i8, i8, i8, i8, i16, i8, i8, %struct.B** } %struct.B = type { float, float, i32, i32, i32, [0 x i8] } @@ -22,3 +21,10 @@ define i32 @f2(%struct.A* %d) { %tmp57 = sext i8 %tmp56 to i32 ret i32 %tmp57 } + +; CHECK: ldrb{{.*}}7 +; CHECK-NOT: ldrb{{.*}}7 + +; CHECK: ldrsb{{.*}}7 +; CHECK-NOT: ldrsb{{.*}}7 + diff --git a/test/CodeGen/ARM/truncstore-dag-combine.ll b/test/CodeGen/ARM/truncstore-dag-combine.ll index 5665440..360e3e1 100644 --- a/test/CodeGen/ARM/truncstore-dag-combine.ll +++ b/test/CodeGen/ARM/truncstore-dag-combine.ll @@ -1,5 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+v4t | not grep orr -; RUN: llc < %s -march=arm -mattr=+v4t | not grep mov +; RUN: llc -mtriple=arm-eabi -mattr=+v4t %s -o - | FileCheck %s define void @bar(i8* %P, i16* %Q) { entry: @@ -16,3 +15,7 @@ entry: store i32 %tmp, i32* %P1, align 1 ret void } + +; CHECK-NOT: orr +; CHECK-NOT: mov + diff --git a/test/CodeGen/ARM/tst_teq.ll b/test/CodeGen/ARM/tst_teq.ll index c83111e..bac4fd9 100644 --- a/test/CodeGen/ARM/tst_teq.ll +++ b/test/CodeGen/ARM/tst_teq.ll @@ -1,5 +1,4 @@ 
-; RUN: llc < %s -march=arm | grep tst -; RUN: llc < %s -march=arm | grep teq +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s define i32 @f(i32 %a) { entry: @@ -16,3 +15,7 @@ entry: %retval = select i1 %0, i32 20, i32 10 ; [#uses=1] ret i32 %retval } + +; CHECK: tst +; CHECK: teq + diff --git a/test/CodeGen/ARM/twoaddrinstr.ll b/test/CodeGen/ARM/twoaddrinstr.ll index 2172f6b..8da875f 100644 --- a/test/CodeGen/ARM/twoaddrinstr.ll +++ b/test/CodeGen/ARM/twoaddrinstr.ll @@ -1,5 +1,5 @@ ; Tests for the two-address instruction pass. -; RUN: llc -march=arm -mcpu=cortex-a9 < %s | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 %s -o - | FileCheck %s define void @PR13378() nounwind { ; This was orriginally a crasher trying to schedule the instructions. diff --git a/test/CodeGen/ARM/unaligned_load_store.ll b/test/CodeGen/ARM/unaligned_load_store.ll index e7ff63f..72163ae 100644 --- a/test/CodeGen/ARM/unaligned_load_store.ll +++ b/test/CodeGen/ARM/unaligned_load_store.ll @@ -1,6 +1,11 @@ -; RUN: llc < %s -march=arm -pre-RA-sched=source | FileCheck %s -check-prefix=EXPANDED -; RUN: llc < %s -mtriple=armv6-apple-darwin -mcpu=cortex-a8 -mattr=-neon -arm-strict-align -pre-RA-sched=source | FileCheck %s -check-prefix=EXPANDED -; RUN: llc < %s -mtriple=armv6-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=UNALIGNED +; RUN: llc -mtriple=arm-eabi -pre-RA-sched=source %s -o - \ +; RUN: | FileCheck %s -check-prefix=EXPANDED + +; RUN: llc -mtriple=armv6-apple-darwin -mcpu=cortex-a8 -mattr=-neon -arm-strict-align -pre-RA-sched=source %s -o - \ +; RUN: | FileCheck %s -check-prefix=EXPANDED + +; RUN: llc -mtriple=armv6-apple-darwin -mcpu=cortex-a8 %s -o - \ +; RUN: | FileCheck %s -check-prefix=UNALIGNED ; rdar://7113725 ; rdar://12091029 diff --git a/test/CodeGen/ARM/unaligned_load_store_vector.ll b/test/CodeGen/ARM/unaligned_load_store_vector.ll index 968a2c7..000ed48 100644 --- a/test/CodeGen/ARM/unaligned_load_store_vector.ll +++ b/test/CodeGen/ARM/unaligned_load_store_vector.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=arm -mattr=+v7 -mattr=+neon | FileCheck %s +;RUN: llc -mtriple=arm-eabi -mattr=+v7 -mattr=+neon %s -o - | FileCheck %s ;ALIGN = 1 ;SIZE = 64 diff --git a/test/CodeGen/ARM/unord.ll b/test/CodeGen/ARM/unord.ll index bd28034..7243e99 100644 --- a/test/CodeGen/ARM/unord.ll +++ b/test/CodeGen/ARM/unord.ll @@ -1,5 +1,4 @@ -; RUN: llc < %s -march=arm | grep movne | count 1 -; RUN: llc < %s -march=arm | grep moveq | count 1 +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s define i32 @f1(float %X, float %Y) { %tmp = fcmp uno float %X, %Y @@ -12,3 +11,10 @@ define i32 @f2(float %X, float %Y) { %retval = select i1 %tmp, i32 1, i32 -1 ret i32 %retval } + +; CHECK: movne +; CHECK-NOT: movne + +; CHECK: moveq +; CHECK-NOT: moveq + diff --git a/test/CodeGen/ARM/uxt_rot.ll b/test/CodeGen/ARM/uxt_rot.ll index 628c079..235416a 100644 --- a/test/CodeGen/ARM/uxt_rot.ll +++ b/test/CodeGen/ARM/uxt_rot.ll @@ -1,6 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+v6 | grep uxtb | count 1 -; RUN: llc < %s -march=arm -mattr=+v6 | grep uxtab | count 1 -; RUN: llc < %s -march=arm -mattr=+v6 | grep uxth | count 1 +; RUN: llc -mtriple=arm-eabi -mattr=+v6 %s -o - | FileCheck %s define zeroext i8 @test1(i32 %A.u) { %B.u = trunc i32 %A.u to i8 @@ -22,3 +20,13 @@ define zeroext i32 @test3(i32 %A.u) { %F.u = zext i16 %E.u to i32 ret i32 %F.u } + +; CHECK: uxtb +; CHECK-NOT: uxtb + +; CHECK: uxtab +; CHECK-NOT: uxtab + +; CHECK: uxth +; CHECK-NOT: uxth + diff --git a/test/CodeGen/ARM/v1-constant-fold.ll 
b/test/CodeGen/ARM/v1-constant-fold.ll index eb49a81..7421d25 100644 --- a/test/CodeGen/ARM/v1-constant-fold.ll +++ b/test/CodeGen/ARM/v1-constant-fold.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mattr=+v7,+vfp3,-neon | FileCheck %s +; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mattr=+v7,+vfp3,-neon | FileCheck %s ; PR15611. Check that we don't crash when constant folding v1i32 types. @@ -11,7 +11,7 @@ bb: %tmp3 = insertelement <4 x i32> %tmp2, i32 0, i32 3 %tmp4 = add <4 x i32> %tmp3, ; CHECK: bl bar - tail call void @bar(<4 x i32> %tmp4) + call void @bar(<4 x i32> %tmp4) ret void } diff --git a/test/CodeGen/ARM/vaba.ll b/test/CodeGen/ARM/vaba.ll index 97139e9..6478b18 100644 --- a/test/CodeGen/ARM/vaba.ll +++ b/test/CodeGen/ARM/vaba.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define <8 x i8> @vabas8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { ;CHECK-LABEL: vabas8: diff --git a/test/CodeGen/ARM/vabd.ll b/test/CodeGen/ARM/vabd.ll index 2eb6d93..9ba8be2 100644 --- a/test/CodeGen/ARM/vabd.ll +++ b/test/CodeGen/ARM/vabd.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define <8 x i8> @vabds8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vabds8: diff --git a/test/CodeGen/ARM/vabs.ll b/test/CodeGen/ARM/vabs.ll index 96dd38e..3a1aec8 100644 --- a/test/CodeGen/ARM/vabs.ll +++ b/test/CodeGen/ARM/vabs.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define <8 x i8> @vabss8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vabss8: @@ -28,7 +28,7 @@ define <2 x float> @vabsf32(<2 x float>* %A) nounwind { ;CHECK-LABEL: vabsf32: ;CHECK: vabs.f32 %tmp1 = load <2 x float>* %A - %tmp2 = call <2 x float> @llvm.arm.neon.vabs.v2f32(<2 x float> %tmp1) + %tmp2 = call <2 x float> @llvm.fabs.v2f32(<2 x float> %tmp1) ret <2 x float> %tmp2 } @@ -60,19 +60,19 @@ define <4 x float> @vabsQf32(<4 x float>* %A) nounwind { ;CHECK-LABEL: vabsQf32: ;CHECK: vabs.f32 %tmp1 = load <4 x float>* %A - %tmp2 = call <4 x float> @llvm.arm.neon.vabs.v4f32(<4 x float> %tmp1) + %tmp2 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %tmp1) ret <4 x float> %tmp2 } declare <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8>) nounwind readnone declare <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16>) nounwind readnone declare <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32>) nounwind readnone -declare <2 x float> @llvm.arm.neon.vabs.v2f32(<2 x float>) nounwind readnone +declare <2 x float> @llvm.fabs.v2f32(<2 x float>) nounwind readnone declare <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8>) nounwind readnone declare <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16>) nounwind readnone declare <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32>) nounwind readnone -declare <4 x float> @llvm.arm.neon.vabs.v4f32(<4 x float>) nounwind readnone +declare <4 x float> @llvm.fabs.v4f32(<4 x float>) nounwind readnone define <8 x i8> @vqabss8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vqabss8: diff --git a/test/CodeGen/ARM/vadd.ll b/test/CodeGen/ARM/vadd.ll index fcb5408..86b0d02 100644 --- a/test/CodeGen/ARM/vadd.ll +++ b/test/CodeGen/ARM/vadd.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define <8 x i8> @vaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vaddi8: 
diff --git a/test/CodeGen/ARM/varargs-spill-stack-align-nacl.ll b/test/CodeGen/ARM/varargs-spill-stack-align-nacl.ll new file mode 100644 index 0000000..19d6cbe --- /dev/null +++ b/test/CodeGen/ARM/varargs-spill-stack-align-nacl.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -mtriple=arm-nacl-gnueabi | FileCheck %s + +declare void @llvm.va_start(i8*) +declare void @external_func(i8*) + +@va_list = external global i8* + +; On ARM, varargs arguments are passed in r0-r3 with the rest on the +; stack. A varargs function must therefore spill rN-r3 just below the +; function's initial stack pointer. +; +; This test checks for a bug in which a gap was left between the spill +; area and varargs arguments on the stack when using 16 byte stack +; alignment. + +define void @varargs_func(i32 %arg1, ...) { + call void @llvm.va_start(i8* bitcast (i8** @va_list to i8*)) + call void @external_func(i8* bitcast (i8** @va_list to i8*)) + ret void +} +; CHECK-LABEL: varargs_func: +; Reserve space for the varargs save area. This currently reserves +; more than enough (16 bytes rather than the 12 bytes needed). +; CHECK: sub sp, sp, #16 +; CHECK: push {lr} +; Align the stack pointer to a multiple of 16. +; CHECK: sub sp, sp, #12 +; Calculate the address of the varargs save area and save varargs +; arguments into it. +; CHECK-NEXT: add r0, sp, #20 +; CHECK-NEXT: stm r0, {r1, r2, r3} diff --git a/test/CodeGen/ARM/vargs.ll b/test/CodeGen/ARM/vargs.ll index 5f3536c..3b810f3 100644 --- a/test/CodeGen/ARM/vargs.ll +++ b/test/CodeGen/ARM/vargs.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=arm +; RUN: llc -mtriple=arm-eabi %s -o /dev/null + @str = internal constant [43 x i8] c"Hello World %d %d %d %d %d %d %d %d %d %d\0A\00" ; <[43 x i8]*> [#uses=1] define i32 @main() { diff --git a/test/CodeGen/ARM/vbits.ll b/test/CodeGen/ARM/vbits.ll index 7b48441..dfeaacf 100644 --- a/test/CodeGen/ARM/vbits.ll +++ b/test/CodeGen/ARM/vbits.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon -mcpu=cortex-a8 | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon -mcpu=cortex-a8 %s -o - | FileCheck %s define <8 x i8> @v_andi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: v_andi8: diff --git a/test/CodeGen/ARM/vbsl.ll b/test/CodeGen/ARM/vbsl.ll index 1e53e51..ddc37cc 100644 --- a/test/CodeGen/ARM/vbsl.ll +++ b/test/CodeGen/ARM/vbsl.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s ; rdar://12471808 diff --git a/test/CodeGen/ARM/vceq.ll b/test/CodeGen/ARM/vceq.ll index 0a1f2eb..e3202e4 100644 --- a/test/CodeGen/ARM/vceq.ll +++ b/test/CodeGen/ARM/vceq.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define <8 x i8> @vceqi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vceqi8: diff --git a/test/CodeGen/ARM/vcge.ll b/test/CodeGen/ARM/vcge.ll index 81a59db..3739f5e 100644 --- a/test/CodeGen/ARM/vcge.ll +++ b/test/CodeGen/ARM/vcge.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define <8 x i8> @vcges8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vcges8: @@ -145,7 +145,7 @@ define <2 x i32> @vacgef32(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK: vacge.f32 %tmp1 = load <2 x float>* %A %tmp2 = load <2 x float>* %B - %tmp3 = call <2 x i32> @llvm.arm.neon.vacged(<2 x float> %tmp1, <2 x float> %tmp2) + %tmp3 = call <2 x i32> 
@llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) ret <2 x i32> %tmp3 } @@ -154,12 +154,12 @@ define <4 x i32> @vacgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK: vacge.f32 %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vacgeq(<4 x float> %tmp1, <4 x float> %tmp2) + %tmp3 = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) ret <4 x i32> %tmp3 } -declare <2 x i32> @llvm.arm.neon.vacged(<2 x float>, <2 x float>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vacgeq(<4 x float>, <4 x float>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float>, <2 x float>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float>, <4 x float>) nounwind readnone define <8 x i8> @vcgei8Z(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vcgei8Z: diff --git a/test/CodeGen/ARM/vcgt.ll b/test/CodeGen/ARM/vcgt.ll index 056866f..2f736f6 100644 --- a/test/CodeGen/ARM/vcgt.ll +++ b/test/CodeGen/ARM/vcgt.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon -regalloc=basic %s -o - | FileCheck %s define <8 x i8> @vcgts8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vcgts8: @@ -146,7 +146,7 @@ define <2 x i32> @vacgtf32(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK: vacgt.f32 %tmp1 = load <2 x float>* %A %tmp2 = load <2 x float>* %B - %tmp3 = call <2 x i32> @llvm.arm.neon.vacgtd(<2 x float> %tmp1, <2 x float> %tmp2) + %tmp3 = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) ret <2 x i32> %tmp3 } @@ -155,7 +155,7 @@ define <4 x i32> @vacgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK: vacgt.f32 %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vacgtq(<4 x float> %tmp1, <4 x float> %tmp2) + %tmp3 = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) ret <4 x i32> %tmp3 } @@ -172,8 +172,8 @@ define <4 x i32> @vcgt_zext(<4 x float>* %A, <4 x float>* %B) nounwind { ret <4 x i32> %tmp4 } -declare <2 x i32> @llvm.arm.neon.vacgtd(<2 x float>, <2 x float>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vacgtq(<4 x float>, <4 x float>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float>, <2 x float>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float>, <4 x float>) nounwind readnone define <8 x i8> @vcgti8Z(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vcgti8Z: diff --git a/test/CodeGen/ARM/vcnt.ll b/test/CodeGen/ARM/vcnt.ll index 0b53979..390559b 100644 --- a/test/CodeGen/ARM/vcnt.ll +++ b/test/CodeGen/ARM/vcnt.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s ; NB: this tests vcnt, vclz, and vcls define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind { diff --git a/test/CodeGen/ARM/vcombine.ll b/test/CodeGen/ARM/vcombine.ll index 527f93b..d611267 100644 --- a/test/CodeGen/ARM/vcombine.ll +++ b/test/CodeGen/ARM/vcombine.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -float-abi=soft -mattr=+neon %s -o - | FileCheck %s define <16 x i8> @vcombine8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; CHECK: vcombine8 diff --git 
a/test/CodeGen/ARM/vcvt.ll b/test/CodeGen/ARM/vcvt.ll index 4f17dc5..af4e6a3 100644 --- a/test/CodeGen/ARM/vcvt.ll +++ b/test/CodeGen/ARM/vcvt.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon,+fp16 | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon,+fp16 %s -o - | FileCheck %s define <2 x i32> @vcvt_f32tos32(<2 x float>* %A) nounwind { ;CHECK-LABEL: vcvt_f32tos32: diff --git a/test/CodeGen/ARM/vdup.ll b/test/CodeGen/ARM/vdup.ll index b24be26..89f355c 100644 --- a/test/CodeGen/ARM/vdup.ll +++ b/test/CodeGen/ARM/vdup.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -float-abi=soft -mattr=+neon -verify-machineinstrs %s -o - \ +; RUN: | FileCheck %s define <8 x i8> @v_dup8(i8 %A) nounwind { ;CHECK-LABEL: v_dup8: @@ -331,3 +332,35 @@ define <8 x i8> @check_i8(<16 x i8> %v) nounwind { %2 = insertelement <8 x i8> %1, i8 %x, i32 1 ret <8 x i8> %2 } + +; Check that an SPR splat produces a vdup. + +define <2 x float> @check_spr_splat2(<2 x float> %p, i16 %q) { +;CHECK-LABEL: check_spr_splat2: +;CHECK: vdup.32 d + %conv = sitofp i16 %q to float + %splat.splatinsert = insertelement <2 x float> undef, float %conv, i32 0 + %splat.splat = shufflevector <2 x float> %splat.splatinsert, <2 x float> undef, <2 x i32> zeroinitializer + %sub = fsub <2 x float> %splat.splat, %p + ret <2 x float> %sub +} + +define <4 x float> @check_spr_splat4(<4 x float> %p, i16 %q) { +;CHECK-LABEL: check_spr_splat4: +;CHECK: vdup.32 q + %conv = sitofp i16 %q to float + %splat.splatinsert = insertelement <4 x float> undef, float %conv, i32 0 + %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer + %sub = fsub <4 x float> %splat.splat, %p + ret <4 x float> %sub +} + +define <4 x float> @check_spr_splat4_lane1(<4 x float> %p, i16 %q) { +;CHECK-LABEL: check_spr_splat4_lane1: +;CHECK: vdup.32 q{{.*}}, d{{.*}}[1] + %conv = sitofp i16 %q to float + %splat.splatinsert = insertelement <4 x float> undef, float %conv, i32 1 + %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> + %sub = fsub <4 x float> %splat.splat, %p + ret <4 x float> %sub +} diff --git a/test/CodeGen/ARM/vext.ll b/test/CodeGen/ARM/vext.ll index 5555a47..4407451 100644 --- a/test/CodeGen/ARM/vext.ll +++ b/test/CodeGen/ARM/vext.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define <8 x i8> @test_vextd(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: test_vextd: diff --git a/test/CodeGen/ARM/vfcmp.ll b/test/CodeGen/ARM/vfcmp.ll index a23db7b..4b2fea9 100644 --- a/test/CodeGen/ARM/vfcmp.ll +++ b/test/CodeGen/ARM/vfcmp.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s ; This tests fcmp operations that do not map directly to NEON instructions. diff --git a/test/CodeGen/ARM/vfp-regs-dwarf.ll b/test/CodeGen/ARM/vfp-regs-dwarf.ll new file mode 100644 index 0000000..4976729 --- /dev/null +++ b/test/CodeGen/ARM/vfp-regs-dwarf.ll @@ -0,0 +1,44 @@ +; RUN: llc -mtriple=armv7-linux-gnueabihf %s -o - | FileCheck %s + +; Generated from: +; void stack_offsets() { +; asm("" ::: "d8", "d9", "d11", "d13"); +; } +; Compiled with: "clang -target armv7-linux-gnueabihf -O3" + +; The important point we're checking here is that the .cfi directives describe +; the layout of the VFP registers correctly. 
The fact that the numbers are +; monotonic in memory is also a nice property to have. + +define void @stack_offsets() { +; CHECK-LABEL: stack_offsets: +; CHECK: vpush {d13} +; CHECK: vpush {d11} +; CHECK: vpush {d8, d9} + +; CHECK: .cfi_offset {{269|d13}}, -8 +; CHECK: .cfi_offset {{267|d11}}, -16 +; CHECK: .cfi_offset {{265|d9}}, -24 +; CHECK: .cfi_offset {{264|d8}}, -32 + +; CHECK: vpop {d8, d9} +; CHECK: vpop {d11} +; CHECK: vpop {d13} + call void asm sideeffect "", "~{d8},~{d9},~{d11},~{d13}"() #1 + ret void +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!8, !9} + +!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/Users/tim/llvm/build/tmp.c] [DW_LANG_C99] +!1 = metadata !{metadata !"tmp.c", metadata !"/Users/tim/llvm/build"} +!2 = metadata !{} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"bar", metadata !"bar", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @stack_offsets, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [bar] +!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/Users/tim/llvm/build/tmp.c] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{null} +!8 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} + diff --git a/test/CodeGen/ARM/vhadd.ll b/test/CodeGen/ARM/vhadd.ll index 9c2ed57..6183db3 100644 --- a/test/CodeGen/ARM/vhadd.ll +++ b/test/CodeGen/ARM/vhadd.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define <8 x i8> @vhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vhadds8: diff --git a/test/CodeGen/ARM/vhsub.ll b/test/CodeGen/ARM/vhsub.ll index 4bc2e87..f1a0cb2 100644 --- a/test/CodeGen/ARM/vhsub.ll +++ b/test/CodeGen/ARM/vhsub.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define <8 x i8> @vhsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vhsubs8: diff --git a/test/CodeGen/ARM/vicmp.ll b/test/CodeGen/ARM/vicmp.ll index 0a8f103..bebb320 100644 --- a/test/CodeGen/ARM/vicmp.ll +++ b/test/CodeGen/ARM/vicmp.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm -mattr=+neon %s -o - | FileCheck %s ; This tests icmp operations that do not map directly to NEON instructions. ; Not-equal (ne) operations are implemented by VCEQ/VMVN. 
Less-than (lt/ult) diff --git a/test/CodeGen/ARM/vld1.ll b/test/CodeGen/ARM/vld1.ll index 444d0d5..caeeada 100644 --- a/test/CodeGen/ARM/vld1.ll +++ b/test/CodeGen/ARM/vld1.ll @@ -1,5 +1,7 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s +; RUN: llc -mtriple=arm-eabi -float-abi=soft -mattr=+neon %s -o - | FileCheck %s + +; RUN: llc -mtriple=arm-eabi -float-abi=soft -mattr=+neon -regalloc=basic %s -o - \ +; RUN: | FileCheck %s define <8 x i8> @vld1i8(i8* %A) nounwind { ;CHECK-LABEL: vld1i8: diff --git a/test/CodeGen/ARM/vld2.ll b/test/CodeGen/ARM/vld2.ll index fddafea..7ac5cc7 100644 --- a/test/CodeGen/ARM/vld2.ll +++ b/test/CodeGen/ARM/vld2.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s %struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> } %struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> } diff --git a/test/CodeGen/ARM/vld3.ll b/test/CodeGen/ARM/vld3.ll index 400541f..171a03c 100644 --- a/test/CodeGen/ARM/vld3.ll +++ b/test/CodeGen/ARM/vld3.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o -| FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon -regalloc=basic %s -o - | FileCheck %s %struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> } %struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> } @@ -83,6 +83,19 @@ define <1 x i64> @vld3i64(i64* %A) nounwind { ret <1 x i64> %tmp4 } +define <1 x i64> @vld3i64_update(i64** %ptr, i64* %A) nounwind { +;CHECK-LABEL: vld3i64_update: +;CHECK: vld1.64 {d16, d17, d18}, [r1:64]! + %tmp0 = bitcast i64* %A to i8* + %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8* %tmp0, i32 16) + %tmp5 = getelementptr i64* %A, i32 3 + store i64* %tmp5, i64** %ptr + %tmp2 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 2 + %tmp4 = add <1 x i64> %tmp2, %tmp3 + ret <1 x i64> %tmp4 +} + define <16 x i8> @vld3Qi8(i8* %A) nounwind { ;CHECK-LABEL: vld3Qi8: ;Check the alignment value. Max for this instruction is 64 bits: diff --git a/test/CodeGen/ARM/vld4.ll b/test/CodeGen/ARM/vld4.ll index f7376b5..94ad143 100644 --- a/test/CodeGen/ARM/vld4.ll +++ b/test/CodeGen/ARM/vld4.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s %struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @@ -83,6 +83,19 @@ define <1 x i64> @vld4i64(i64* %A) nounwind { ret <1 x i64> %tmp4 } +define <1 x i64> @vld4i64_update(i64** %ptr, i64* %A) nounwind { +;CHECK-LABEL: vld4i64_update: +;CHECK: vld1.64 {d16, d17, d18, d19}, [r1:256]! + %tmp0 = bitcast i64* %A to i8* + %tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8* %tmp0, i32 64) + %tmp5 = getelementptr i64* %A, i32 4 + store i64* %tmp5, i64** %ptr + %tmp2 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 2 + %tmp4 = add <1 x i64> %tmp2, %tmp3 + ret <1 x i64> %tmp4 +} + define <16 x i8> @vld4Qi8(i8* %A) nounwind { ;CHECK-LABEL: vld4Qi8: ;Check the alignment value. 
Max for this instruction is 256 bits: diff --git a/test/CodeGen/ARM/vlddup.ll b/test/CodeGen/ARM/vlddup.ll index 5509f3e..64aac56 100644 --- a/test/CodeGen/ARM/vlddup.ll +++ b/test/CodeGen/ARM/vlddup.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -float-abi=soft -mattr=+neon %s -o - | FileCheck %s define <8 x i8> @vld1dupi8(i8* %A) nounwind { ;CHECK-LABEL: vld1dupi8: diff --git a/test/CodeGen/ARM/vldlane.ll b/test/CodeGen/ARM/vldlane.ll index 7a83a4c..c7d69ff 100644 --- a/test/CodeGen/ARM/vldlane.ll +++ b/test/CodeGen/ARM/vldlane.ll @@ -1,5 +1,7 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s +; RUN: llc -mtriple=arm-eabi -float-abi=soft -mattr=+neon %s -o - | FileCheck %s + +; RUN: llc -mtriple=arm-eabi -float-abi=soft -mattr=+neon -regalloc=basic %s -o - \ +; RUN: | FileCheck %s define <8 x i8> @vld1lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vld1lanei8: diff --git a/test/CodeGen/ARM/vminmax.ll b/test/CodeGen/ARM/vminmax.ll index 81f4578..1167ebe 100644 --- a/test/CodeGen/ARM/vminmax.ll +++ b/test/CodeGen/ARM/vminmax.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define <8 x i8> @vmins8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vmins8: diff --git a/test/CodeGen/ARM/vmla.ll b/test/CodeGen/ARM/vmla.ll index caf6556..6073fc5 100644 --- a/test/CodeGen/ARM/vmla.ll +++ b/test/CodeGen/ARM/vmla.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define <8 x i8> @vmlai8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind { ;CHECK-LABEL: vmlai8: diff --git a/test/CodeGen/ARM/vmls.ll b/test/CodeGen/ARM/vmls.ll index 61f3424..f86739c 100644 --- a/test/CodeGen/ARM/vmls.ll +++ b/test/CodeGen/ARM/vmls.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define <8 x i8> @vmlsi8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind { ;CHECK-LABEL: vmlsi8: diff --git a/test/CodeGen/ARM/vmov.ll b/test/CodeGen/ARM/vmov.ll index 8b63138..7900af4 100644 --- a/test/CodeGen/ARM/vmov.ll +++ b/test/CodeGen/ARM/vmov.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define <8 x i8> @v_movi8() nounwind { ;CHECK-LABEL: v_movi8: diff --git a/test/CodeGen/ARM/vmul.ll b/test/CodeGen/ARM/vmul.ll index de329ac..0fa43d8 100644 --- a/test/CodeGen/ARM/vmul.ll +++ b/test/CodeGen/ARM/vmul.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s define <8 x i8> @vmuli8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vmuli8: diff --git a/test/CodeGen/ARM/vneg.ll b/test/CodeGen/ARM/vneg.ll index 1be4f74..4d548dd 100644 --- a/test/CodeGen/ARM/vneg.ll +++ b/test/CodeGen/ARM/vneg.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define <8 x i8> @vnegs8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vnegs8: diff --git a/test/CodeGen/ARM/vpadal.ll b/test/CodeGen/ARM/vpadal.ll index a616a8d..ffeac73 100644 --- a/test/CodeGen/ARM/vpadal.ll +++ b/test/CodeGen/ARM/vpadal.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: 
llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define <4 x i16> @vpadals8(<4 x i16>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vpadals8: diff --git a/test/CodeGen/ARM/vpadd.ll b/test/CodeGen/ARM/vpadd.ll index f84721f..01cb1c7 100644 --- a/test/CodeGen/ARM/vpadd.ll +++ b/test/CodeGen/ARM/vpadd.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define <8 x i8> @vpaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vpaddi8: @@ -152,6 +152,17 @@ define void @addCombineToVPADDL() nounwind ssp { ret void } +; Legalization produces a EXTRACT_VECTOR_ELT DAG node which performs an extend from +; i16 to i32. In this case the input for the formed VPADDL needs to be a vector of i16s. +define <2 x i16> @fromExtendingExtractVectorElt(<4 x i16> %in) { +;CHECK-LABEL: fromExtendingExtractVectorElt: +;CHECK: vpaddl.s16 + %tmp1 = shufflevector <4 x i16> %in, <4 x i16> undef, <2 x i32> + %tmp2 = shufflevector <4 x i16> %in, <4 x i16> undef, <2 x i32> + %x = add <2 x i16> %tmp2, %tmp1 + ret <2 x i16> %x +} + declare <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8>) nounwind readnone declare <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16>) nounwind readnone declare <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32>) nounwind readnone diff --git a/test/CodeGen/ARM/vpminmax.ll b/test/CodeGen/ARM/vpminmax.ll index c68b319..0b893e5 100644 --- a/test/CodeGen/ARM/vpminmax.ll +++ b/test/CodeGen/ARM/vpminmax.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define <8 x i8> @vpmins8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vpmins8: diff --git a/test/CodeGen/ARM/vqadd.ll b/test/CodeGen/ARM/vqadd.ll index 7840766..81acc8b 100644 --- a/test/CodeGen/ARM/vqadd.ll +++ b/test/CodeGen/ARM/vqadd.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define <8 x i8> @vqadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vqadds8: diff --git a/test/CodeGen/ARM/vqshl.ll b/test/CodeGen/ARM/vqshl.ll index b5cd716..4afef6d 100644 --- a/test/CodeGen/ARM/vqshl.ll +++ b/test/CodeGen/ARM/vqshl.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define <8 x i8> @vqshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vqshls8: diff --git a/test/CodeGen/ARM/vqshrn.ll b/test/CodeGen/ARM/vqshrn.ll index 4abae70..f02482c 100644 --- a/test/CodeGen/ARM/vqshrn.ll +++ b/test/CodeGen/ARM/vqshrn.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define <8 x i8> @vqshrns8(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vqshrns8: diff --git a/test/CodeGen/ARM/vqsub.ll b/test/CodeGen/ARM/vqsub.ll index 90bc349..4af4380 100644 --- a/test/CodeGen/ARM/vqsub.ll +++ b/test/CodeGen/ARM/vqsub.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define <8 x i8> @vqsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vqsubs8: diff --git a/test/CodeGen/ARM/vrec.ll b/test/CodeGen/ARM/vrec.ll index c0deca9..91979e5 100644 --- a/test/CodeGen/ARM/vrec.ll +++ b/test/CodeGen/ARM/vrec.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi 
-mattr=+neon %s -o - | FileCheck %s define <2 x i32> @vrecpei32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vrecpei32: diff --git a/test/CodeGen/ARM/vrev.ll b/test/CodeGen/ARM/vrev.ll index b6da694..eb76ba6 100644 --- a/test/CodeGen/ARM/vrev.ll +++ b/test/CodeGen/ARM/vrev.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define <8 x i8> @test_vrev64D8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: test_vrev64D8: diff --git a/test/CodeGen/ARM/vsel.ll b/test/CodeGen/ARM/vsel.ll index 7e1f714..746b1b0 100644 --- a/test/CodeGen/ARM/vsel.ll +++ b/test/CodeGen/ARM/vsel.ll @@ -61,7 +61,7 @@ define void @test_vsel32slt(i32 %lhs32, i32 %rhs32, float %a, float %b) { %val1 = select i1 %tst1, float %a, float %b store float %val1, float* @varfloat ; CHECK: cmp r0, r1 -; CHECK: vselgt.f32 s0, s1, s0 +; CHECK: vselge.f32 s0, s1, s0 ret void } define void @test_vsel64slt(i32 %lhs32, i32 %rhs32, double %a, double %b) { @@ -70,7 +70,7 @@ define void @test_vsel64slt(i32 %lhs32, i32 %rhs32, double %a, double %b) { %val1 = select i1 %tst1, double %a, double %b store double %val1, double* @vardouble ; CHECK: cmp r0, r1 -; CHECK: vselgt.f64 d16, d1, d0 +; CHECK: vselge.f64 d16, d1, d0 ret void } define void @test_vsel32sle(i32 %lhs32, i32 %rhs32, float %a, float %b) { @@ -79,7 +79,7 @@ define void @test_vsel32sle(i32 %lhs32, i32 %rhs32, float %a, float %b) { %val1 = select i1 %tst1, float %a, float %b store float %val1, float* @varfloat ; CHECK: cmp r0, r1 -; CHECK: vselge.f32 s0, s1, s0 +; CHECK: vselgt.f32 s0, s1, s0 ret void } define void @test_vsel64sle(i32 %lhs32, i32 %rhs32, double %a, double %b) { @@ -88,7 +88,7 @@ define void @test_vsel64sle(i32 %lhs32, i32 %rhs32, double %a, double %b) { %val1 = select i1 %tst1, double %a, double %b store double %val1, double* @vardouble ; CHECK: cmp r0, r1 -; CHECK: vselge.f64 d16, d1, d0 +; CHECK: vselgt.f64 d16, d1, d0 ret void } define void @test_vsel32ogt(float %lhs32, float %rhs32, float %a, float %b) { diff --git a/test/CodeGen/ARM/vselect_imax.ll b/test/CodeGen/ARM/vselect_imax.ll index 9ea56a4..e999034 100644 --- a/test/CodeGen/ARM/vselect_imax.ll +++ b/test/CodeGen/ARM/vselect_imax.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -march=arm -mcpu=cortex-a8 | FileCheck %s --check-prefix=COST -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s ; Make sure that ARM backend with NEON handles vselect. 
define void @vmax_v4i32(<4 x i32>* %m, <4 x i32> %a, <4 x i32> %b) { diff --git a/test/CodeGen/ARM/vshift.ll b/test/CodeGen/ARM/vshift.ll index de380d3..618a137 100644 --- a/test/CodeGen/ARM/vshift.ll +++ b/test/CodeGen/ARM/vshift.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vshls8: @@ -180,7 +180,7 @@ define <8 x i8> @vlshri8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vlshri8: ;CHECK: vshr.u8 %tmp1 = load <8 x i8>* %A - %tmp2 = lshr <8 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > + %tmp2 = lshr <8 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 > ret <8 x i8> %tmp2 } @@ -188,7 +188,7 @@ define <4 x i16> @vlshri16(<4 x i16>* %A) nounwind { ;CHECK-LABEL: vlshri16: ;CHECK: vshr.u16 %tmp1 = load <4 x i16>* %A - %tmp2 = lshr <4 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16 > + %tmp2 = lshr <4 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15 > ret <4 x i16> %tmp2 } @@ -196,7 +196,7 @@ define <2 x i32> @vlshri32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vlshri32: ;CHECK: vshr.u32 %tmp1 = load <2 x i32>* %A - %tmp2 = lshr <2 x i32> %tmp1, < i32 32, i32 32 > + %tmp2 = lshr <2 x i32> %tmp1, < i32 31, i32 31 > ret <2 x i32> %tmp2 } @@ -204,7 +204,7 @@ define <1 x i64> @vlshri64(<1 x i64>* %A) nounwind { ;CHECK-LABEL: vlshri64: ;CHECK: vshr.u64 %tmp1 = load <1 x i64>* %A - %tmp2 = lshr <1 x i64> %tmp1, < i64 64 > + %tmp2 = lshr <1 x i64> %tmp1, < i64 63 > ret <1 x i64> %tmp2 } @@ -252,7 +252,7 @@ define <16 x i8> @vlshrQi8(<16 x i8>* %A) nounwind { ;CHECK-LABEL: vlshrQi8: ;CHECK: vshr.u8 %tmp1 = load <16 x i8>* %A - %tmp2 = lshr <16 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > + %tmp2 = lshr <16 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 > ret <16 x i8> %tmp2 } @@ -260,7 +260,7 @@ define <8 x i16> @vlshrQi16(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vlshrQi16: ;CHECK: vshr.u16 %tmp1 = load <8 x i16>* %A - %tmp2 = lshr <8 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 > + %tmp2 = lshr <8 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 > ret <8 x i16> %tmp2 } @@ -268,7 +268,7 @@ define <4 x i32> @vlshrQi32(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vlshrQi32: ;CHECK: vshr.u32 %tmp1 = load <4 x i32>* %A - %tmp2 = lshr <4 x i32> %tmp1, < i32 32, i32 32, i32 32, i32 32 > + %tmp2 = lshr <4 x i32> %tmp1, < i32 31, i32 31, i32 31, i32 31 > ret <4 x i32> %tmp2 } @@ -276,7 +276,7 @@ define <2 x i64> @vlshrQi64(<2 x i64>* %A) nounwind { ;CHECK-LABEL: vlshrQi64: ;CHECK: vshr.u64 %tmp1 = load <2 x i64>* %A - %tmp2 = lshr <2 x i64> %tmp1, < i64 64, i64 64 > + %tmp2 = lshr <2 x i64> %tmp1, < i64 63, i64 63 > ret <2 x i64> %tmp2 } @@ -331,7 +331,7 @@ define <8 x i8> @vashri8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vashri8: ;CHECK: vshr.s8 %tmp1 = load <8 x i8>* %A - %tmp2 = ashr <8 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > + %tmp2 = ashr <8 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 > ret <8 x i8> %tmp2 } @@ -339,7 +339,7 @@ define <4 x i16> @vashri16(<4 x i16>* %A) nounwind { ;CHECK-LABEL: vashri16: ;CHECK: vshr.s16 %tmp1 = load <4 x i16>* %A - %tmp2 = ashr <4 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16 > + %tmp2 = ashr <4 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15 > ret <4 x i16> 
%tmp2 } @@ -347,7 +347,7 @@ define <2 x i32> @vashri32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vashri32: ;CHECK: vshr.s32 %tmp1 = load <2 x i32>* %A - %tmp2 = ashr <2 x i32> %tmp1, < i32 32, i32 32 > + %tmp2 = ashr <2 x i32> %tmp1, < i32 31, i32 31 > ret <2 x i32> %tmp2 } @@ -355,7 +355,7 @@ define <1 x i64> @vashri64(<1 x i64>* %A) nounwind { ;CHECK-LABEL: vashri64: ;CHECK: vshr.s64 %tmp1 = load <1 x i64>* %A - %tmp2 = ashr <1 x i64> %tmp1, < i64 64 > + %tmp2 = ashr <1 x i64> %tmp1, < i64 63 > ret <1 x i64> %tmp2 } @@ -403,7 +403,7 @@ define <16 x i8> @vashrQi8(<16 x i8>* %A) nounwind { ;CHECK-LABEL: vashrQi8: ;CHECK: vshr.s8 %tmp1 = load <16 x i8>* %A - %tmp2 = ashr <16 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > + %tmp2 = ashr <16 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 > ret <16 x i8> %tmp2 } @@ -411,7 +411,7 @@ define <8 x i16> @vashrQi16(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vashrQi16: ;CHECK: vshr.s16 %tmp1 = load <8 x i16>* %A - %tmp2 = ashr <8 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 > + %tmp2 = ashr <8 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 > ret <8 x i16> %tmp2 } @@ -419,7 +419,7 @@ define <4 x i32> @vashrQi32(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vashrQi32: ;CHECK: vshr.s32 %tmp1 = load <4 x i32>* %A - %tmp2 = ashr <4 x i32> %tmp1, < i32 32, i32 32, i32 32, i32 32 > + %tmp2 = ashr <4 x i32> %tmp1, < i32 31, i32 31, i32 31, i32 31 > ret <4 x i32> %tmp2 } @@ -427,6 +427,6 @@ define <2 x i64> @vashrQi64(<2 x i64>* %A) nounwind { ;CHECK-LABEL: vashrQi64: ;CHECK: vshr.s64 %tmp1 = load <2 x i64>* %A - %tmp2 = ashr <2 x i64> %tmp1, < i64 64, i64 64 > + %tmp2 = ashr <2 x i64> %tmp1, < i64 63, i64 63 > ret <2 x i64> %tmp2 } diff --git a/test/CodeGen/ARM/vshiftins.ll b/test/CodeGen/ARM/vshiftins.ll index 27610bf..9526c32 100644 --- a/test/CodeGen/ARM/vshiftins.ll +++ b/test/CodeGen/ARM/vshiftins.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define <8 x i8> @vsli8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vsli8: diff --git a/test/CodeGen/ARM/vshl.ll b/test/CodeGen/ARM/vshl.ll index 462f7fe..6228652 100644 --- a/test/CodeGen/ARM/vshl.ll +++ b/test/CodeGen/ARM/vshl.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vshls8: diff --git a/test/CodeGen/ARM/vshll.ll b/test/CodeGen/ARM/vshll.ll index ae80664..27873eb 100644 --- a/test/CodeGen/ARM/vshll.ll +++ b/test/CodeGen/ARM/vshll.ll @@ -1,51 +1,57 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define <8 x i16> @vshlls8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vshlls8: ;CHECK: vshll.s8 - %tmp1 = load <8 x i8>* %A - %tmp2 = call <8 x i16> @llvm.arm.neon.vshiftls.v8i16(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) - ret <8 x i16> %tmp2 + %tmp1 = load <8 x i8>* %A + %sext = sext <8 x i8> %tmp1 to <8 x i16> + %shift = shl <8 x i16> %sext, + ret <8 x i16> %shift } define <4 x i32> @vshlls16(<4 x i16>* %A) nounwind { ;CHECK-LABEL: vshlls16: ;CHECK: vshll.s16 - %tmp1 = load <4 x i16>* %A - %tmp2 = call <4 x i32> @llvm.arm.neon.vshiftls.v4i32(<4 x i16> %tmp1, <4 
x i16> < i16 15, i16 15, i16 15, i16 15 >) - ret <4 x i32> %tmp2 + %tmp1 = load <4 x i16>* %A + %sext = sext <4 x i16> %tmp1 to <4 x i32> + %shift = shl <4 x i32> %sext, <i32 15, i32 15, i32 15, i32 15> + ret <4 x i32> %shift } define <2 x i64> @vshlls32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vshlls32: ;CHECK: vshll.s32 - %tmp1 = load <2 x i32>* %A - %tmp2 = call <2 x i64> @llvm.arm.neon.vshiftls.v2i64(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >) - ret <2 x i64> %tmp2 + %tmp1 = load <2 x i32>* %A + %sext = sext <2 x i32> %tmp1 to <2 x i64> + %shift = shl <2 x i64> %sext, <i64 31, i64 31> + ret <2 x i64> %shift } define <8 x i16> @vshllu8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vshllu8: ;CHECK: vshll.u8 - %tmp1 = load <8 x i8>* %A - %tmp2 = call <8 x i16> @llvm.arm.neon.vshiftlu.v8i16(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) - ret <8 x i16> %tmp2 + %tmp1 = load <8 x i8>* %A + %zext = zext <8 x i8> %tmp1 to <8 x i16> + %shift = shl <8 x i16> %zext, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7> + ret <8 x i16> %shift } define <4 x i32> @vshllu16(<4 x i16>* %A) nounwind { ;CHECK-LABEL: vshllu16: ;CHECK: vshll.u16 - %tmp1 = load <4 x i16>* %A - %tmp2 = call <4 x i32> @llvm.arm.neon.vshiftlu.v4i32(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >) - ret <4 x i32> %tmp2 + %tmp1 = load <4 x i16>* %A + %zext = zext <4 x i16> %tmp1 to <4 x i32> + %shift = shl <4 x i32> %zext, <i32 15, i32 15, i32 15, i32 15> + ret <4 x i32> %shift } define <2 x i64> @vshllu32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vshllu32: ;CHECK: vshll.u32 - %tmp1 = load <2 x i32>* %A - %tmp2 = call <2 x i64> @llvm.arm.neon.vshiftlu.v2i64(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >) - ret <2 x i64> %tmp2 + %tmp1 = load <2 x i32>* %A + %zext = zext <2 x i32> %tmp1 to <2 x i64> + %shift = shl <2 x i64> %zext, <i64 31, i64 31> + ret <2 x i64> %shift } ; The following tests use the maximum shift count, so the signedness is @@ -53,31 +59,58 @@ define <2 x i64> @vshllu32(<2 x i32>* %A) nounwind { define <8 x i16> @vshlli8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vshlli8: ;CHECK: vshll.i8 - %tmp1 = load <8 x i8>* %A - %tmp2 = call <8 x i16> @llvm.arm.neon.vshiftls.v8i16(<8 x i8> %tmp1, <8 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >) - ret <8 x i16> %tmp2 + %tmp1 = load <8 x i8>* %A + %sext = sext <8 x i8> %tmp1 to <8 x i16> + %shift = shl <8 x i16> %sext, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> + ret <8 x i16> %shift } define <4 x i32> @vshlli16(<4 x i16>* %A) nounwind { ;CHECK-LABEL: vshlli16: ;CHECK: vshll.i16 - %tmp1 = load <4 x i16>* %A - %tmp2 = call <4 x i32> @llvm.arm.neon.vshiftlu.v4i32(<4 x i16> %tmp1, <4 x i16> < i16 16, i16 16, i16 16, i16 16 >) - ret <4 x i32> %tmp2 + %tmp1 = load <4 x i16>* %A + %zext = zext <4 x i16> %tmp1 to <4 x i32> + %shift = shl <4 x i32> %zext, <i32 16, i32 16, i32 16, i32 16> + ret <4 x i32> %shift } define <2 x i64> @vshlli32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vshlli32: ;CHECK: vshll.i32 - %tmp1 = load <2 x i32>* %A - %tmp2 = call <2 x i64> @llvm.arm.neon.vshiftls.v2i64(<2 x i32> %tmp1, <2 x i32> < i32 32, i32 32 >) - ret <2 x i64> %tmp2 + %tmp1 = load <2 x i32>* %A + %zext = zext <2 x i32> %tmp1 to <2 x i64> + %shift = shl <2 x i64> %zext, <i64 32, i64 32> + ret <2 x i64> %shift } -declare <8 x i16> @llvm.arm.neon.vshiftls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vshiftls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vshiftls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone +; And these have a shift just out of range so separate vmovl and vshl +; instructions are needed.
+define <8 x i16> @vshllu8_bad(<8 x i8>* %A) nounwind { +; CHECK-LABEL: vshllu8_bad: +; CHECK: vmovl.u8 +; CHECK: vshl.i16 + %tmp1 = load <8 x i8>* %A + %zext = zext <8 x i8> %tmp1 to <8 x i16> + %shift = shl <8 x i16> %zext, + ret <8 x i16> %shift +} + +define <4 x i32> @vshlls16_bad(<4 x i16>* %A) nounwind { +; CHECK-LABEL: vshlls16_bad: +; CHECK: vmovl.s16 +; CHECK: vshl.i32 + %tmp1 = load <4 x i16>* %A + %sext = sext <4 x i16> %tmp1 to <4 x i32> + %shift = shl <4 x i32> %sext, + ret <4 x i32> %shift +} -declare <8 x i16> @llvm.arm.neon.vshiftlu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vshiftlu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vshiftlu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone +define <2 x i64> @vshllu32_bad(<2 x i32>* %A) nounwind { +; CHECK-LABEL: vshllu32_bad: +; CHECK: vmovl.u32 +; CHECK: vshl.i64 + %tmp1 = load <2 x i32>* %A + %zext = zext <2 x i32> %tmp1 to <2 x i64> + %shift = shl <2 x i64> %zext, + ret <2 x i64> %shift +} diff --git a/test/CodeGen/ARM/vshrn.ll b/test/CodeGen/ARM/vshrn.ll index 40a94fe..8aa009a 100644 --- a/test/CodeGen/ARM/vshrn.ll +++ b/test/CodeGen/ARM/vshrn.ll @@ -1,32 +1,61 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define <8 x i8> @vshrns8(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vshrns8: ;CHECK: vshrn.i16 %tmp1 = load <8 x i16>* %A - %tmp2 = call <8 x i8> @llvm.arm.neon.vshiftn.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >) - ret <8 x i8> %tmp2 + %tmp2 = lshr <8 x i16> %tmp1, + %tmp3 = trunc <8 x i16> %tmp2 to <8 x i8> + ret <8 x i8> %tmp3 } define <4 x i16> @vshrns16(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vshrns16: ;CHECK: vshrn.i32 %tmp1 = load <4 x i32>* %A - %tmp2 = call <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >) - ret <4 x i16> %tmp2 + %tmp2 = ashr <4 x i32> %tmp1, + %tmp3 = trunc <4 x i32> %tmp2 to <4 x i16> + ret <4 x i16> %tmp3 } define <2 x i32> @vshrns32(<2 x i64>* %A) nounwind { ;CHECK-LABEL: vshrns32: ;CHECK: vshrn.i64 %tmp1 = load <2 x i64>* %A - %tmp2 = call <2 x i32> @llvm.arm.neon.vshiftn.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >) - ret <2 x i32> %tmp2 + %tmp2 = ashr <2 x i64> %tmp1, + %tmp3 = trunc <2 x i64> %tmp2 to <2 x i32> + ret <2 x i32> %tmp3 +} + +define <8 x i8> @vshrns8_bad(<8 x i16>* %A) nounwind { +; CHECK-LABEL: vshrns8_bad: +; CHECK: vshr.s16 +; CHECK: vmovn.i16 + %tmp1 = load <8 x i16>* %A + %tmp2 = ashr <8 x i16> %tmp1, + %tmp3 = trunc <8 x i16> %tmp2 to <8 x i8> + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vshrns16_bad(<4 x i32>* %A) nounwind { +; CHECK-LABEL: vshrns16_bad: +; CHECK: vshr.u32 +; CHECK: vmovn.i32 + %tmp1 = load <4 x i32>* %A + %tmp2 = lshr <4 x i32> %tmp1, + %tmp3 = trunc <4 x i32> %tmp2 to <4 x i16> + ret <4 x i16> %tmp3 } -declare <8 x i8> @llvm.arm.neon.vshiftn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i32> @llvm.arm.neon.vshiftn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone +define <2 x i32> @vshrns32_bad(<2 x i64>* %A) nounwind { +; CHECK-LABEL: vshrns32_bad: +; CHECK: vshr.u64 +; CHECK: vmovn.i64 + %tmp1 = load <2 x i64>* %A + %tmp2 = lshr <2 x i64> %tmp1, + %tmp3 = trunc <2 x i64> %tmp2 to <2 x i32> + ret <2 x i32> %tmp3 +} define <8 x i8> @vrshrns8(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vrshrns8: diff 
--git a/test/CodeGen/ARM/vsra.ll b/test/CodeGen/ARM/vsra.ll index 7a211c3..fa5985a 100644 --- a/test/CodeGen/ARM/vsra.ll +++ b/test/CodeGen/ARM/vsra.ll @@ -1,12 +1,12 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define <8 x i8> @vsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vsras8: ;CHECK: vsra.s8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = ashr <8 x i8> %tmp2, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > - %tmp4 = add <8 x i8> %tmp1, %tmp3 + %tmp3 = ashr <8 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 > + %tmp4 = add <8 x i8> %tmp1, %tmp3 ret <8 x i8> %tmp4 } @@ -15,7 +15,7 @@ define <4 x i16> @vsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: vsra.s16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = ashr <4 x i16> %tmp2, < i16 16, i16 16, i16 16, i16 16 > + %tmp3 = ashr <4 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15 > %tmp4 = add <4 x i16> %tmp1, %tmp3 ret <4 x i16> %tmp4 } @@ -25,7 +25,7 @@ define <2 x i32> @vsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: vsra.s32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = ashr <2 x i32> %tmp2, < i32 32, i32 32 > + %tmp3 = ashr <2 x i32> %tmp2, < i32 31, i32 31 > %tmp4 = add <2 x i32> %tmp1, %tmp3 ret <2 x i32> %tmp4 } @@ -35,7 +35,7 @@ define <1 x i64> @vsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK: vsra.s64 %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B - %tmp3 = ashr <1 x i64> %tmp2, < i64 64 > + %tmp3 = ashr <1 x i64> %tmp2, < i64 63 > %tmp4 = add <1 x i64> %tmp1, %tmp3 ret <1 x i64> %tmp4 } @@ -45,7 +45,7 @@ define <16 x i8> @vsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: vsra.s8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = ashr <16 x i8> %tmp2, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > + %tmp3 = ashr <16 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 > %tmp4 = add <16 x i8> %tmp1, %tmp3 ret <16 x i8> %tmp4 } @@ -55,7 +55,7 @@ define <8 x i16> @vsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: vsra.s16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = ashr <8 x i16> %tmp2, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 > + %tmp3 = ashr <8 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 > %tmp4 = add <8 x i16> %tmp1, %tmp3 ret <8 x i16> %tmp4 } @@ -65,7 +65,7 @@ define <4 x i32> @vsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: vsra.s32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = ashr <4 x i32> %tmp2, < i32 32, i32 32, i32 32, i32 32 > + %tmp3 = ashr <4 x i32> %tmp2, < i32 31, i32 31, i32 31, i32 31 > %tmp4 = add <4 x i32> %tmp1, %tmp3 ret <4 x i32> %tmp4 } @@ -75,7 +75,7 @@ define <2 x i64> @vsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: vsra.s64 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B - %tmp3 = ashr <2 x i64> %tmp2, < i64 64, i64 64 > + %tmp3 = ashr <2 x i64> %tmp2, < i64 63, i64 63 > %tmp4 = add <2 x i64> %tmp1, %tmp3 ret <2 x i64> %tmp4 } @@ -85,7 +85,7 @@ define <8 x i8> @vsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vsra.u8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = lshr <8 x i8> %tmp2, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > + %tmp3 = lshr <8 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 > %tmp4 = add <8 x i8> %tmp1, 
%tmp3 ret <8 x i8> %tmp4 } @@ -95,7 +95,7 @@ define <4 x i16> @vsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: vsra.u16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = lshr <4 x i16> %tmp2, < i16 16, i16 16, i16 16, i16 16 > + %tmp3 = lshr <4 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15 > %tmp4 = add <4 x i16> %tmp1, %tmp3 ret <4 x i16> %tmp4 } @@ -105,7 +105,7 @@ define <2 x i32> @vsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: vsra.u32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = lshr <2 x i32> %tmp2, < i32 32, i32 32 > + %tmp3 = lshr <2 x i32> %tmp2, < i32 31, i32 31 > %tmp4 = add <2 x i32> %tmp1, %tmp3 ret <2 x i32> %tmp4 } @@ -115,7 +115,7 @@ define <1 x i64> @vsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK: vsra.u64 %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B - %tmp3 = lshr <1 x i64> %tmp2, < i64 64 > + %tmp3 = lshr <1 x i64> %tmp2, < i64 63 > %tmp4 = add <1 x i64> %tmp1, %tmp3 ret <1 x i64> %tmp4 } @@ -125,7 +125,7 @@ define <16 x i8> @vsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: vsra.u8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = lshr <16 x i8> %tmp2, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > + %tmp3 = lshr <16 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 > %tmp4 = add <16 x i8> %tmp1, %tmp3 ret <16 x i8> %tmp4 } @@ -135,7 +135,7 @@ define <8 x i16> @vsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: vsra.u16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = lshr <8 x i16> %tmp2, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 > + %tmp3 = lshr <8 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 > %tmp4 = add <8 x i16> %tmp1, %tmp3 ret <8 x i16> %tmp4 } @@ -145,7 +145,7 @@ define <4 x i32> @vsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: vsra.u32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = lshr <4 x i32> %tmp2, < i32 32, i32 32, i32 32, i32 32 > + %tmp3 = lshr <4 x i32> %tmp2, < i32 31, i32 31, i32 31, i32 31 > %tmp4 = add <4 x i32> %tmp1, %tmp3 ret <4 x i32> %tmp4 } @@ -155,7 +155,7 @@ define <2 x i64> @vsraQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: vsra.u64 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B - %tmp3 = lshr <2 x i64> %tmp2, < i64 64, i64 64 > + %tmp3 = lshr <2 x i64> %tmp2, < i64 63, i64 63 > %tmp4 = add <2 x i64> %tmp1, %tmp3 ret <2 x i64> %tmp4 } diff --git a/test/CodeGen/ARM/vst1.ll b/test/CodeGen/ARM/vst1.ll index 36439fd..14f3ff0 100644 --- a/test/CodeGen/ARM/vst1.ll +++ b/test/CodeGen/ARM/vst1.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define void @vst1i8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vst1i8: diff --git a/test/CodeGen/ARM/vst2.ll b/test/CodeGen/ARM/vst2.ll index 7551a56..2180259 100644 --- a/test/CodeGen/ARM/vst2.ll +++ b/test/CodeGen/ARM/vst2.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define void @vst2i8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vst2i8: diff --git a/test/CodeGen/ARM/vst3.ll b/test/CodeGen/ARM/vst3.ll index 91eb7fc..5f150ed 100644 --- a/test/CodeGen/ARM/vst3.ll +++ b/test/CodeGen/ARM/vst3.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon -fast-isel=0 -O0 | FileCheck %s +; RUN: llc 
-mtriple=arm-eabi -mattr=+neon -fast-isel=0 -O0 %s -o - | FileCheck %s define void @vst3i8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vst3i8: @@ -61,6 +61,18 @@ define void @vst3i64(i64* %A, <1 x i64>* %B) nounwind { ret void } +define void @vst3i64_update(i64** %ptr, <1 x i64>* %B) nounwind { +;CHECK-LABEL: vst3i64_update +;CHECK: vst1.64 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]! + %A = load i64** %ptr + %tmp0 = bitcast i64* %A to i8* + %tmp1 = load <1 x i64>* %B + call void @llvm.arm.neon.vst3.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1) + %tmp2 = getelementptr i64* %A, i32 3 + store i64* %tmp2, i64** %ptr + ret void +} + define void @vst3Qi8(i8* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vst3Qi8: ;Check the alignment value. Max for this instruction is 64 bits: diff --git a/test/CodeGen/ARM/vst4.ll b/test/CodeGen/ARM/vst4.ll index ef5c83a..44c76b5 100644 --- a/test/CodeGen/ARM/vst4.ll +++ b/test/CodeGen/ARM/vst4.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vst4i8: @@ -60,6 +60,18 @@ define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind { ret void } +define void @vst4i64_update(i64** %ptr, <1 x i64>* %B) nounwind { +;CHECK-LABEL: vst4i64_update: +;CHECK: vst1.64 {d16, d17, d18, d19}, [r1]! + %A = load i64** %ptr + %tmp0 = bitcast i64* %A to i8* + %tmp1 = load <1 x i64>* %B + call void @llvm.arm.neon.vst4.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1) + %tmp2 = getelementptr i64* %A, i32 4 + store i64* %tmp2, i64** %ptr + ret void +} + define void @vst4Qi8(i8* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vst4Qi8: ;Check the alignment value. 
Max for this instruction is 256 bits: diff --git a/test/CodeGen/ARM/vstlane.ll b/test/CodeGen/ARM/vstlane.ll index 34c5c70..7dd6e7b 100644 --- a/test/CodeGen/ARM/vstlane.ll +++ b/test/CodeGen/ARM/vstlane.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm -mattr=+neon %s -o - | FileCheck %s define void @vst1lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vst1lanei8: diff --git a/test/CodeGen/ARM/vsub.ll b/test/CodeGen/ARM/vsub.ll index 6b95b97..d1a094b 100644 --- a/test/CodeGen/ARM/vsub.ll +++ b/test/CodeGen/ARM/vsub.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define <8 x i8> @vsubi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vsubi8: diff --git a/test/CodeGen/ARM/vtbl.ll b/test/CodeGen/ARM/vtbl.ll index 21614b0..32258a3 100644 --- a/test/CodeGen/ARM/vtbl.ll +++ b/test/CodeGen/ARM/vtbl.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s %struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> } %struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> } diff --git a/test/CodeGen/ARM/vtrn.ll b/test/CodeGen/ARM/vtrn.ll index 7d101bc..cdae7f8 100644 --- a/test/CodeGen/ARM/vtrn.ll +++ b/test/CodeGen/ARM/vtrn.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vtrni8: diff --git a/test/CodeGen/ARM/vuzp.ll b/test/CodeGen/ARM/vuzp.ll index 2d193c1..832be6c 100644 --- a/test/CodeGen/ARM/vuzp.ll +++ b/test/CodeGen/ARM/vuzp.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define <8 x i8> @vuzpi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vuzpi8: diff --git a/test/CodeGen/ARM/vzip.ll b/test/CodeGen/ARM/vzip.ll index f71aef7..f74dc62 100644 --- a/test/CodeGen/ARM/vzip.ll +++ b/test/CodeGen/ARM/vzip.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define <8 x i8> @vzipi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vzipi8: diff --git a/test/CodeGen/ARM/warn-stack.ll b/test/CodeGen/ARM/warn-stack.ll index 9538bbf..90a3e1f 100644 --- a/test/CodeGen/ARM/warn-stack.ll +++ b/test/CodeGen/ARM/warn-stack.ll @@ -12,7 +12,7 @@ entry: ret void } -; CHECK: warning: Stack size limit exceeded (96) in warn. 
+; CHECK: warning: stack size limit exceeded (96) in warn define void @warn() nounwind ssp { entry: %buffer = alloca [80 x i8], align 1 diff --git a/test/CodeGen/ARM/weak.ll b/test/CodeGen/ARM/weak.ll index 5ac4b8c..375ce22 100644 --- a/test/CodeGen/ARM/weak.ll +++ b/test/CodeGen/ARM/weak.ll @@ -1,5 +1,4 @@ -; RUN: llc < %s -march=arm | grep .weak.*f -; RUN: llc < %s -march=arm | grep .weak.*h +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s define weak i32 @f() { entry: @@ -14,3 +13,6 @@ entry: declare extern_weak void @h() +; CHECK: {{.}}weak{{.*}}f +; CHECK: {{.}}weak{{.*}}h + diff --git a/test/CodeGen/ARM/weak2.ll b/test/CodeGen/ARM/weak2.ll index cf327bb..82ab90e 100644 --- a/test/CodeGen/ARM/weak2.ll +++ b/test/CodeGen/ARM/weak2.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm | grep .weak +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s define i32 @f(i32 %a) { entry: @@ -16,3 +16,6 @@ UnifiedReturnBlock: ; preds = %entry } declare extern_weak i32 @test_weak(...) + +; CHECK: {{.}}weak + diff --git a/test/CodeGen/ARM/zero-cycle-zero.ll b/test/CodeGen/ARM/zero-cycle-zero.ll new file mode 100644 index 0000000..121a87f --- /dev/null +++ b/test/CodeGen/ARM/zero-cycle-zero.ll @@ -0,0 +1,70 @@ +; RUN: llc -mtriple=armv8 -mcpu=cyclone < %s | FileCheck %s --check-prefix=CHECK-CYCLONE +; RUN: llc -mtriple=armv8 -mcpu=swift < %s | FileCheck %s --check-prefix=CHECK-SWIFT + +declare arm_aapcs_vfpcc void @take_vec64(<2 x i32>) + +define void @test_vec64() { +; CHECK-CYCLONE-LABEL: test_vec64: +; CHECK-SWIFT-LABEL: test_vec64: + + call arm_aapcs_vfpcc void @take_vec64(<2 x i32> ) + call arm_aapcs_vfpcc void @take_vec64(<2 x i32> ) +; CHECK-CYCLONE-NOT: vmov.f64 d0, +; CHECK-CYCLONE: vmov.i32 d0, #0 +; CHECK-CYCLONE: bl +; CHECK-CYCLONE: vmov.i32 d0, #0 +; CHECK-CYCLONE: bl + +; CHECK-SWIFT: vmov.f64 [[ZEROREG:d[0-9]+]], +; CHECK-SWIFT: vmov.i32 [[ZEROREG]], #0 +; CHECK-SWIFT: vorr d0, [[ZEROREG]], [[ZEROREG]] +; CHECK-SWIFT: bl +; CHECK-SWIFT: vorr d0, [[ZEROREG]], [[ZEROREG]] +; CHECK-SWIFT: bl + + ret void +} + +declare arm_aapcs_vfpcc void @take_vec128(<8 x i16>) + +define void @test_vec128() { +; CHECK-CYCLONE-LABEL: test_vec128: +; CHECK-SWIFT-LABEL: test_vec128: + + call arm_aapcs_vfpcc void @take_vec128(<8 x i16> ) + call arm_aapcs_vfpcc void @take_vec128(<8 x i16> ) +; CHECK-CYCLONE-NOT: vmov.f64 [[ZEROREG:d[0-9]+]], +; CHECK-CYCLONE: vmov.i32 q0, #0 +; CHECK-CYCLONE: bl +; CHECK-CYCLONE: vmov.i32 q0, #0 +; CHECK-CYCLONE: bl + +; CHECK-SWIFT-NOT: vmov.f64 [[ZEROREG:d[0-9]+]], +; CHECK-SWIFT: vmov.i32 [[ZEROREG:q[0-9]+]], #0 +; CHECK-SWIFT: vorr q0, [[ZEROREG]], [[ZEROREG]] +; CHECK-SWIFT: bl +; CHECK-SWIFT: vorr q0, [[ZEROREG]], [[ZEROREG]] +; CHECK-SWIFT: bl + + ret void +} + +declare void @take_i32(i32) + +define void @test_i32() { +; CHECK-CYCLONE-LABEL: test_i32: +; CHECK-SWIFT-LABEL: test_i32: + + call arm_aapcs_vfpcc void @take_i32(i32 0) + call arm_aapcs_vfpcc void @take_i32(i32 0) +; CHECK-CYCLONE-NOT: vmov.f64 [[ZEROREG:d[0-9]+]], +; CHECK-CYCLONE: mov r0, #0 +; CHECK-CYCLONE: bl +; CHECK-CYCLONE: mov r0, #0 +; CHECK-CYCLONE: bl + +; It doesn't particularly matter what Swift does here, there isn't carefully +; crafted behaviour that we might break in Cyclone. 
+ + ret void +} diff --git a/test/CodeGen/ARM64/2011-03-09-CPSRSpill.ll b/test/CodeGen/ARM64/2011-03-09-CPSRSpill.ll new file mode 100644 index 0000000..6fb7c3f --- /dev/null +++ b/test/CodeGen/ARM64/2011-03-09-CPSRSpill.ll @@ -0,0 +1,47 @@ +; RUN: llc < %s -mtriple=arm64-apple-darwin + +; Can't copy or spill / restore CPSR. +; rdar://9105206 + +define fastcc void @t() ssp align 2 { +entry: + br i1 undef, label %bb3.i, label %bb2.i + +bb2.i: ; preds = %entry + br label %bb3.i + +bb3.i: ; preds = %bb2.i, %entry + br i1 undef, label %_ZN12gjkepa2_impl3EPA6appendERNS0_5sListEPNS0_5sFaceE.exit71, label %bb.i69 + +bb.i69: ; preds = %bb3.i + br label %_ZN12gjkepa2_impl3EPA6appendERNS0_5sListEPNS0_5sFaceE.exit71 + +_ZN12gjkepa2_impl3EPA6appendERNS0_5sListEPNS0_5sFaceE.exit71: ; preds = %bb.i69, %bb3.i + %0 = select i1 undef, float 0.000000e+00, float undef + %1 = fdiv float %0, undef + %2 = fcmp ult float %1, 0xBF847AE140000000 + %storemerge9 = select i1 %2, float %1, float 0.000000e+00 + store float %storemerge9, float* undef, align 4 + br i1 undef, label %bb42, label %bb47 + +bb42: ; preds = %_ZN12gjkepa2_impl3EPA6appendERNS0_5sListEPNS0_5sFaceE.exit71 + br i1 undef, label %bb46, label %bb53 + +bb46: ; preds = %bb42 + br label %bb48 + +bb47: ; preds = %_ZN12gjkepa2_impl3EPA6appendERNS0_5sListEPNS0_5sFaceE.exit71 + br label %bb48 + +bb48: ; preds = %bb47, %bb46 + br i1 undef, label %bb1.i14, label %bb.i13 + +bb.i13: ; preds = %bb48 + br label %bb1.i14 + +bb1.i14: ; preds = %bb.i13, %bb48 + br label %bb53 + +bb53: ; preds = %bb1.i14, %bb42 + ret void +} diff --git a/test/CodeGen/ARM64/2011-03-17-AsmPrinterCrash.ll b/test/CodeGen/ARM64/2011-03-17-AsmPrinterCrash.ll new file mode 100644 index 0000000..2b083d8 --- /dev/null +++ b/test/CodeGen/ARM64/2011-03-17-AsmPrinterCrash.ll @@ -0,0 +1,45 @@ +; RUN: llc < %s -mtriple=arm64-apple-darwin + +; rdar://9146594 + +define void @drt_vsprintf() nounwind ssp { +entry: + %do_tab_convert = alloca i32, align 4 + br i1 undef, label %if.then24, label %if.else295, !dbg !13 + +if.then24: ; preds = %entry + unreachable + +if.else295: ; preds = %entry + call void @llvm.dbg.declare(metadata !{i32* %do_tab_convert}, metadata !16), !dbg !18 + store i32 0, i32* %do_tab_convert, align 4, !dbg !19 + unreachable +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +!llvm.dbg.gv = !{!0} +!llvm.dbg.sp = !{!1, !7, !10, !11, !12} + +!0 = metadata !{i32 589876, i32 0, metadata !1, metadata !"vsplive", metadata !"vsplive", metadata !"", metadata !2, i32 617, metadata !6, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] +!1 = metadata !{i32 589870, metadata !20, metadata !2, metadata !"drt_vsprintf", metadata !"drt_vsprintf", metadata !"", i32 616, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 589865, metadata !20} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 589841, metadata !20, i32 12, metadata !"clang version 3.0 (http://llvm.org/git/clang.git git:/git/puzzlebox/clang.git/ c4d1aea01c4444eb81bdbf391f1be309127c3cf1)", i1 true, metadata !"", i32 0, metadata !21, metadata !21, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ] +!4 = metadata !{i32 589845, metadata !20, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !5, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!5 = metadata !{metadata !6} +!6 = metadata !{i32 589860, null, metadata !3, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] 
+!7 = metadata !{i32 589870, metadata !20, metadata !2, metadata !"putc_mem", metadata !"putc_mem", metadata !"", i32 30, metadata !8, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] +!8 = metadata !{i32 589845, metadata !20, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !9, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!9 = metadata !{null} +!10 = metadata !{i32 589870, metadata !20, metadata !2, metadata !"print_double", metadata !"print_double", metadata !"", i32 203, metadata !4, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] +!11 = metadata !{i32 589870, metadata !20, metadata !2, metadata !"print_number", metadata !"print_number", metadata !"", i32 75, metadata !4, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] +!12 = metadata !{i32 589870, metadata !20, metadata !2, metadata !"get_flags", metadata !"get_flags", metadata !"", i32 508, metadata !8, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] +!13 = metadata !{i32 653, i32 5, metadata !14, null} +!14 = metadata !{i32 589835, metadata !20, metadata !15, i32 652, i32 35, i32 2} ; [ DW_TAG_lexical_block ] +!15 = metadata !{i32 589835, metadata !20, metadata !1, i32 616, i32 1, i32 0} ; [ DW_TAG_lexical_block ] +!16 = metadata !{i32 590080, metadata !17, metadata !"do_tab_convert", metadata !2, i32 853, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ] +!17 = metadata !{i32 589835, metadata !20, metadata !14, i32 850, i32 12, i32 33} ; [ DW_TAG_lexical_block ] +!18 = metadata !{i32 853, i32 11, metadata !17, null} +!19 = metadata !{i32 853, i32 29, metadata !17, null} +!20 = metadata !{metadata !"print.i", metadata !"/Volumes/Ebi/echeng/radars/r9146594"} +!21 = metadata !{i32 0} diff --git a/test/CodeGen/ARM64/2011-03-21-Unaligned-Frame-Index.ll b/test/CodeGen/ARM64/2011-03-21-Unaligned-Frame-Index.ll new file mode 100644 index 0000000..6f0ec34 --- /dev/null +++ b/test/CodeGen/ARM64/2011-03-21-Unaligned-Frame-Index.ll @@ -0,0 +1,12 @@ +; RUN: llc < %s -march=arm64 | FileCheck %s +define void @foo(i64 %val) { +; CHECK: foo +; The stack frame store is not 64-bit aligned. Make sure we use an +; instruction that can handle that. +; CHECK: stur x0, [sp, #20] + %a = alloca [49 x i32], align 4 + %p32 = getelementptr inbounds [49 x i32]* %a, i64 0, i64 2 + %p = bitcast i32* %p32 to i64* + store i64 %val, i64* %p, align 8 + ret void +} diff --git a/test/CodeGen/ARM64/2011-04-21-CPSRBug.ll b/test/CodeGen/ARM64/2011-04-21-CPSRBug.ll new file mode 100644 index 0000000..88232fc --- /dev/null +++ b/test/CodeGen/ARM64/2011-04-21-CPSRBug.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s -mtriple=arm64-apple-iOS5.0 + +; CPSR is not allocatable so fast allocatable wouldn't mark them killed. 
+; rdar://9313272 + +define hidden void @t() nounwind { +entry: + %cmp = icmp eq i32* null, undef + %frombool = zext i1 %cmp to i8 + store i8 %frombool, i8* undef, align 1 + %tmp4 = load i8* undef, align 1 + %tobool = trunc i8 %tmp4 to i1 + br i1 %tobool, label %land.lhs.true, label %if.end + +land.lhs.true: ; preds = %entry + unreachable + +if.end: ; preds = %entry + br i1 undef, label %land.lhs.true14, label %if.end33 + +land.lhs.true14: ; preds = %if.end + unreachable + +if.end33: ; preds = %if.end + unreachable +} diff --git a/test/CodeGen/ARM64/2011-10-18-LdStOptBug.ll b/test/CodeGen/ARM64/2011-10-18-LdStOptBug.ll new file mode 100644 index 0000000..ea1cd02 --- /dev/null +++ b/test/CodeGen/ARM64/2011-10-18-LdStOptBug.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -mtriple=arm64-apple-ios | FileCheck %s + +; Can't fold the increment by 1<<12 into a post-increment load +; rdar://10301335 + +@test_data = common global i32 0, align 4 + +define void @t() nounwind ssp { +; CHECK-LABEL: t: +entry: + br label %for.body + +for.body: +; CHECK: for.body +; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}] +; CHECK: add x[[REG:[0-9]+]], +; CHECK: x[[REG]], #4096 + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %0 = shl nsw i64 %indvars.iv, 12 + %add = add nsw i64 %0, 34628173824 + %1 = inttoptr i64 %add to i32* + %2 = load volatile i32* %1, align 4096 + store volatile i32 %2, i32* @test_data, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 200 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} diff --git a/test/CodeGen/ARM64/2012-01-11-ComparisonDAGCrash.ll b/test/CodeGen/ARM64/2012-01-11-ComparisonDAGCrash.ll new file mode 100644 index 0000000..d47dbb2 --- /dev/null +++ b/test/CodeGen/ARM64/2012-01-11-ComparisonDAGCrash.ll @@ -0,0 +1,40 @@ +; RUN: llc < %s -march=arm64 + +; The target lowering for integer comparisons was replacing some DAG nodes +; during operation legalization, which resulted in dangling pointers, +; cycles in DAGs, and eventually crashes. This is the testcase for +; one of those crashes. 
(rdar://10653656) + +define void @test(i1 zeroext %IsArrow) nounwind ssp align 2 { +entry: + br i1 undef, label %return, label %lor.lhs.false + +lor.lhs.false: + br i1 undef, label %return, label %if.end + +if.end: + %tmp.i = load i64* undef, align 8 + %and.i.i.i = and i64 %tmp.i, -16 + br i1 %IsArrow, label %if.else_crit_edge, label %if.end32 + +if.else_crit_edge: + br i1 undef, label %if.end32, label %return + +if.end32: + %0 = icmp ult i32 undef, 3 + %1 = zext i64 %tmp.i to i320 + %.pn.v = select i1 %0, i320 128, i320 64 + %.pn = shl i320 %1, %.pn.v + %ins346392 = or i320 %.pn, 0 + store i320 %ins346392, i320* undef, align 8 + br i1 undef, label %sw.bb.i.i, label %exit + +sw.bb.i.i: + unreachable + +exit: + unreachable + +return: + ret void +} diff --git a/test/CodeGen/ARM64/2012-05-07-DAGCombineVectorExtract.ll b/test/CodeGen/ARM64/2012-05-07-DAGCombineVectorExtract.ll new file mode 100644 index 0000000..a4d37e4 --- /dev/null +++ b/test/CodeGen/ARM64/2012-05-07-DAGCombineVectorExtract.ll @@ -0,0 +1,20 @@ +; RUN: llc < %s -march=arm64 | FileCheck %s + +define i32 @foo(<4 x i32> %a, i32 %n) nounwind { +; CHECK-LABEL: foo: +; CHECK: fmov w0, s0 +; CHECK-NEXT: ret + %b = bitcast <4 x i32> %a to i128 + %c = trunc i128 %b to i32 + ret i32 %c +} + +define i64 @bar(<2 x i64> %a, i64 %n) nounwind { +; CHECK-LABEL: bar: +; CHECK: fmov x0, d0 +; CHECK-NEXT: ret + %b = bitcast <2 x i64> %a to i128 + %c = trunc i128 %b to i64 + ret i64 %c +} + diff --git a/test/CodeGen/ARM64/2012-05-07-MemcpyAlignBug.ll b/test/CodeGen/ARM64/2012-05-07-MemcpyAlignBug.ll new file mode 100644 index 0000000..d59b0d0 --- /dev/null +++ b/test/CodeGen/ARM64/2012-05-07-MemcpyAlignBug.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s -march arm64 -mcpu=cyclone | FileCheck %s +; + +@b = private unnamed_addr constant [3 x i32] [i32 1768775988, i32 1685481784, i32 1836253201], align 4 + +; The important thing for this test is that we need an unaligned load of `l_b' +; ("ldr w2, [x1, #8]" in this case). + +; CHECK: adrp x[[PAGE:[0-9]+]], {{l_b@PAGE|.Lb}} +; CHECK: add x[[ADDR:[0-9]+]], x[[PAGE]], {{l_b@PAGEOFF|:lo12:.Lb}} +; CHECK-NEXT: ldr [[VAL:w[0-9]+]], [x[[ADDR]], #8] +; CHECK-NEXT: str [[VAL]], [x0, #8] +; CHECK-NEXT: ldr [[VAL2:x[0-9]+]], [x[[ADDR]]] +; CHECK-NEXT: str [[VAL2]], [x0] + +define void @foo(i8* %a) { + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast ([3 x i32]* @b to i8*), i64 12, i32 4, i1 false) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind diff --git a/test/CodeGen/ARM64/2012-05-09-LOADgot-bug.ll b/test/CodeGen/ARM64/2012-05-09-LOADgot-bug.ll new file mode 100644 index 0000000..d1840d3 --- /dev/null +++ b/test/CodeGen/ARM64/2012-05-09-LOADgot-bug.ll @@ -0,0 +1,22 @@ +; RUN: llc -mtriple=arm64-apple-ios < %s | FileCheck %s +; RUN: llc -mtriple=arm64-linux-gnu -relocation-model=pic < %s | FileCheck %s --check-prefix=CHECK-LINUX +; + +define hidden void @t() optsize ssp { +entry: + store i64 zext (i32 ptrtoint (i64 (i32)* @x to i32) to i64), i64* undef, align 8 +; CHECK: adrp x{{[0-9]+}}, _x@GOTPAGE +; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}, _x@GOTPAGEOFF] +; CHECK-NEXT: and x{{[0-9]+}}, x{{[0-9]+}}, #0xffffffff +; CHECK-NEXT: str x{{[0-9]+}}, [x{{[0-9]+}}] + unreachable +} + +declare i64 @x(i32) optsize + +; Worth checking the Linux code is sensible too: only way to access +; the GOT is via a 64-bit load. Just loading wN is unacceptable +; (there's no ELF relocation to do that). 
+ +; CHECK-LINUX: adrp {{x[0-9]+}}, :got:x +; CHECK-LINUX: ldr {{x[0-9]+}}, [{{x[0-9]+}}, :got_lo12:x] diff --git a/test/CodeGen/ARM64/2012-05-22-LdStOptBug.ll b/test/CodeGen/ARM64/2012-05-22-LdStOptBug.ll new file mode 100644 index 0000000..4b037db --- /dev/null +++ b/test/CodeGen/ARM64/2012-05-22-LdStOptBug.ll @@ -0,0 +1,50 @@ +; RUN: llc < %s -mtriple=arm64-apple-ios -verify-machineinstrs | FileCheck %s + +; LdStOpt bug created illegal instruction: +; %D1, %D2 = LDPSi %X0, 1 +; rdar://11512047 + +%0 = type opaque +%struct.CGRect = type { %struct.CGPoint, %struct.CGSize } +%struct.CGPoint = type { double, double } +%struct.CGSize = type { double, double } + +@"OBJC_IVAR_$_UIScreen._bounds" = external hidden global i64, section "__DATA, __objc_ivar", align 8 + +define hidden %struct.CGRect @t(%0* nocapture %self, i8* nocapture %_cmd) nounwind readonly optsize ssp { +entry: +; CHECK-LABEL: t: +; CHECK: ldp d{{[0-9]+}}, d{{[0-9]+}} + %ivar = load i64* @"OBJC_IVAR_$_UIScreen._bounds", align 8, !invariant.load !4 + %0 = bitcast %0* %self to i8* + %add.ptr = getelementptr inbounds i8* %0, i64 %ivar + %add.ptr10.0 = bitcast i8* %add.ptr to double* + %tmp11 = load double* %add.ptr10.0, align 8 + %add.ptr.sum = add i64 %ivar, 8 + %add.ptr10.1 = getelementptr inbounds i8* %0, i64 %add.ptr.sum + %1 = bitcast i8* %add.ptr10.1 to double* + %tmp12 = load double* %1, align 8 + %add.ptr.sum17 = add i64 %ivar, 16 + %add.ptr4.1 = getelementptr inbounds i8* %0, i64 %add.ptr.sum17 + %add.ptr4.1.0 = bitcast i8* %add.ptr4.1 to double* + %tmp = load double* %add.ptr4.1.0, align 8 + %add.ptr4.1.sum = add i64 %ivar, 24 + %add.ptr4.1.1 = getelementptr inbounds i8* %0, i64 %add.ptr4.1.sum + %2 = bitcast i8* %add.ptr4.1.1 to double* + %tmp5 = load double* %2, align 8 + %insert14 = insertvalue %struct.CGPoint undef, double %tmp11, 0 + %insert16 = insertvalue %struct.CGPoint %insert14, double %tmp12, 1 + %insert = insertvalue %struct.CGRect undef, %struct.CGPoint %insert16, 0 + %insert7 = insertvalue %struct.CGSize undef, double %tmp, 0 + %insert9 = insertvalue %struct.CGSize %insert7, double %tmp5, 1 + %insert3 = insertvalue %struct.CGRect %insert, %struct.CGSize %insert9, 1 + ret %struct.CGRect %insert3 +} + +!llvm.module.flags = !{!0, !1, !2, !3} + +!0 = metadata !{i32 1, metadata !"Objective-C Version", i32 2} +!1 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0} +!2 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"} +!3 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0} +!4 = metadata !{} diff --git a/test/CodeGen/ARM64/2012-06-06-FPToUI.ll b/test/CodeGen/ARM64/2012-06-06-FPToUI.ll new file mode 100644 index 0000000..dda4ff5 --- /dev/null +++ b/test/CodeGen/ARM64/2012-06-06-FPToUI.ll @@ -0,0 +1,65 @@ +; RUN: llc -march=arm64 -O0 < %s | FileCheck %s +; RUN: llc -march=arm64 -O3 < %s | FileCheck %s + +@.str = private unnamed_addr constant [9 x i8] c"%lf %lu\0A\00", align 1 +@.str1 = private unnamed_addr constant [8 x i8] c"%lf %u\0A\00", align 1 +@.str2 = private unnamed_addr constant [8 x i8] c"%f %lu\0A\00", align 1 +@.str3 = private unnamed_addr constant [7 x i8] c"%f %u\0A\00", align 1 + +define void @testDouble(double %d) ssp { +; CHECK: fcvtzu x{{.}}, d{{.}} +; CHECK: fcvtzu w{{.}}, d{{.}} +entry: + %d.addr = alloca double, align 8 + store double %d, double* %d.addr, align 8 + %0 = load double* %d.addr, align 8 + %1 = load double* %d.addr, align 8 + %conv = fptoui double %1 to i64 + %call = 
call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), double %0, i64 %conv) + %2 = load double* %d.addr, align 8 + %3 = load double* %d.addr, align 8 + %conv1 = fptoui double %3 to i32 + %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str1, i32 0, i32 0), double %2, i32 %conv1) + ret void +} + +declare i32 @printf(i8*, ...) + +define void @testFloat(float %f) ssp { +; CHECK: fcvtzu x{{.}}, s{{.}} +; CHECK: fcvtzu w{{.}}, s{{.}} +entry: + %f.addr = alloca float, align 4 + store float %f, float* %f.addr, align 4 + %0 = load float* %f.addr, align 4 + %conv = fpext float %0 to double + %1 = load float* %f.addr, align 4 + %conv1 = fptoui float %1 to i64 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str2, i32 0, i32 0), double %conv, i64 %conv1) + %2 = load float* %f.addr, align 4 + %conv2 = fpext float %2 to double + %3 = load float* %f.addr, align 4 + %conv3 = fptoui float %3 to i32 + %call4 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str3, i32 0, i32 0), double %conv2, i32 %conv3) + ret void +} + +define i32 @main(i32 %argc, i8** %argv) ssp { +entry: + %retval = alloca i32, align 4 + %argc.addr = alloca i32, align 4 + %argv.addr = alloca i8**, align 8 + store i32 0, i32* %retval + store i32 %argc, i32* %argc.addr, align 4 + store i8** %argv, i8*** %argv.addr, align 8 + call void @testDouble(double 1.159198e+01) + call void @testFloat(float 0x40272F1800000000) + ret i32 0 +} + +!llvm.module.flags = !{!0, !1, !2, !3} + +!0 = metadata !{i32 1, metadata !"Objective-C Version", i32 2} +!1 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0} +!2 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"} +!3 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0} diff --git a/test/CodeGen/ARM64/2012-07-11-InstrEmitterBug.ll b/test/CodeGen/ARM64/2012-07-11-InstrEmitterBug.ll new file mode 100644 index 0000000..55ecfb5 --- /dev/null +++ b/test/CodeGen/ARM64/2012-07-11-InstrEmitterBug.ll @@ -0,0 +1,56 @@ +; RUN: llc < %s -mtriple=arm64-apple-ios +; rdar://11849816 + +@shlib_path_substitutions = external hidden unnamed_addr global i8**, align 8 + +declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readnone + +declare noalias i8* @xmalloc(i64) optsize + +declare i64 @strlen(i8* nocapture) nounwind readonly optsize + +declare i8* @__strcpy_chk(i8*, i8*, i64) nounwind optsize + +declare i8* @__strcat_chk(i8*, i8*, i64) nounwind optsize + +declare noalias i8* @xstrdup(i8*) optsize + +define i8* @dyld_fix_path(i8* %path) nounwind optsize ssp { +entry: + br i1 undef, label %if.end56, label %for.cond + +for.cond: ; preds = %entry + br i1 undef, label %for.cond10, label %for.body + +for.body: ; preds = %for.cond + unreachable + +for.cond10: ; preds = %for.cond + br i1 undef, label %if.end56, label %for.body14 + +for.body14: ; preds = %for.cond10 + %call22 = tail call i64 @strlen(i8* undef) nounwind optsize + %sext = shl i64 %call22, 32 + %conv30 = ashr exact i64 %sext, 32 + %add29 = sub i64 0, %conv30 + %sub = add i64 %add29, 0 + %add31 = shl i64 %sub, 32 + %sext59 = add i64 %add31, 4294967296 + %conv33 = ashr exact i64 %sext59, 32 + %call34 = tail call noalias i8* @xmalloc(i64 %conv33) nounwind optsize + br i1 undef, label %cond.false45, label %cond.true43 + +cond.true43: ; preds = %for.body14 + unreachable + +cond.false45: ; preds = %for.body14 + %add.ptr = getelementptr inbounds i8* 
%path, i64 %conv30 + unreachable + +if.end56: ; preds = %for.cond10, %entry + ret i8* null +} + +declare i32 @strncmp(i8* nocapture, i8* nocapture, i64) nounwind readonly optsize + +declare i8* @strcpy(i8*, i8* nocapture) nounwind diff --git a/test/CodeGen/ARM64/2013-01-13-ffast-fcmp.ll b/test/CodeGen/ARM64/2013-01-13-ffast-fcmp.ll new file mode 100644 index 0000000..b40a581 --- /dev/null +++ b/test/CodeGen/ARM64/2013-01-13-ffast-fcmp.ll @@ -0,0 +1,19 @@ +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -fp-contract=fast | FileCheck %s --check-prefix=FAST + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128" +target triple = "arm64-apple-ios7.0.0" + +;FAST-LABEL: _Z9example25v: +;FAST: fcmgt.4s +;FAST: ret + +;CHECK-LABEL: _Z9example25v: +;CHECK: fcmgt.4s +;CHECK: ret + +define <4 x i32> @_Z9example25v( <4 x float> %N0, <4 x float> %N1) { + %A = fcmp olt <4 x float> %N0, %N1 + %B = zext <4 x i1> %A to <4 x i32> + ret <4 x i32> %B +} diff --git a/test/CodeGen/ARM64/2013-01-23-frem-crash.ll b/test/CodeGen/ARM64/2013-01-23-frem-crash.ll new file mode 100644 index 0000000..9451124 --- /dev/null +++ b/test/CodeGen/ARM64/2013-01-23-frem-crash.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -march=arm64 +; Make sure we are not crashing on this test. + +define void @autogen_SD13158() { +entry: + %B26 = frem float 0.000000e+00, undef + br i1 undef, label %CF, label %CF77 + +CF: ; preds = %CF, %CF76 + store float %B26, float* undef + br i1 undef, label %CF, label %CF77 + +CF77: ; preds = %CF + ret void +} diff --git a/test/CodeGen/ARM64/2013-01-23-sext-crash.ll b/test/CodeGen/ARM64/2013-01-23-sext-crash.ll new file mode 100644 index 0000000..404027b --- /dev/null +++ b/test/CodeGen/ARM64/2013-01-23-sext-crash.ll @@ -0,0 +1,37 @@ +; RUN: llc < %s -march=arm64 + +; Make sure we are not crashing on this test. 
+ +define void @autogen_SD12881() { +BB: + %B17 = ashr <4 x i32> zeroinitializer, zeroinitializer + br label %CF + +CF: ; preds = %CF83, %CF, %BB + br i1 undef, label %CF, label %CF83 + +CF83: ; preds = %CF + %FC70 = sitofp <4 x i32> %B17 to <4 x double> + br label %CF +} + + +define void @autogen_SD12881_2() { +BB: + %B17 = ashr <4 x i32> zeroinitializer, zeroinitializer + br label %CF + +CF: ; preds = %CF83, %CF, %BB + br i1 undef, label %CF, label %CF83 + +CF83: ; preds = %CF + %FC70 = uitofp <4 x i32> %B17 to <4 x double> + br label %CF +} + +define void @_Z12my_example2bv() nounwind noinline ssp { +entry: + %0 = fptosi <2 x double> undef to <2 x i32> + store <2 x i32> %0, <2 x i32>* undef, align 8 + ret void +} diff --git a/test/CodeGen/ARM64/2013-02-12-shufv8i8.ll b/test/CodeGen/ARM64/2013-02-12-shufv8i8.ll new file mode 100644 index 0000000..70e745f --- /dev/null +++ b/test/CodeGen/ARM64/2013-02-12-shufv8i8.ll @@ -0,0 +1,11 @@ +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple + +;CHECK-LABEL: Shuff: +;CHECK: tbl.8b +;CHECK: ret +define <8 x i8 > @Shuff(<8 x i8> %in, <8 x i8>* %out) nounwind ssp { + %value = shufflevector <8 x i8> %in, <8 x i8> zeroinitializer, <8 x i32> + ret <8 x i8> %value +} + + diff --git a/test/CodeGen/ARM64/AdvSIMD-Scalar.ll b/test/CodeGen/ARM64/AdvSIMD-Scalar.ll new file mode 100644 index 0000000..6397ac5 --- /dev/null +++ b/test/CodeGen/ARM64/AdvSIMD-Scalar.ll @@ -0,0 +1,38 @@ +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -arm64-simd-scalar=true -asm-verbose=false | FileCheck %s +; +define <2 x i64> @bar(<2 x i64> %a, <2 x i64> %b) nounwind readnone { +; CHECK-LABEL: bar: +; CHECK: add.2d v[[REG:[0-9]+]], v0, v1 +; CHECK: add d[[REG3:[0-9]+]], d[[REG]], d1 +; CHECK: sub d[[REG2:[0-9]+]], d[[REG]], d1 + %add = add <2 x i64> %a, %b + %vgetq_lane = extractelement <2 x i64> %add, i32 0 + %vgetq_lane2 = extractelement <2 x i64> %b, i32 0 + %add3 = add i64 %vgetq_lane, %vgetq_lane2 + %sub = sub i64 %vgetq_lane, %vgetq_lane2 + %vecinit = insertelement <2 x i64> undef, i64 %add3, i32 0 + %vecinit8 = insertelement <2 x i64> %vecinit, i64 %sub, i32 1 + ret <2 x i64> %vecinit8 +} + +define double @subdd_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone { +; CHECK-LABEL: subdd_su64: +; CHECK: sub d0, d1, d0 +; CHECK-NEXT: ret + %vecext = extractelement <2 x i64> %a, i32 0 + %vecext1 = extractelement <2 x i64> %b, i32 0 + %sub.i = sub nsw i64 %vecext1, %vecext + %retval = bitcast i64 %sub.i to double + ret double %retval +} + +define double @vaddd_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone { +; CHECK-LABEL: vaddd_su64: +; CHECK: add d0, d1, d0 +; CHECK-NEXT: ret + %vecext = extractelement <2 x i64> %a, i32 0 + %vecext1 = extractelement <2 x i64> %b, i32 0 + %add.i = add nsw i64 %vecext1, %vecext + %retval = bitcast i64 %add.i to double + ret double %retval +} diff --git a/test/CodeGen/ARM64/aapcs.ll b/test/CodeGen/ARM64/aapcs.ll new file mode 100644 index 0000000..27d2aa7 --- /dev/null +++ b/test/CodeGen/ARM64/aapcs.ll @@ -0,0 +1,86 @@ +; RUN: llc -mtriple=arm64-linux-gnu -enable-misched=false < %s | FileCheck %s + +@var = global i32 0, align 4 + +define i128 @test_i128_align(i32, i128 %arg, i32 %after) { + store i32 %after, i32* @var, align 4 +; CHECK: str w4, [{{x[0-9]+}}, :lo12:var] + + ret i128 %arg +; CHECK: mov x0, x2 +; CHECK: mov x1, x3 +} + +@var64 = global i64 0, align 8 + + ; Check stack slots are 64-bit at all times. 
+define void @test_stack_slots([8 x i32], i1 %bool, i8 %char, i16 %short, + i32 %int, i64 %long) { + ; Part of last store. Blasted scheduler. +; CHECK: ldr [[LONG:x[0-9]+]], [sp, #32] + + %ext_bool = zext i1 %bool to i64 + store volatile i64 %ext_bool, i64* @var64, align 8 +; CHECK: ldr w[[EXT:[0-9]+]], [sp] +; CHECK: and x[[EXTED:[0-9]+]], x[[EXT]], #0x1 +; CHECK: str x[[EXTED]], [{{x[0-9]+}}, :lo12:var64] + + %ext_char = zext i8 %char to i64 + store volatile i64 %ext_char, i64* @var64, align 8 +; CHECK: ldrb w[[EXT:[0-9]+]], [sp, #8] +; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64] + + %ext_short = zext i16 %short to i64 + store volatile i64 %ext_short, i64* @var64, align 8 +; CHECK: ldrh w[[EXT:[0-9]+]], [sp, #16] +; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64] + + %ext_int = zext i32 %int to i64 + store volatile i64 %ext_int, i64* @var64, align 8 +; CHECK: ldr w[[EXT:[0-9]+]], [sp, #24] +; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64] + + store volatile i64 %long, i64* @var64, align 8 +; CHECK: str [[LONG]], [{{x[0-9]+}}, :lo12:var64] + + ret void +} + +; Make sure the callee does extensions (in the absence of zext/sext +; keyword on args) while we're here. + +define void @test_extension(i1 %bool, i8 %char, i16 %short, i32 %int) { + %ext_bool = zext i1 %bool to i64 + store volatile i64 %ext_bool, i64* @var64 +; CHECK: and [[EXT:x[0-9]+]], x0, #0x1 +; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64] + + %ext_char = sext i8 %char to i64 + store volatile i64 %ext_char, i64* @var64 +; CHECK: sxtb [[EXT:x[0-9]+]], x1 +; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64] + + %ext_short = zext i16 %short to i64 + store volatile i64 %ext_short, i64* @var64 +; CHECK: and [[EXT:x[0-9]+]], x2, #0xffff +; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64] + + %ext_int = zext i32 %int to i64 + store volatile i64 %ext_int, i64* @var64 +; CHECK: uxtw [[EXT:x[0-9]+]], x3 +; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64] + + ret void +} + +declare void @variadic(i32 %a, ...) + + ; Under AAPCS variadic functions have the same calling convention as + ; others. The extra arguments should go in registers rather than on the stack. +define void @test_variadic() { + call void(i32, ...)* @variadic(i32 0, i64 1, double 2.0) +; CHECK: fmov d0, #2.0 +; CHECK: orr x1, xzr, #0x1 +; CHECK: bl variadic + ret void +} diff --git a/test/CodeGen/ARM64/abi-varargs.ll b/test/CodeGen/ARM64/abi-varargs.ll new file mode 100644 index 0000000..92db392 --- /dev/null +++ b/test/CodeGen/ARM64/abi-varargs.ll @@ -0,0 +1,191 @@ +; RUN: llc < %s -march=arm64 -mcpu=cyclone -enable-misched=false | FileCheck %s +target triple = "arm64-apple-ios7.0.0" + +; rdar://13625505 +; Here we have 9 fixed integer arguments the 9th argument in on stack, the +; varargs start right after at 8-byte alignment. +define void @fn9(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8, i32 %a9, ...) 
nounwind noinline ssp { +; CHECK-LABEL: fn9: +; 9th fixed argument +; CHECK: ldr {{w[0-9]+}}, [sp, #64] +; CHECK: add [[ARGS:x[0-9]+]], sp, #72 +; CHECK: add {{x[0-9]+}}, [[ARGS]], #8 +; First vararg +; CHECK: ldr {{w[0-9]+}}, [sp, #72] +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #8 +; Second vararg +; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}] +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #8 +; Third vararg +; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}] + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + %7 = alloca i32, align 4 + %8 = alloca i32, align 4 + %9 = alloca i32, align 4 + %args = alloca i8*, align 8 + %a10 = alloca i32, align 4 + %a11 = alloca i32, align 4 + %a12 = alloca i32, align 4 + store i32 %a1, i32* %1, align 4 + store i32 %a2, i32* %2, align 4 + store i32 %a3, i32* %3, align 4 + store i32 %a4, i32* %4, align 4 + store i32 %a5, i32* %5, align 4 + store i32 %a6, i32* %6, align 4 + store i32 %a7, i32* %7, align 4 + store i32 %a8, i32* %8, align 4 + store i32 %a9, i32* %9, align 4 + %10 = bitcast i8** %args to i8* + call void @llvm.va_start(i8* %10) + %11 = va_arg i8** %args, i32 + store i32 %11, i32* %a10, align 4 + %12 = va_arg i8** %args, i32 + store i32 %12, i32* %a11, align 4 + %13 = va_arg i8** %args, i32 + store i32 %13, i32* %a12, align 4 + ret void +} + +declare void @llvm.va_start(i8*) nounwind + +define i32 @main() nounwind ssp { +; CHECK-LABEL: main: +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] +; CHECK: str {{x[0-9]+}}, [sp, #8] +; CHECK: str {{w[0-9]+}}, [sp] + %a1 = alloca i32, align 4 + %a2 = alloca i32, align 4 + %a3 = alloca i32, align 4 + %a4 = alloca i32, align 4 + %a5 = alloca i32, align 4 + %a6 = alloca i32, align 4 + %a7 = alloca i32, align 4 + %a8 = alloca i32, align 4 + %a9 = alloca i32, align 4 + %a10 = alloca i32, align 4 + %a11 = alloca i32, align 4 + %a12 = alloca i32, align 4 + store i32 1, i32* %a1, align 4 + store i32 2, i32* %a2, align 4 + store i32 3, i32* %a3, align 4 + store i32 4, i32* %a4, align 4 + store i32 5, i32* %a5, align 4 + store i32 6, i32* %a6, align 4 + store i32 7, i32* %a7, align 4 + store i32 8, i32* %a8, align 4 + store i32 9, i32* %a9, align 4 + store i32 10, i32* %a10, align 4 + store i32 11, i32* %a11, align 4 + store i32 12, i32* %a12, align 4 + %1 = load i32* %a1, align 4 + %2 = load i32* %a2, align 4 + %3 = load i32* %a3, align 4 + %4 = load i32* %a4, align 4 + %5 = load i32* %a5, align 4 + %6 = load i32* %a6, align 4 + %7 = load i32* %a7, align 4 + %8 = load i32* %a8, align 4 + %9 = load i32* %a9, align 4 + %10 = load i32* %a10, align 4 + %11 = load i32* %a11, align 4 + %12 = load i32* %a12, align 4 + call void (i32, i32, i32, i32, i32, i32, i32, i32, i32, ...)* @fn9(i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, i32 %12) + ret i32 0 +} + +;rdar://13668483 +@.str = private unnamed_addr constant [4 x i8] c"fmt\00", align 1 +define void @foo(i8* %fmt, ...) 
nounwind { +entry: +; CHECK-LABEL: foo: +; CHECK: orr {{x[0-9]+}}, {{x[0-9]+}}, #0x8 +; CHECK: ldr {{w[0-9]+}}, [sp, #48] +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #15 +; CHECK: and x[[ADDR:[0-9]+]], {{x[0-9]+}}, #0xfffffffffffffff0 +; CHECK: ldr {{q[0-9]+}}, [x[[ADDR]]] + %fmt.addr = alloca i8*, align 8 + %args = alloca i8*, align 8 + %vc = alloca i32, align 4 + %vv = alloca <4 x i32>, align 16 + store i8* %fmt, i8** %fmt.addr, align 8 + %args1 = bitcast i8** %args to i8* + call void @llvm.va_start(i8* %args1) + %0 = va_arg i8** %args, i32 + store i32 %0, i32* %vc, align 4 + %1 = va_arg i8** %args, <4 x i32> + store <4 x i32> %1, <4 x i32>* %vv, align 16 + ret void +} + +define void @bar(i32 %x, <4 x i32> %y) nounwind { +entry: +; CHECK-LABEL: bar: +; CHECK: str {{q[0-9]+}}, [sp, #16] +; CHECK: str {{x[0-9]+}}, [sp] + %x.addr = alloca i32, align 4 + %y.addr = alloca <4 x i32>, align 16 + store i32 %x, i32* %x.addr, align 4 + store <4 x i32> %y, <4 x i32>* %y.addr, align 16 + %0 = load i32* %x.addr, align 4 + %1 = load <4 x i32>* %y.addr, align 16 + call void (i8*, ...)* @foo(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %0, <4 x i32> %1) + ret void +} + +; rdar://13668927 +; When passing 16-byte aligned small structs as vararg, make sure the caller +; side is 16-byte aligned on stack. +%struct.s41 = type { i32, i16, i32, i16 } +define void @foo2(i8* %fmt, ...) nounwind { +entry: +; CHECK-LABEL: foo2: +; CHECK: orr {{x[0-9]+}}, {{x[0-9]+}}, #0x8 +; CHECK: ldr {{w[0-9]+}}, [sp, #48] +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #15 +; CHECK: and x[[ADDR:[0-9]+]], {{x[0-9]+}}, #0xfffffffffffffff0 +; CHECK: ldr {{q[0-9]+}}, [x[[ADDR]]] + %fmt.addr = alloca i8*, align 8 + %args = alloca i8*, align 8 + %vc = alloca i32, align 4 + %vs = alloca %struct.s41, align 16 + store i8* %fmt, i8** %fmt.addr, align 8 + %args1 = bitcast i8** %args to i8* + call void @llvm.va_start(i8* %args1) + %0 = va_arg i8** %args, i32 + store i32 %0, i32* %vc, align 4 + %ap.cur = load i8** %args + %1 = getelementptr i8* %ap.cur, i32 15 + %2 = ptrtoint i8* %1 to i64 + %3 = and i64 %2, -16 + %ap.align = inttoptr i64 %3 to i8* + %ap.next = getelementptr i8* %ap.align, i32 16 + store i8* %ap.next, i8** %args + %4 = bitcast i8* %ap.align to %struct.s41* + %5 = bitcast %struct.s41* %vs to i8* + %6 = bitcast %struct.s41* %4 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %5, i8* %6, i64 16, i32 16, i1 false) + ret void +} +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind + +define void @bar2(i32 %x, i128 %s41.coerce) nounwind { +entry: +; CHECK-LABEL: bar2: +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] +; CHECK: str {{x[0-9]+}}, [sp] + %x.addr = alloca i32, align 4 + %s41 = alloca %struct.s41, align 16 + store i32 %x, i32* %x.addr, align 4 + %0 = bitcast %struct.s41* %s41 to i128* + store i128 %s41.coerce, i128* %0, align 1 + %1 = load i32* %x.addr, align 4 + %2 = bitcast %struct.s41* %s41 to i128* + %3 = load i128* %2, align 1 + call void (i8*, ...)* @foo2(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %1, i128 %3) + ret void +} diff --git a/test/CodeGen/ARM64/abi.ll b/test/CodeGen/ARM64/abi.ll new file mode 100644 index 0000000..a7693b6 --- /dev/null +++ b/test/CodeGen/ARM64/abi.ll @@ -0,0 +1,236 @@ +; RUN: llc < %s -march=arm64 -mcpu=cyclone -enable-misched=false | FileCheck %s +; RUN: llc < %s -O0 | FileCheck -check-prefix=FAST %s +target triple = "arm64-apple-darwin" + +; rdar://9932559 +define i64 @i8i16callee(i64 %a1, i64 %a2, i64 %a3, i8 
signext %a4, i16 signext %a5, i64 %a6, i64 %a7, i64 %a8, i8 signext %b1, i16 signext %b2, i8 signext %b3, i8 signext %b4) nounwind readnone noinline { +entry: +; CHECK-LABEL: i8i16callee: +; The 8th, 9th, 10th and 11th arguments are passed at sp, sp+2, sp+4, sp+5. +; They are i8, i16, i8 and i8. +; CHECK: ldrsb {{w[0-9]+}}, [sp, #5] +; CHECK: ldrsh {{w[0-9]+}}, [sp, #2] +; CHECK: ldrsb {{w[0-9]+}}, [sp] +; CHECK: ldrsb {{w[0-9]+}}, [sp, #4] +; FAST-LABEL: i8i16callee: +; FAST: ldrb {{w[0-9]+}}, [sp, #5] +; FAST: ldrb {{w[0-9]+}}, [sp, #4] +; FAST: ldrh {{w[0-9]+}}, [sp, #2] +; FAST: ldrb {{w[0-9]+}}, [sp] + %conv = sext i8 %a4 to i64 + %conv3 = sext i16 %a5 to i64 + %conv8 = sext i8 %b1 to i64 + %conv9 = sext i16 %b2 to i64 + %conv11 = sext i8 %b3 to i64 + %conv13 = sext i8 %b4 to i64 + %add10 = add i64 %a2, %a1 + %add12 = add i64 %add10, %a3 + %add14 = add i64 %add12, %conv + %add = add i64 %add14, %conv3 + %add1 = add i64 %add, %a6 + %add2 = add i64 %add1, %a7 + %add4 = add i64 %add2, %a8 + %add5 = add i64 %add4, %conv8 + %add6 = add i64 %add5, %conv9 + %add7 = add i64 %add6, %conv11 + %add15 = add i64 %add7, %conv13 + %sext = shl i64 %add15, 32 + %conv17 = ashr exact i64 %sext, 32 + ret i64 %conv17 +} + +define i32 @i8i16caller() nounwind readnone { +entry: +; CHECK: i8i16caller +; The 8th, 9th, 10th and 11th arguments are passed at sp, sp+2, sp+4, sp+5. +; They are i8, i16, i8 and i8. +; CHECK: strb {{w[0-9]+}}, [sp, #5] +; CHECK: strb {{w[0-9]+}}, [sp, #4] +; CHECK: strh {{w[0-9]+}}, [sp, #2] +; CHECK: strb {{w[0-9]+}}, [sp] +; CHECK: bl +; FAST: i8i16caller +; FAST: strb {{w[0-9]+}}, [sp] +; FAST: strh {{w[0-9]+}}, [sp, #2] +; FAST: strb {{w[0-9]+}}, [sp, #4] +; FAST: strb {{w[0-9]+}}, [sp, #5] +; FAST: bl + %call = tail call i64 @i8i16callee(i64 0, i64 1, i64 2, i8 signext 3, i16 signext 4, i64 5, i64 6, i64 7, i8 signext 97, i16 signext 98, i8 signext 99, i8 signext 100) + %conv = trunc i64 %call to i32 + ret i32 %conv +} + +; rdar://12651543 +define double @circle_center([2 x float] %a) nounwind ssp { + %call = tail call double @ext([2 x float] %a) nounwind +; CHECK: circle_center +; CHECK: bl + ret double %call +} +declare double @ext([2 x float]) + +; rdar://12656141 +; 16-byte vector should be aligned at 16-byte when passing on stack. +; A double argument will be passed on stack, so vecotr should be at sp+16. +define double @fixed_4i(<4 x i32>* nocapture %in) nounwind { +entry: +; CHECK: fixed_4i +; CHECK: str [[REG_1:q[0-9]+]], [sp, #16] +; FAST: fixed_4i +; FAST: mov x[[ADDR:[0-9]+]], sp +; FAST: str [[REG_1:q[0-9]+]], [x[[ADDR]], #16] + %0 = load <4 x i32>* %in, align 16 + %call = tail call double @args_vec_4i(double 3.000000e+00, <4 x i32> %0, <4 x i32> %0, <4 x i32> %0, <4 x i32> %0, <4 x i32> %0, <4 x i32> %0, <4 x i32> %0, double 3.000000e+00, <4 x i32> %0, i8 signext 3) + ret double %call +} +declare double @args_vec_4i(double, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, double, <4 x i32>, i8 signext) + +; rdar://12695237 +; d8 at sp, i in register w0. 
+@g_d = common global double 0.000000e+00, align 8 +define void @test1(float %f1, double %d1, double %d2, double %d3, double %d4, + double %d5, double %d6, double %d7, double %d8, i32 %i) nounwind ssp { +entry: +; CHECK: test1 +; CHECK: ldr [[REG_1:d[0-9]+]], [sp] +; CHECK: scvtf [[REG_2:s[0-9]+]], w0 +; CHECK: fadd s0, [[REG_2]], s0 + %conv = sitofp i32 %i to float + %add = fadd float %conv, %f1 + %conv1 = fpext float %add to double + %add2 = fadd double %conv1, %d7 + %add3 = fadd double %add2, %d8 + store double %add3, double* @g_d, align 8 + ret void +} + +; i9 at sp, d1 in register s0. +define void @test2(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, + i32 %i7, i32 %i8, i32 %i9, float %d1) nounwind ssp { +entry: +; CHECK: test2 +; CHECK: scvtf [[REG_2:s[0-9]+]], w0 +; CHECK: fadd s0, [[REG_2]], s0 +; CHECK: ldr [[REG_1:s[0-9]+]], [sp] + %conv = sitofp i32 %i1 to float + %add = fadd float %conv, %d1 + %conv1 = fpext float %add to double + %conv2 = sitofp i32 %i8 to double + %add3 = fadd double %conv2, %conv1 + %conv4 = sitofp i32 %i9 to double + %add5 = fadd double %conv4, %add3 + store double %add5, double* @g_d, align 8 + ret void +} + +; rdar://12648441 +; Check alignment on stack for v64, f64, i64, f32, i32. +define double @test3(<2 x i32>* nocapture %in) nounwind { +entry: +; CHECK: test3 +; CHECK: str [[REG_1:d[0-9]+]], [sp, #8] +; FAST: test3 +; FAST: mov x[[ADDR:[0-9]+]], sp +; FAST: str [[REG_1:d[0-9]+]], [x[[ADDR]], #8] + %0 = load <2 x i32>* %in, align 8 + %call = tail call double @args_vec_2i(double 3.000000e+00, <2 x i32> %0, + <2 x i32> %0, <2 x i32> %0, <2 x i32> %0, <2 x i32> %0, <2 x i32> %0, + <2 x i32> %0, float 3.000000e+00, <2 x i32> %0, i8 signext 3) + ret double %call +} +declare double @args_vec_2i(double, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, + <2 x i32>, <2 x i32>, <2 x i32>, float, <2 x i32>, i8 signext) + +define double @test4(double* nocapture %in) nounwind { +entry: +; CHECK: test4 +; CHECK: str [[REG_1:d[0-9]+]], [sp, #8] +; CHECK: str [[REG_2:w[0-9]+]], [sp] +; CHECK: orr w0, wzr, #0x3 + %0 = load double* %in, align 8 + %call = tail call double @args_f64(double 3.000000e+00, double %0, double %0, + double %0, double %0, double %0, double %0, double %0, + float 3.000000e+00, double %0, i8 signext 3) + ret double %call +} +declare double @args_f64(double, double, double, double, double, double, double, + double, float, double, i8 signext) + +define i64 @test5(i64* nocapture %in) nounwind { +entry: +; CHECK: test5 +; CHECK: strb [[REG_3:w[0-9]+]], [sp, #16] +; CHECK: str [[REG_1:x[0-9]+]], [sp, #8] +; CHECK: str [[REG_2:w[0-9]+]], [sp] + %0 = load i64* %in, align 8 + %call = tail call i64 @args_i64(i64 3, i64 %0, i64 %0, i64 %0, i64 %0, i64 %0, + i64 %0, i64 %0, i32 3, i64 %0, i8 signext 3) + ret i64 %call +} +declare i64 @args_i64(i64, i64, i64, i64, i64, i64, i64, i64, i32, i64, + i8 signext) + +define i32 @test6(float* nocapture %in) nounwind { +entry: +; CHECK: test6 +; CHECK: strb [[REG_2:w[0-9]+]], [sp, #8] +; CHECK: str [[REG_1:s[0-9]+]], [sp, #4] +; CHECK: strh [[REG_3:w[0-9]+]], [sp] + %0 = load float* %in, align 4 + %call = tail call i32 @args_f32(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, + i32 7, i32 8, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, + float 6.0, float 7.0, float 8.0, i16 signext 3, float %0, + i8 signext 3) + ret i32 %call +} +declare i32 @args_f32(i32, i32, i32, i32, i32, i32, i32, i32, + float, float, float, float, float, float, float, float, + i16 signext, float, i8 signext) + +define i32 @test7(i32* 
nocapture %in) nounwind { +entry: +; CHECK: test7 +; CHECK: strb [[REG_2:w[0-9]+]], [sp, #8] +; CHECK: str [[REG_1:w[0-9]+]], [sp, #4] +; CHECK: strh [[REG_3:w[0-9]+]], [sp] + %0 = load i32* %in, align 4 + %call = tail call i32 @args_i32(i32 3, i32 %0, i32 %0, i32 %0, i32 %0, i32 %0, + i32 %0, i32 %0, i16 signext 3, i32 %0, i8 signext 4) + ret i32 %call +} +declare i32 @args_i32(i32, i32, i32, i32, i32, i32, i32, i32, i16 signext, i32, + i8 signext) + +define i32 @test8(i32 %argc, i8** nocapture %argv) nounwind { +entry: +; CHECK: test8 +; CHECK: strb {{w[0-9]+}}, [sp, #3] +; CHECK: strb wzr, [sp, #2] +; CHECK: strb {{w[0-9]+}}, [sp, #1] +; CHECK: strb wzr, [sp] +; CHECK: bl +; FAST: test8 +; FAST: strb {{w[0-9]+}}, [sp] +; FAST: strb {{w[0-9]+}}, [sp, #1] +; FAST: strb {{w[0-9]+}}, [sp, #2] +; FAST: strb {{w[0-9]+}}, [sp, #3] +; FAST: bl + tail call void @args_i1(i1 zeroext false, i1 zeroext true, i1 zeroext false, + i1 zeroext true, i1 zeroext false, i1 zeroext true, + i1 zeroext false, i1 zeroext true, i1 zeroext false, + i1 zeroext true, i1 zeroext false, i1 zeroext true) + ret i32 0 +} + +declare void @args_i1(i1 zeroext, i1 zeroext, i1 zeroext, i1 zeroext, + i1 zeroext, i1 zeroext, i1 zeroext, i1 zeroext, + i1 zeroext, i1 zeroext, i1 zeroext, i1 zeroext) + +define i32 @i1_stack_incoming(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, + i64 %g, i64 %h, i64 %i, i1 zeroext %j) { +; CHECK-LABEL: i1_stack_incoming: +; CHECK: ldrb w0, [sp, #8] +; CHECK: ret + %v = zext i1 %j to i32 + ret i32 %v +} diff --git a/test/CodeGen/ARM64/abi_align.ll b/test/CodeGen/ARM64/abi_align.ll new file mode 100644 index 0000000..61c661e --- /dev/null +++ b/test/CodeGen/ARM64/abi_align.ll @@ -0,0 +1,529 @@ +; RUN: llc < %s -march=arm64 -mcpu=cyclone -enable-misched=false | FileCheck %s +; RUN: llc < %s -O0 | FileCheck -check-prefix=FAST %s +target triple = "arm64-apple-darwin" + +; rdar://12648441 +; Generated from arm64-arguments.c with -O2. +; Test passing structs with size < 8, < 16 and > 16 +; with alignment of 16 and without + +; Structs with size < 8 +%struct.s38 = type { i32, i16 } +; With alignment of 16, the size will be padded to multiple of 16 bytes. 
+%struct.s39 = type { i32, i16, [10 x i8] } +; Structs with size < 16 +%struct.s40 = type { i32, i16, i32, i16 } +%struct.s41 = type { i32, i16, i32, i16 } +; Structs with size > 16 +%struct.s42 = type { i32, i16, i32, i16, i32, i16 } +%struct.s43 = type { i32, i16, i32, i16, i32, i16, [10 x i8] } + +@g38 = common global %struct.s38 zeroinitializer, align 4 +@g38_2 = common global %struct.s38 zeroinitializer, align 4 +@g39 = common global %struct.s39 zeroinitializer, align 16 +@g39_2 = common global %struct.s39 zeroinitializer, align 16 +@g40 = common global %struct.s40 zeroinitializer, align 4 +@g40_2 = common global %struct.s40 zeroinitializer, align 4 +@g41 = common global %struct.s41 zeroinitializer, align 16 +@g41_2 = common global %struct.s41 zeroinitializer, align 16 +@g42 = common global %struct.s42 zeroinitializer, align 4 +@g42_2 = common global %struct.s42 zeroinitializer, align 4 +@g43 = common global %struct.s43 zeroinitializer, align 16 +@g43_2 = common global %struct.s43 zeroinitializer, align 16 + +; structs with size < 8 bytes, passed via i64 in x1 and x2 +define i32 @f38(i32 %i, i64 %s1.coerce, i64 %s2.coerce) #0 { +entry: +; CHECK: f38 +; CHECK: add w[[A:[0-9]+]], w1, w0 +; CHECK: add {{w[0-9]+}}, w[[A]], w2 + %s1.sroa.0.0.extract.trunc = trunc i64 %s1.coerce to i32 + %s1.sroa.1.4.extract.shift = lshr i64 %s1.coerce, 32 + %s2.sroa.0.0.extract.trunc = trunc i64 %s2.coerce to i32 + %s2.sroa.1.4.extract.shift = lshr i64 %s2.coerce, 32 + %sext8 = shl nuw nsw i64 %s1.sroa.1.4.extract.shift, 16 + %sext = trunc i64 %sext8 to i32 + %conv = ashr exact i32 %sext, 16 + %sext1011 = shl nuw nsw i64 %s2.sroa.1.4.extract.shift, 16 + %sext10 = trunc i64 %sext1011 to i32 + %conv6 = ashr exact i32 %sext10, 16 + %add = add i32 %s1.sroa.0.0.extract.trunc, %i + %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc + %add4 = add i32 %add3, %conv + %add7 = add i32 %add4, %conv6 + ret i32 %add7 +} + +define i32 @caller38() #1 { +entry: +; CHECK: caller38 +; CHECK: ldr x1, +; CHECK: ldr x2, + %0 = load i64* bitcast (%struct.s38* @g38 to i64*), align 4 + %1 = load i64* bitcast (%struct.s38* @g38_2 to i64*), align 4 + %call = tail call i32 @f38(i32 3, i64 %0, i64 %1) #5 + ret i32 %call +} + +declare i32 @f38_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, + i32 %i7, i32 %i8, i32 %i9, i64 %s1.coerce, i64 %s2.coerce) #0 + +; structs with size < 8 bytes, passed on stack at [sp+8] and [sp+16] +; i9 at [sp] +define i32 @caller38_stack() #1 { +entry: +; CHECK: caller38_stack +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8] +; CHECK: movz w[[C:[0-9]+]], #9 +; CHECK: str w[[C]], [sp] + %0 = load i64* bitcast (%struct.s38* @g38 to i64*), align 4 + %1 = load i64* bitcast (%struct.s38* @g38_2 to i64*), align 4 + %call = tail call i32 @f38_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, + i32 7, i32 8, i32 9, i64 %0, i64 %1) #5 + ret i32 %call +} + +; structs with size < 8 bytes, alignment of 16 +; passed via i128 in x1 and x3 +define i32 @f39(i32 %i, i128 %s1.coerce, i128 %s2.coerce) #0 { +entry: +; CHECK: f39 +; CHECK: add w[[A:[0-9]+]], w1, w0 +; CHECK: add {{w[0-9]+}}, w[[A]], w3 + %s1.sroa.0.0.extract.trunc = trunc i128 %s1.coerce to i32 + %s1.sroa.1.4.extract.shift = lshr i128 %s1.coerce, 32 + %s2.sroa.0.0.extract.trunc = trunc i128 %s2.coerce to i32 + %s2.sroa.1.4.extract.shift = lshr i128 %s2.coerce, 32 + %sext8 = shl nuw nsw i128 %s1.sroa.1.4.extract.shift, 16 + %sext = trunc i128 %sext8 to i32 + %conv = ashr exact i32 %sext, 16 + %sext1011 = shl nuw nsw i128 %s2.sroa.1.4.extract.shift, 16 + 
%sext10 = trunc i128 %sext1011 to i32 + %conv6 = ashr exact i32 %sext10, 16 + %add = add i32 %s1.sroa.0.0.extract.trunc, %i + %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc + %add4 = add i32 %add3, %conv + %add7 = add i32 %add4, %conv6 + ret i32 %add7 +} + +define i32 @caller39() #1 { +entry: +; CHECK: caller39 +; CHECK: ldp x1, x2, +; CHECK: ldp x3, x4, + %0 = load i128* bitcast (%struct.s39* @g39 to i128*), align 16 + %1 = load i128* bitcast (%struct.s39* @g39_2 to i128*), align 16 + %call = tail call i32 @f39(i32 3, i128 %0, i128 %1) #5 + ret i32 %call +} + +declare i32 @f39_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, + i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce) #0 + +; structs with size < 8 bytes, alignment 16 +; passed on stack at [sp+16] and [sp+32] +define i32 @caller39_stack() #1 { +entry: +; CHECK: caller39_stack +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #32] +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] +; CHECK: movz w[[C:[0-9]+]], #9 +; CHECK: str w[[C]], [sp] + %0 = load i128* bitcast (%struct.s39* @g39 to i128*), align 16 + %1 = load i128* bitcast (%struct.s39* @g39_2 to i128*), align 16 + %call = tail call i32 @f39_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, + i32 7, i32 8, i32 9, i128 %0, i128 %1) #5 + ret i32 %call +} + +; structs with size < 16 bytes +; passed via i128 in x1 and x3 +define i32 @f40(i32 %i, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) #0 { +entry: +; CHECK: f40 +; CHECK: add w[[A:[0-9]+]], w1, w0 +; CHECK: add {{w[0-9]+}}, w[[A]], w3 + %s1.coerce.fca.0.extract = extractvalue [2 x i64] %s1.coerce, 0 + %s2.coerce.fca.0.extract = extractvalue [2 x i64] %s2.coerce, 0 + %s1.sroa.0.0.extract.trunc = trunc i64 %s1.coerce.fca.0.extract to i32 + %s2.sroa.0.0.extract.trunc = trunc i64 %s2.coerce.fca.0.extract to i32 + %s1.sroa.0.4.extract.shift = lshr i64 %s1.coerce.fca.0.extract, 32 + %sext8 = shl nuw nsw i64 %s1.sroa.0.4.extract.shift, 16 + %sext = trunc i64 %sext8 to i32 + %conv = ashr exact i32 %sext, 16 + %s2.sroa.0.4.extract.shift = lshr i64 %s2.coerce.fca.0.extract, 32 + %sext1011 = shl nuw nsw i64 %s2.sroa.0.4.extract.shift, 16 + %sext10 = trunc i64 %sext1011 to i32 + %conv6 = ashr exact i32 %sext10, 16 + %add = add i32 %s1.sroa.0.0.extract.trunc, %i + %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc + %add4 = add i32 %add3, %conv + %add7 = add i32 %add4, %conv6 + ret i32 %add7 +} + +define i32 @caller40() #1 { +entry: +; CHECK: caller40 +; CHECK: ldp x1, x2, +; CHECK: ldp x3, x4, + %0 = load [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4 + %1 = load [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4 + %call = tail call i32 @f40(i32 3, [2 x i64] %0, [2 x i64] %1) #5 + ret i32 %call +} + +declare i32 @f40_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, + i32 %i7, i32 %i8, i32 %i9, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) #0 + +; structs with size < 16 bytes +; passed on stack at [sp+8] and [sp+24] +define i32 @caller40_stack() #1 { +entry: +; CHECK: caller40_stack +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #24] +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8] +; CHECK: movz w[[C:[0-9]+]], #9 +; CHECK: str w[[C]], [sp] + %0 = load [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4 + %1 = load [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4 + %call = tail call i32 @f40_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, + i32 7, i32 8, i32 9, [2 x i64] %0, [2 x i64] %1) #5 + ret i32 %call +} + +; structs with size < 16 bytes, alignment of 16 +; passed 
via i128 in x1 and x3 +define i32 @f41(i32 %i, i128 %s1.coerce, i128 %s2.coerce) #0 { +entry: +; CHECK: f41 +; CHECK: add w[[A:[0-9]+]], w1, w0 +; CHECK: add {{w[0-9]+}}, w[[A]], w3 + %s1.sroa.0.0.extract.trunc = trunc i128 %s1.coerce to i32 + %s1.sroa.1.4.extract.shift = lshr i128 %s1.coerce, 32 + %s2.sroa.0.0.extract.trunc = trunc i128 %s2.coerce to i32 + %s2.sroa.1.4.extract.shift = lshr i128 %s2.coerce, 32 + %sext8 = shl nuw nsw i128 %s1.sroa.1.4.extract.shift, 16 + %sext = trunc i128 %sext8 to i32 + %conv = ashr exact i32 %sext, 16 + %sext1011 = shl nuw nsw i128 %s2.sroa.1.4.extract.shift, 16 + %sext10 = trunc i128 %sext1011 to i32 + %conv6 = ashr exact i32 %sext10, 16 + %add = add i32 %s1.sroa.0.0.extract.trunc, %i + %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc + %add4 = add i32 %add3, %conv + %add7 = add i32 %add4, %conv6 + ret i32 %add7 +} + +define i32 @caller41() #1 { +entry: +; CHECK: caller41 +; CHECK: ldp x1, x2, +; CHECK: ldp x3, x4, + %0 = load i128* bitcast (%struct.s41* @g41 to i128*), align 16 + %1 = load i128* bitcast (%struct.s41* @g41_2 to i128*), align 16 + %call = tail call i32 @f41(i32 3, i128 %0, i128 %1) #5 + ret i32 %call +} + +declare i32 @f41_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, + i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce) #0 + +; structs with size < 16 bytes, alignment of 16 +; passed on stack at [sp+16] and [sp+32] +define i32 @caller41_stack() #1 { +entry: +; CHECK: caller41_stack +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #32] +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] +; CHECK: movz w[[C:[0-9]+]], #9 +; CHECK: str w[[C]], [sp] + %0 = load i128* bitcast (%struct.s41* @g41 to i128*), align 16 + %1 = load i128* bitcast (%struct.s41* @g41_2 to i128*), align 16 + %call = tail call i32 @f41_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, + i32 7, i32 8, i32 9, i128 %0, i128 %1) #5 + ret i32 %call +} + +; structs with size of 22 bytes, passed indirectly in x1 and x2 +define i32 @f42(i32 %i, %struct.s42* nocapture %s1, %struct.s42* nocapture %s2) #2 { +entry: +; CHECK: f42 +; CHECK: ldr w[[A:[0-9]+]], [x1] +; CHECK: ldr w[[B:[0-9]+]], [x2] +; CHECK: add w[[C:[0-9]+]], w[[A]], w0 +; CHECK: add {{w[0-9]+}}, w[[C]], w[[B]] +; FAST: f42 +; FAST: ldr w[[A:[0-9]+]], [x1] +; FAST: ldr w[[B:[0-9]+]], [x2] +; FAST: add w[[C:[0-9]+]], w[[A]], w0 +; FAST: add {{w[0-9]+}}, w[[C]], w[[B]] + %i1 = getelementptr inbounds %struct.s42* %s1, i64 0, i32 0 + %0 = load i32* %i1, align 4, !tbaa !0 + %i2 = getelementptr inbounds %struct.s42* %s2, i64 0, i32 0 + %1 = load i32* %i2, align 4, !tbaa !0 + %s = getelementptr inbounds %struct.s42* %s1, i64 0, i32 1 + %2 = load i16* %s, align 2, !tbaa !3 + %conv = sext i16 %2 to i32 + %s5 = getelementptr inbounds %struct.s42* %s2, i64 0, i32 1 + %3 = load i16* %s5, align 2, !tbaa !3 + %conv6 = sext i16 %3 to i32 + %add = add i32 %0, %i + %add3 = add i32 %add, %1 + %add4 = add i32 %add3, %conv + %add7 = add i32 %add4, %conv6 + ret i32 %add7 +} + +; For s1, we allocate a 22-byte space, pass its address via x1 +define i32 @caller42() #3 { +entry: +; CHECK: caller42 +; CHECK: str {{x[0-9]+}}, [sp, #48] +; CHECK: str {{q[0-9]+}}, [sp, #32] +; CHECK: str {{x[0-9]+}}, [sp, #16] +; CHECK: str {{q[0-9]+}}, [sp] +; CHECK: add x1, sp, #32 +; CHECK: mov x2, sp +; Space for s1 is allocated at sp+32 +; Space for s2 is allocated at sp + +; FAST: caller42 +; FAST: sub sp, sp, #96 +; Space for s1 is allocated at fp-24 = sp+72 +; Space for s2 is allocated at sp+48 +; FAST: sub x[[A:[0-9]+]], fp, #24 +; FAST: 
add x[[A:[0-9]+]], sp, #48 +; Call memcpy with size = 24 (0x18) +; FAST: orr {{x[0-9]+}}, xzr, #0x18 + %tmp = alloca %struct.s42, align 4 + %tmp1 = alloca %struct.s42, align 4 + %0 = bitcast %struct.s42* %tmp to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s42* @g42 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4 + %1 = bitcast %struct.s42* %tmp1 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s42* @g42_2 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4 + %call = call i32 @f42(i32 3, %struct.s42* %tmp, %struct.s42* %tmp1) #5 + ret i32 %call +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) #4 + +declare i32 @f42_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, + i32 %i7, i32 %i8, i32 %i9, %struct.s42* nocapture %s1, + %struct.s42* nocapture %s2) #2 + +define i32 @caller42_stack() #3 { +entry: +; CHECK: caller42_stack +; CHECK: mov fp, sp +; CHECK: sub sp, sp, #96 +; CHECK: stur {{x[0-9]+}}, [fp, #-16] +; CHECK: stur {{q[0-9]+}}, [fp, #-32] +; CHECK: str {{x[0-9]+}}, [sp, #48] +; CHECK: str {{q[0-9]+}}, [sp, #32] +; Space for s1 is allocated at fp-32 = sp+64 +; Space for s2 is allocated at sp+32 +; CHECK: add x[[B:[0-9]+]], sp, #32 +; CHECK: str x[[B]], [sp, #16] +; CHECK: sub x[[A:[0-9]+]], fp, #32 +; Address of s1 is passed on stack at sp+8 +; CHECK: str x[[A]], [sp, #8] +; CHECK: movz w[[C:[0-9]+]], #9 +; CHECK: str w[[C]], [sp] + +; FAST: caller42_stack +; Space for s1 is allocated at fp-24 +; Space for s2 is allocated at fp-48 +; FAST: sub x[[A:[0-9]+]], fp, #24 +; FAST: sub x[[B:[0-9]+]], fp, #48 +; Call memcpy with size = 24 (0x18) +; FAST: orr {{x[0-9]+}}, xzr, #0x18 +; FAST: str {{w[0-9]+}}, [sp] +; Address of s1 is passed on stack at sp+8 +; FAST: str {{x[0-9]+}}, [sp, #8] +; FAST: str {{x[0-9]+}}, [sp, #16] + %tmp = alloca %struct.s42, align 4 + %tmp1 = alloca %struct.s42, align 4 + %0 = bitcast %struct.s42* %tmp to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s42* @g42 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4 + %1 = bitcast %struct.s42* %tmp1 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s42* @g42_2 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4 + %call = call i32 @f42_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, + i32 8, i32 9, %struct.s42* %tmp, %struct.s42* %tmp1) #5 + ret i32 %call +} + +; structs with size of 22 bytes, alignment of 16 +; passed indirectly in x1 and x2 +define i32 @f43(i32 %i, %struct.s43* nocapture %s1, %struct.s43* nocapture %s2) #2 { +entry: +; CHECK: f43 +; CHECK: ldr w[[A:[0-9]+]], [x1] +; CHECK: ldr w[[B:[0-9]+]], [x2] +; CHECK: add w[[C:[0-9]+]], w[[A]], w0 +; CHECK: add {{w[0-9]+}}, w[[C]], w[[B]] +; FAST: f43 +; FAST: ldr w[[A:[0-9]+]], [x1] +; FAST: ldr w[[B:[0-9]+]], [x2] +; FAST: add w[[C:[0-9]+]], w[[A]], w0 +; FAST: add {{w[0-9]+}}, w[[C]], w[[B]] + %i1 = getelementptr inbounds %struct.s43* %s1, i64 0, i32 0 + %0 = load i32* %i1, align 4, !tbaa !0 + %i2 = getelementptr inbounds %struct.s43* %s2, i64 0, i32 0 + %1 = load i32* %i2, align 4, !tbaa !0 + %s = getelementptr inbounds %struct.s43* %s1, i64 0, i32 1 + %2 = load i16* %s, align 2, !tbaa !3 + %conv = sext i16 %2 to i32 + %s5 = getelementptr inbounds %struct.s43* %s2, i64 0, i32 1 + %3 = load i16* %s5, align 2, !tbaa !3 + %conv6 = sext i16 %3 to i32 + %add = add i32 %0, %i + %add3 = add i32 %add, %1 + %add4 = add i32 %add3, %conv + %add7 = add i32 %add4, %conv6 + ret i32 %add7 +} + +define i32 @caller43() 
#3 { +entry: +; CHECK: caller43 +; CHECK: str {{q[0-9]+}}, [sp, #48] +; CHECK: str {{q[0-9]+}}, [sp, #32] +; CHECK: str {{q[0-9]+}}, [sp, #16] +; CHECK: str {{q[0-9]+}}, [sp] +; CHECK: add x1, sp, #32 +; CHECK: mov x2, sp +; Space for s1 is allocated at sp+32 +; Space for s2 is allocated at sp + +; FAST: caller43 +; FAST: mov fp, sp +; Space for s1 is allocated at sp+32 +; Space for s2 is allocated at sp +; FAST: add x1, sp, #32 +; FAST: mov x2, sp +; FAST: str {{x[0-9]+}}, [sp, #32] +; FAST: str {{x[0-9]+}}, [sp, #40] +; FAST: str {{x[0-9]+}}, [sp, #48] +; FAST: str {{x[0-9]+}}, [sp, #56] +; FAST: str {{x[0-9]+}}, [sp] +; FAST: str {{x[0-9]+}}, [sp, #8] +; FAST: str {{x[0-9]+}}, [sp, #16] +; FAST: str {{x[0-9]+}}, [sp, #24] + %tmp = alloca %struct.s43, align 16 + %tmp1 = alloca %struct.s43, align 16 + %0 = bitcast %struct.s43* %tmp to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s43* @g43 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4 + %1 = bitcast %struct.s43* %tmp1 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s43* @g43_2 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4 + %call = call i32 @f43(i32 3, %struct.s43* %tmp, %struct.s43* %tmp1) #5 + ret i32 %call +} + +declare i32 @f43_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, + i32 %i7, i32 %i8, i32 %i9, %struct.s43* nocapture %s1, + %struct.s43* nocapture %s2) #2 + +define i32 @caller43_stack() #3 { +entry: +; CHECK: caller43_stack +; CHECK: mov fp, sp +; CHECK: sub sp, sp, #96 +; CHECK: stur {{q[0-9]+}}, [fp, #-16] +; CHECK: stur {{q[0-9]+}}, [fp, #-32] +; CHECK: str {{q[0-9]+}}, [sp, #48] +; CHECK: str {{q[0-9]+}}, [sp, #32] +; Space for s1 is allocated at fp-32 = sp+64 +; Space for s2 is allocated at sp+32 +; CHECK: add x[[B:[0-9]+]], sp, #32 +; CHECK: str x[[B]], [sp, #16] +; CHECK: sub x[[A:[0-9]+]], fp, #32 +; Address of s1 is passed on stack at sp+8 +; CHECK: str x[[A]], [sp, #8] +; CHECK: movz w[[C:[0-9]+]], #9 +; CHECK: str w[[C]], [sp] + +; FAST: caller43_stack +; FAST: sub sp, sp, #96 +; Space for s1 is allocated at fp-32 = sp+64 +; Space for s2 is allocated at sp+32 +; FAST: sub x[[A:[0-9]+]], fp, #32 +; FAST: add x[[B:[0-9]+]], sp, #32 +; FAST: stur {{x[0-9]+}}, [fp, #-32] +; FAST: stur {{x[0-9]+}}, [fp, #-24] +; FAST: stur {{x[0-9]+}}, [fp, #-16] +; FAST: stur {{x[0-9]+}}, [fp, #-8] +; FAST: str {{x[0-9]+}}, [sp, #32] +; FAST: str {{x[0-9]+}}, [sp, #40] +; FAST: str {{x[0-9]+}}, [sp, #48] +; FAST: str {{x[0-9]+}}, [sp, #56] +; FAST: str {{w[0-9]+}}, [sp] +; Address of s1 is passed on stack at sp+8 +; FAST: str {{x[0-9]+}}, [sp, #8] +; FAST: str {{x[0-9]+}}, [sp, #16] + %tmp = alloca %struct.s43, align 16 + %tmp1 = alloca %struct.s43, align 16 + %0 = bitcast %struct.s43* %tmp to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s43* @g43 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4 + %1 = bitcast %struct.s43* %tmp1 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s43* @g43_2 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4 + %call = call i32 @f43_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, + i32 8, i32 9, %struct.s43* %tmp, %struct.s43* %tmp1) #5 + ret i32 %call +} + +; rdar://13668927 +; Check that we don't split an i128. +declare i32 @callee_i128_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, + i32 %i6, i32 %i7, i128 %s1, i32 %i8) + +define i32 @i128_split() { +entry: +; CHECK: i128_split +; "i128 %0" should be on stack at [sp]. +; "i32 8" should be on stack at [sp, #16]. 
+; CHECK: str {{w[0-9]+}}, [sp, #16] +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp] +; FAST: i128_split +; FAST: mov x[[ADDR:[0-9]+]], sp +; FAST: str {{w[0-9]+}}, [x[[ADDR]], #16] +; FAST: stp {{x[0-9]+}}, {{x[0-9]+}}, [x[[ADDR]]] + %0 = load i128* bitcast (%struct.s41* @g41 to i128*), align 16 + %call = tail call i32 @callee_i128_split(i32 1, i32 2, i32 3, i32 4, i32 5, + i32 6, i32 7, i128 %0, i32 8) #5 + ret i32 %call +} + +declare i32 @callee_i64(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, + i32 %i6, i32 %i7, i64 %s1, i32 %i8) + +define i32 @i64_split() { +entry: +; CHECK: i64_split +; "i64 %0" should be in register x7. +; "i32 8" should be on stack at [sp]. +; CHECK: ldr x7, [{{x[0-9]+}}] +; CHECK: str {{w[0-9]+}}, [sp] +; FAST: i64_split +; FAST: ldr x7, [{{x[0-9]+}}] +; FAST: str {{w[0-9]+}}, [sp] + %0 = load i64* bitcast (%struct.s41* @g41 to i64*), align 16 + %call = tail call i32 @callee_i64(i32 1, i32 2, i32 3, i32 4, i32 5, + i32 6, i32 7, i64 %0, i32 8) #5 + ret i32 %call +} + +attributes #0 = { noinline nounwind readnone "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" } +attributes #1 = { nounwind readonly "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" } +attributes #2 = { noinline nounwind readonly "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" } +attributes #3 = { nounwind "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" } +attributes #4 = { nounwind } +attributes #5 = { nobuiltin } + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} +!3 = metadata !{metadata !"short", metadata !1} +!4 = metadata !{i64 0, i64 4, metadata !0, i64 4, i64 2, metadata !3, i64 8, i64 4, metadata !0, i64 12, i64 2, metadata !3, i64 16, i64 4, metadata !0, i64 20, i64 2, metadata !3} diff --git a/test/CodeGen/ARM64/addp.ll b/test/CodeGen/ARM64/addp.ll new file mode 100644 index 0000000..8283a00 --- /dev/null +++ b/test/CodeGen/ARM64/addp.ll @@ -0,0 +1,32 @@ +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s + +define double @foo(<2 x double> %a) nounwind { +; CHECK-LABEL: foo: +; CHECK: faddp.2d d0, v0 +; CHECK-NEXT: ret + %lane0.i = extractelement <2 x double> %a, i32 0 + %lane1.i = extractelement <2 x double> %a, i32 1 + %vpaddd.i = fadd double %lane0.i, %lane1.i + ret double %vpaddd.i +} + +define i64 @foo0(<2 x i64> %a) nounwind { +; CHECK-LABEL: foo0: +; CHECK: addp.2d d0, v0 +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret + %lane0.i = extractelement <2 x i64> %a, i32 0 + %lane1.i = extractelement <2 x i64> %a, i32 1 + %vpaddd.i = add i64 %lane0.i, %lane1.i + ret i64 %vpaddd.i +} + +define float @foo1(<2 x float> %a) nounwind { +; CHECK-LABEL: foo1: +; CHECK: faddp.2s +; CHECK-NEXT: ret + %lane0.i = extractelement <2 x float> %a, i32 0 + %lane1.i = extractelement <2 x float> %a, i32 1 + %vpaddd.i = fadd float %lane0.i, %lane1.i + ret float %vpaddd.i +} diff --git a/test/CodeGen/ARM64/addr-mode-folding.ll b/test/CodeGen/ARM64/addr-mode-folding.ll new file mode 100644 index 0000000..dff2331 --- /dev/null +++ b/test/CodeGen/ARM64/addr-mode-folding.ll @@ -0,0 +1,171 @@ +; RUN: llc -O3 -mtriple arm64-apple-ios3 %s -o - | FileCheck %s +; + +@block = common global i8* null, align 8 + +define i32 @fct(i32 %i1, i32 %i2) { +; CHECK: @fct +; Sign extension is used more than once, thus it should not be folded. 
+; CodeGenPrepare is not sharing sext across uses, thus this is folded because +; of that. +; _CHECK-NOT_: , sxtw] +entry: + %idxprom = sext i32 %i1 to i64 + %0 = load i8** @block, align 8 + %arrayidx = getelementptr inbounds i8* %0, i64 %idxprom + %1 = load i8* %arrayidx, align 1 + %idxprom1 = sext i32 %i2 to i64 + %arrayidx2 = getelementptr inbounds i8* %0, i64 %idxprom1 + %2 = load i8* %arrayidx2, align 1 + %cmp = icmp eq i8 %1, %2 + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %entry + %cmp7 = icmp ugt i8 %1, %2 + %conv8 = zext i1 %cmp7 to i32 + br label %return + +if.end: ; preds = %entry + %inc = add nsw i32 %i1, 1 + %inc9 = add nsw i32 %i2, 1 + %idxprom10 = sext i32 %inc to i64 + %arrayidx11 = getelementptr inbounds i8* %0, i64 %idxprom10 + %3 = load i8* %arrayidx11, align 1 + %idxprom12 = sext i32 %inc9 to i64 + %arrayidx13 = getelementptr inbounds i8* %0, i64 %idxprom12 + %4 = load i8* %arrayidx13, align 1 + %cmp16 = icmp eq i8 %3, %4 + br i1 %cmp16, label %if.end23, label %if.then18 + +if.then18: ; preds = %if.end + %cmp21 = icmp ugt i8 %3, %4 + %conv22 = zext i1 %cmp21 to i32 + br label %return + +if.end23: ; preds = %if.end + %inc24 = add nsw i32 %i1, 2 + %inc25 = add nsw i32 %i2, 2 + %idxprom26 = sext i32 %inc24 to i64 + %arrayidx27 = getelementptr inbounds i8* %0, i64 %idxprom26 + %5 = load i8* %arrayidx27, align 1 + %idxprom28 = sext i32 %inc25 to i64 + %arrayidx29 = getelementptr inbounds i8* %0, i64 %idxprom28 + %6 = load i8* %arrayidx29, align 1 + %cmp32 = icmp eq i8 %5, %6 + br i1 %cmp32, label %return, label %if.then34 + +if.then34: ; preds = %if.end23 + %cmp37 = icmp ugt i8 %5, %6 + %conv38 = zext i1 %cmp37 to i32 + br label %return + +return: ; preds = %if.end23, %if.then34, %if.then18, %if.then + %retval.0 = phi i32 [ %conv8, %if.then ], [ %conv22, %if.then18 ], [ %conv38, %if.then34 ], [ 1, %if.end23 ] + ret i32 %retval.0 +} + +define i32 @fct1(i32 %i1, i32 %i2) optsize { +; CHECK: @fct1 +; Addressing modes are folded when optimizing for code size.
+; CHECK: , sxtw] +; CHECK: , sxtw] +entry: + %idxprom = sext i32 %i1 to i64 + %0 = load i8** @block, align 8 + %arrayidx = getelementptr inbounds i8* %0, i64 %idxprom + %1 = load i8* %arrayidx, align 1 + %idxprom1 = sext i32 %i2 to i64 + %arrayidx2 = getelementptr inbounds i8* %0, i64 %idxprom1 + %2 = load i8* %arrayidx2, align 1 + %cmp = icmp eq i8 %1, %2 + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %entry + %cmp7 = icmp ugt i8 %1, %2 + %conv8 = zext i1 %cmp7 to i32 + br label %return + +if.end: ; preds = %entry + %inc = add nsw i32 %i1, 1 + %inc9 = add nsw i32 %i2, 1 + %idxprom10 = sext i32 %inc to i64 + %arrayidx11 = getelementptr inbounds i8* %0, i64 %idxprom10 + %3 = load i8* %arrayidx11, align 1 + %idxprom12 = sext i32 %inc9 to i64 + %arrayidx13 = getelementptr inbounds i8* %0, i64 %idxprom12 + %4 = load i8* %arrayidx13, align 1 + %cmp16 = icmp eq i8 %3, %4 + br i1 %cmp16, label %if.end23, label %if.then18 + +if.then18: ; preds = %if.end + %cmp21 = icmp ugt i8 %3, %4 + %conv22 = zext i1 %cmp21 to i32 + br label %return + +if.end23: ; preds = %if.end + %inc24 = add nsw i32 %i1, 2 + %inc25 = add nsw i32 %i2, 2 + %idxprom26 = sext i32 %inc24 to i64 + %arrayidx27 = getelementptr inbounds i8* %0, i64 %idxprom26 + %5 = load i8* %arrayidx27, align 1 + %idxprom28 = sext i32 %inc25 to i64 + %arrayidx29 = getelementptr inbounds i8* %0, i64 %idxprom28 + %6 = load i8* %arrayidx29, align 1 + %cmp32 = icmp eq i8 %5, %6 + br i1 %cmp32, label %return, label %if.then34 + +if.then34: ; preds = %if.end23 + %cmp37 = icmp ugt i8 %5, %6 + %conv38 = zext i1 %cmp37 to i32 + br label %return + +return: ; preds = %if.end23, %if.then34, %if.then18, %if.then + %retval.0 = phi i32 [ %conv8, %if.then ], [ %conv22, %if.then18 ], [ %conv38, %if.then34 ], [ 1, %if.end23 ] + ret i32 %retval.0 +} + +; CHECK: @test +; CHECK-NOT: , uxtw #2] +define i32 @test(i32* %array, i8 zeroext %c, i32 %arg) { +entry: + %conv = zext i8 %c to i32 + %add = sub i32 0, %arg + %tobool = icmp eq i32 %conv, %add + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i8 %c to i64 + %arrayidx = getelementptr inbounds i32* %array, i64 %idxprom + %0 = load volatile i32* %arrayidx, align 4 + %1 = load volatile i32* %arrayidx, align 4 + %add3 = add nsw i32 %1, %0 + br label %if.end + +if.end: ; preds = %entry, %if.then + %res.0 = phi i32 [ %add3, %if.then ], [ 0, %entry ] + ret i32 %res.0 +} + + +; CHECK: @test2 +; CHECK: , uxtw #2] +; CHECK: , uxtw #2] +define i32 @test2(i32* %array, i8 zeroext %c, i32 %arg) optsize { +entry: + %conv = zext i8 %c to i32 + %add = sub i32 0, %arg + %tobool = icmp eq i32 %conv, %add + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i8 %c to i64 + %arrayidx = getelementptr inbounds i32* %array, i64 %idxprom + %0 = load volatile i32* %arrayidx, align 4 + %1 = load volatile i32* %arrayidx, align 4 + %add3 = add nsw i32 %1, %0 + br label %if.end + +if.end: ; preds = %entry, %if.then + %res.0 = phi i32 [ %add3, %if.then ], [ 0, %entry ] + ret i32 %res.0 +} diff --git a/test/CodeGen/ARM64/addr-type-promotion.ll b/test/CodeGen/ARM64/addr-type-promotion.ll new file mode 100644 index 0000000..0677603 --- /dev/null +++ b/test/CodeGen/ARM64/addr-type-promotion.ll @@ -0,0 +1,82 @@ +; RUN: llc -march arm64 < %s | FileCheck %s +; rdar://13452552 +; ModuleID = 'reduced_test.ll' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128" +target 
triple = "arm64-apple-ios3.0.0" + +@block = common global i8* null, align 8 + +define zeroext i8 @fullGtU(i32 %i1, i32 %i2) { +; CHECK: fullGtU +; CHECK: adrp [[PAGE:x[0-9]+]], _block@GOTPAGE +; CHECK: ldr [[ADDR:x[0-9]+]], {{\[}}[[PAGE]], _block@GOTPAGEOFF] +; CHECK-NEXT: ldr [[BLOCKBASE:x[0-9]+]], {{\[}}[[ADDR]]] +; CHECK-NEXT: ldrb [[BLOCKVAL1:w[0-9]+]], {{\[}}[[BLOCKBASE]], x0, sxtw] +; CHECK-NEXT: ldrb [[BLOCKVAL2:w[0-9]+]], {{\[}}[[BLOCKBASE]], x1, sxtw] +; CHECK-NEXT cmp [[BLOCKVAL1]], [[BLOCKVAL2]] +; CHECK-NEXT b.ne +; Next BB +; CHECK: add [[BLOCKBASE2:x[0-9]+]], [[BLOCKBASE]], w1, sxtw +; CHECK-NEXT: add [[BLOCKBASE1:x[0-9]+]], [[BLOCKBASE]], w0, sxtw +; CHECK-NEXT: ldrb [[LOADEDVAL1:w[0-9]+]], {{\[}}[[BLOCKBASE1]], #1] +; CHECK-NEXT: ldrb [[LOADEDVAL2:w[0-9]+]], {{\[}}[[BLOCKBASE2]], #1] +; CHECK-NEXT: cmp [[LOADEDVAL1]], [[LOADEDVAL2]] +; CHECK-NEXT: b.ne +; Next BB +; CHECK: ldrb [[LOADEDVAL3:w[0-9]+]], {{\[}}[[BLOCKBASE1]], #2] +; CHECK-NEXT: ldrb [[LOADEDVAL4:w[0-9]+]], {{\[}}[[BLOCKBASE2]], #2] +; CHECK-NEXT: cmp [[LOADEDVAL3]], [[LOADEDVAL4]] +entry: + %idxprom = sext i32 %i1 to i64 + %tmp = load i8** @block, align 8 + %arrayidx = getelementptr inbounds i8* %tmp, i64 %idxprom + %tmp1 = load i8* %arrayidx, align 1 + %idxprom1 = sext i32 %i2 to i64 + %arrayidx2 = getelementptr inbounds i8* %tmp, i64 %idxprom1 + %tmp2 = load i8* %arrayidx2, align 1 + %cmp = icmp eq i8 %tmp1, %tmp2 + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %entry + %cmp7 = icmp ugt i8 %tmp1, %tmp2 + %conv9 = zext i1 %cmp7 to i8 + br label %return + +if.end: ; preds = %entry + %inc = add nsw i32 %i1, 1 + %inc10 = add nsw i32 %i2, 1 + %idxprom11 = sext i32 %inc to i64 + %arrayidx12 = getelementptr inbounds i8* %tmp, i64 %idxprom11 + %tmp3 = load i8* %arrayidx12, align 1 + %idxprom13 = sext i32 %inc10 to i64 + %arrayidx14 = getelementptr inbounds i8* %tmp, i64 %idxprom13 + %tmp4 = load i8* %arrayidx14, align 1 + %cmp17 = icmp eq i8 %tmp3, %tmp4 + br i1 %cmp17, label %if.end25, label %if.then19 + +if.then19: ; preds = %if.end + %cmp22 = icmp ugt i8 %tmp3, %tmp4 + %conv24 = zext i1 %cmp22 to i8 + br label %return + +if.end25: ; preds = %if.end + %inc26 = add nsw i32 %i1, 2 + %inc27 = add nsw i32 %i2, 2 + %idxprom28 = sext i32 %inc26 to i64 + %arrayidx29 = getelementptr inbounds i8* %tmp, i64 %idxprom28 + %tmp5 = load i8* %arrayidx29, align 1 + %idxprom30 = sext i32 %inc27 to i64 + %arrayidx31 = getelementptr inbounds i8* %tmp, i64 %idxprom30 + %tmp6 = load i8* %arrayidx31, align 1 + %cmp34 = icmp eq i8 %tmp5, %tmp6 + br i1 %cmp34, label %return, label %if.then36 + +if.then36: ; preds = %if.end25 + %cmp39 = icmp ugt i8 %tmp5, %tmp6 + %conv41 = zext i1 %cmp39 to i8 + br label %return + +return: ; preds = %if.then36, %if.end25, %if.then19, %if.then + %retval.0 = phi i8 [ %conv9, %if.then ], [ %conv24, %if.then19 ], [ %conv41, %if.then36 ], [ 0, %if.end25 ] + ret i8 %retval.0 +} diff --git a/test/CodeGen/ARM64/addrmode.ll b/test/CodeGen/ARM64/addrmode.ll new file mode 100644 index 0000000..e131237 --- /dev/null +++ b/test/CodeGen/ARM64/addrmode.ll @@ -0,0 +1,72 @@ +; RUN: llc -march=arm64 < %s | FileCheck %s +; rdar://10232252 + +@object = external hidden global i64, section "__DATA, __objc_ivar", align 8 + +; base + offset (imm9) +; CHECK: @t1 +; CHECK: ldr xzr, [x{{[0-9]+}}, #8] +; CHECK: ret +define void @t1() { + %incdec.ptr = getelementptr inbounds i64* @object, i64 1 + %tmp = load volatile i64* %incdec.ptr, align 8 + ret void +} + +; base + offset (> imm9) +; CHECK: @t2 +; CHECK: sub 
[[ADDREG:x[0-9]+]], x{{[0-9]+}}, #264 +; CHECK: ldr xzr, [ +; CHECK: [[ADDREG]]] +; CHECK: ret +define void @t2() { + %incdec.ptr = getelementptr inbounds i64* @object, i64 -33 + %tmp = load volatile i64* %incdec.ptr, align 8 + ret void +} + +; base + unsigned offset (> imm9 and <= imm12 * size of type in bytes) +; CHECK: @t3 +; CHECK: ldr xzr, [x{{[0-9]+}}, #32760] +; CHECK: ret +define void @t3() { + %incdec.ptr = getelementptr inbounds i64* @object, i64 4095 + %tmp = load volatile i64* %incdec.ptr, align 8 + ret void +} + +; base + unsigned offset (> imm12 * size of type in bytes) +; CHECK: @t4 +; CHECK: add [[ADDREG:x[0-9]+]], x{{[0-9]+}}, #32768 +; CHECK: ldr xzr, [ +; CHECK: [[ADDREG]]] +; CHECK: ret +define void @t4() { + %incdec.ptr = getelementptr inbounds i64* @object, i64 4096 + %tmp = load volatile i64* %incdec.ptr, align 8 + ret void +} + +; base + reg +; CHECK: @t5 +; CHECK: ldr xzr, [x{{[0-9]+}}, x{{[0-9]+}}, lsl #3] +; CHECK: ret +define void @t5(i64 %a) { + %incdec.ptr = getelementptr inbounds i64* @object, i64 %a + %tmp = load volatile i64* %incdec.ptr, align 8 + ret void +} + +; base + reg + imm +; CHECK: @t6 +; CHECK: add [[ADDREG:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #3 +; CHECK-NEXT: add [[ADDREG]], [[ADDREG]], #32768 +; CHECK: ldr xzr, [ +; CHECK: [[ADDREG]]] +; CHECK: ret +define void @t6(i64 %a) { + %tmp1 = getelementptr inbounds i64* @object, i64 %a + %incdec.ptr = getelementptr inbounds i64* %tmp1, i64 4096 + %tmp = load volatile i64* %incdec.ptr, align 8 + ret void +} diff --git a/test/CodeGen/ARM64/alloc-no-stack-realign.ll b/test/CodeGen/ARM64/alloc-no-stack-realign.ll new file mode 100644 index 0000000..f396bc9 --- /dev/null +++ b/test/CodeGen/ARM64/alloc-no-stack-realign.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s -mtriple=arm64-apple-darwin -enable-misched=false | FileCheck %s + +; rdar://12713765 +; Make sure we are not creating stack objects that are assumed to be 64-byte +; aligned. 
+@T3_retval = common global <16 x float> zeroinitializer, align 16 + +define void @test(<16 x float>* noalias sret %agg.result) nounwind ssp { +entry: +; CHECK: test +; CHECK: stp [[Q1:q[0-9]+]], [[Q2:q[0-9]+]], [sp, #32] +; CHECK: stp [[Q1:q[0-9]+]], [[Q2:q[0-9]+]], [sp] +; CHECK: stp [[Q1:q[0-9]+]], [[Q2:q[0-9]+]], {{\[}}[[BASE:x[0-9]+]], #32] +; CHECK: stp [[Q1:q[0-9]+]], [[Q2:q[0-9]+]], {{\[}}[[BASE]]] + %retval = alloca <16 x float>, align 16 + %0 = load <16 x float>* @T3_retval, align 16 + store <16 x float> %0, <16 x float>* %retval + %1 = load <16 x float>* %retval + store <16 x float> %1, <16 x float>* %agg.result, align 16 + ret void +} diff --git a/test/CodeGen/ARM64/alloca-frame-pointer-offset.ll b/test/CodeGen/ARM64/alloca-frame-pointer-offset.ll new file mode 100644 index 0000000..3750f31 --- /dev/null +++ b/test/CodeGen/ARM64/alloca-frame-pointer-offset.ll @@ -0,0 +1,29 @@ +; RUN: llc -march=arm64 -mcpu=cyclone < %s | FileCheck %s + +; CHECK: foo +; CHECK: ldr w[[REG:[0-9]+]], [x19, #264] +; CHECK: str w[[REG]], [x19, #132] +; CHECK: ldr w{{[0-9]+}}, [x19, #264] + +define i32 @foo(i32 %a) nounwind { + %retval = alloca i32, align 4 + %a.addr = alloca i32, align 4 + %arr = alloca [32 x i32], align 4 + %i = alloca i32, align 4 + %arr2 = alloca [32 x i32], align 4 + %j = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + %tmp = load i32* %a.addr, align 4 + %tmp1 = zext i32 %tmp to i64 + %v = mul i64 4, %tmp1 + %vla = alloca i8, i64 %v, align 4 + %tmp2 = bitcast i8* %vla to i32* + %tmp3 = load i32* %a.addr, align 4 + store i32 %tmp3, i32* %i, align 4 + %tmp4 = load i32* %a.addr, align 4 + store i32 %tmp4, i32* %j, align 4 + %tmp5 = load i32* %j, align 4 + store i32 %tmp5, i32* %retval + %x = load i32* %retval + ret i32 %x +} diff --git a/test/CodeGen/ARM64/andCmpBrToTBZ.ll b/test/CodeGen/ARM64/andCmpBrToTBZ.ll new file mode 100644 index 0000000..4194977 --- /dev/null +++ b/test/CodeGen/ARM64/andCmpBrToTBZ.ll @@ -0,0 +1,72 @@ +; RUN: llc -O1 -march=arm64 -enable-andcmp-sinking=true < %s | FileCheck %s +; ModuleID = 'and-cbz-extr-mr.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128" +target triple = "arm64-apple-ios7.0.0" + +define zeroext i1 @foo(i1 %IsEditable, i1 %isTextField, i8* %str1, i8* %str2, i8* %str3, i8* %str4, i8* %str5, i8* %str6, i8* %str7, i8* %str8, i8* %str9, i8* %str10, i8* %str11, i8* %str12, i8* %str13, i32 %int1, i8* %str14) unnamed_addr #0 align 2 { +; CHECK: _foo: +entry: + %tobool = icmp eq i8* %str14, null + br i1 %tobool, label %return, label %if.end + +; CHECK: %if.end +; CHECK: tbz +if.end: ; preds = %entry + %and.i.i.i = and i32 %int1, 4 + %tobool.i.i.i = icmp eq i32 %and.i.i.i, 0 + br i1 %tobool.i.i.i, label %if.end12, label %land.rhs.i + +land.rhs.i: ; preds = %if.end + %cmp.i.i.i = icmp eq i8* %str12, %str13 + br i1 %cmp.i.i.i, label %if.then3, label %lor.rhs.i.i.i + +lor.rhs.i.i.i: ; preds = %land.rhs.i + %cmp.i13.i.i.i = icmp eq i8* %str10, %str11 + br i1 %cmp.i13.i.i.i, label %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit, label %if.end5 + +_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit: ; preds = %lor.rhs.i.i.i + %cmp.i.i.i.i = icmp eq i8* %str8, %str9 + br i1 %cmp.i.i.i.i, label %if.then3, label %if.end5 + +if.then3: ; preds = %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit, %land.rhs.i + %tmp11 = load i8* %str14, align 8 + %tmp12 = and i8 %tmp11, 2 + %tmp13 = icmp ne i8 %tmp12, 0 + br label %return + +if.end5: 
; preds = %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit, %lor.rhs.i.i.i +; CHECK: %if.end5 +; CHECK: tbz + br i1 %tobool.i.i.i, label %if.end12, label %land.rhs.i19 + +land.rhs.i19: ; preds = %if.end5 + %cmp.i.i.i18 = icmp eq i8* %str6, %str7 + br i1 %cmp.i.i.i18, label %if.then7, label %lor.rhs.i.i.i23 + +lor.rhs.i.i.i23: ; preds = %land.rhs.i19 + %cmp.i13.i.i.i22 = icmp eq i8* %str3, %str4 + br i1 %cmp.i13.i.i.i22, label %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit28, label %if.end12 + +_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit28: ; preds = %lor.rhs.i.i.i23 + %cmp.i.i.i.i26 = icmp eq i8* %str1, %str2 + br i1 %cmp.i.i.i.i26, label %if.then7, label %if.end12 + +if.then7: ; preds = %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit28, %land.rhs.i19 + br i1 %isTextField, label %if.then9, label %if.end12 + +if.then9: ; preds = %if.then7 + %tmp23 = load i8* %str5, align 8 + %tmp24 = and i8 %tmp23, 2 + %tmp25 = icmp ne i8 %tmp24, 0 + br label %return + +if.end12: ; preds = %if.then7, %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit28, %lor.rhs.i.i.i23, %if.end5, %if.end + %lnot = xor i1 %IsEditable, true + br label %return + +return: ; preds = %if.end12, %if.then9, %if.then3, %entry + %retval.0 = phi i1 [ %tmp13, %if.then3 ], [ %tmp25, %if.then9 ], [ %lnot, %if.end12 ], [ true, %entry ] + ret i1 %retval.0 +} + +attributes #0 = { nounwind ssp } diff --git a/test/CodeGen/ARM64/anyregcc-crash.ll b/test/CodeGen/ARM64/anyregcc-crash.ll new file mode 100644 index 0000000..241cf97 --- /dev/null +++ b/test/CodeGen/ARM64/anyregcc-crash.ll @@ -0,0 +1,19 @@ +; RUN: not llc < %s -mtriple=arm64-apple-darwin 2>&1 | FileCheck %s +; +; Check that misuse of anyregcc results in a compile time error. + +; CHECK: LLVM ERROR: ran out of registers during register allocation +define i64 @anyreglimit(i64 %v1, i64 %v2, i64 %v3, i64 %v4, i64 %v5, i64 %v6, i64 %v7, i64 %v8, + i64 %v9, i64 %v10, i64 %v11, i64 %v12, i64 %v13, i64 %v14, i64 %v15, i64 %v16, + i64 %v17, i64 %v18, i64 %v19, i64 %v20, i64 %v21, i64 %v22, i64 %v23, i64 %v24, + i64 %v25, i64 %v26, i64 %v27, i64 %v28, i64 %v29, i64 %v30, i64 %v31, i64 %v32) { +entry: + %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 12, i32 15, i8* inttoptr (i64 0 to i8*), i32 32, + i64 %v1, i64 %v2, i64 %v3, i64 %v4, i64 %v5, i64 %v6, i64 %v7, i64 %v8, + i64 %v9, i64 %v10, i64 %v11, i64 %v12, i64 %v13, i64 %v14, i64 %v15, i64 %v16, + i64 %v17, i64 %v18, i64 %v19, i64 %v20, i64 %v21, i64 %v22, i64 %v23, i64 %v24, + i64 %v25, i64 %v26, i64 %v27, i64 %v28, i64 %v29, i64 %v30, i64 %v31, i64 %v32) + ret i64 %result +} + +declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...) 
diff --git a/test/CodeGen/ARM64/anyregcc.ll b/test/CodeGen/ARM64/anyregcc.ll new file mode 100644 index 0000000..e26875d --- /dev/null +++ b/test/CodeGen/ARM64/anyregcc.ll @@ -0,0 +1,363 @@ +; RUN: llc < %s -mtriple=arm64-apple-darwin | FileCheck %s + +; Stackmap Header: no constants - 6 callsites +; CHECK-LABEL: .section __LLVM_STACKMAPS,__llvm_stackmaps +; CHECK-NEXT: __LLVM_StackMaps: +; Header +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 0 +; Num Functions +; CHECK-NEXT: .long 8 +; Num LargeConstants +; CHECK-NEXT: .long 0 +; Num Callsites +; CHECK-NEXT: .long 8 + +; Functions and stack size +; CHECK-NEXT: .quad _test +; CHECK-NEXT: .quad 16 +; CHECK-NEXT: .quad _property_access1 +; CHECK-NEXT: .quad 16 +; CHECK-NEXT: .quad _property_access2 +; CHECK-NEXT: .quad 32 +; CHECK-NEXT: .quad _property_access3 +; CHECK-NEXT: .quad 32 +; CHECK-NEXT: .quad _anyreg_test1 +; CHECK-NEXT: .quad 16 +; CHECK-NEXT: .quad _anyreg_test2 +; CHECK-NEXT: .quad 16 +; CHECK-NEXT: .quad _patchpoint_spilldef +; CHECK-NEXT: .quad 112 +; CHECK-NEXT: .quad _patchpoint_spillargs +; CHECK-NEXT: .quad 128 + + +; test +; CHECK-LABEL: .long L{{.*}}-_test +; CHECK-NEXT: .short 0 +; 3 locations +; CHECK-NEXT: .short 3 +; Loc 0: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 1: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 2: Constant 3 +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long 3 +define i64 @test() nounwind ssp uwtable { +entry: + call anyregcc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 0, i32 16, i8* null, i32 2, i32 1, i32 2, i64 3) + ret i64 0 +} + +; property access 1 - %obj is an anyreg call argument and should therefore be in a register +; CHECK-LABEL: .long L{{.*}}-_property_access1 +; CHECK-NEXT: .short 0 +; 2 locations +; CHECK-NEXT: .short 2 +; Loc 0: Register <-- this is the return register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 1: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +define i64 @property_access1(i8* %obj) nounwind ssp uwtable { +entry: + %f = inttoptr i64 281474417671919 to i8* + %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 1, i32 20, i8* %f, i32 1, i8* %obj) + ret i64 %ret +} + +; property access 2 - %obj is an anyreg call argument and should therefore be in a register +; CHECK-LABEL: .long L{{.*}}-_property_access2 +; CHECK-NEXT: .short 0 +; 2 locations +; CHECK-NEXT: .short 2 +; Loc 0: Register <-- this is the return register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 1: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +define i64 @property_access2() nounwind ssp uwtable { +entry: + %obj = alloca i64, align 8 + %f = inttoptr i64 281474417671919 to i8* + %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 2, i32 20, i8* %f, i32 1, i64* %obj) + ret i64 %ret +} + +; property access 3 - %obj is a frame index +; CHECK-LABEL: .long L{{.*}}-_property_access3 +; CHECK-NEXT: .short 0 +; 2 locations +; CHECK-NEXT: .short 2 +; Loc 0: Register <-- this is the return register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; 
CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 1: Direct FP - 8 +; CHECK-NEXT: .byte 2 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short 29 +; CHECK-NEXT: .long -8 +define i64 @property_access3() nounwind ssp uwtable { +entry: + %obj = alloca i64, align 8 + %f = inttoptr i64 281474417671919 to i8* + %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 3, i32 20, i8* %f, i32 0, i64* %obj) + ret i64 %ret +} + +; anyreg_test1 +; CHECK-LABEL: .long L{{.*}}-_anyreg_test1 +; CHECK-NEXT: .short 0 +; 14 locations +; CHECK-NEXT: .short 14 +; Loc 0: Register <-- this is the return register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 1: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 2: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 3: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 4: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 5: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 6: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 7: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 8: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 9: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 10: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 11: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 12: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 13: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +define i64 @anyreg_test1(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable { +entry: + %f = inttoptr i64 281474417671919 to i8* + %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 4, i32 20, i8* %f, i32 13, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) + ret i64 %ret +} + +; anyreg_test2 +; CHECK-LABEL: .long L{{.*}}-_anyreg_test2 +; CHECK-NEXT: .short 0 +; 14 locations +; CHECK-NEXT: .short 14 +; Loc 0: Register <-- this is the return register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 1: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 2: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 3: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 4: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 
+; Loc 5: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 6: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 7: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 8: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 9: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 10: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 11: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 12: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 13: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +define i64 @anyreg_test2(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable { +entry: + %f = inttoptr i64 281474417671919 to i8* + %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 20, i8* %f, i32 8, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) + ret i64 %ret +} + +; Test spilling the return value of an anyregcc call. +; +; [JS] Assertion: "Folded a def to a non-store!" +; +; CHECK-LABEL: .long L{{.*}}-_patchpoint_spilldef +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 3 +; Loc 0: Register (some register that will be spilled to the stack) +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 1: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 1: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +define i64 @patchpoint_spilldef(i64 %p1, i64 %p2, i64 %p3, i64 %p4) { +entry: + %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 12, i32 16, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2) + tail call void asm sideeffect "nop", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{x29},~{x30},~{x31}"() nounwind + ret i64 %result +} + +; Test spilling the arguments of an anyregcc call. 
+; +; [JS] AnyRegCC argument ends up being spilled +; +; CHECK-LABEL: .long L{{.*}}-_patchpoint_spillargs +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 5 +; Loc 0: Return a register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 1: Arg0 in a Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 2: Arg1 in a Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 3: Arg2 spilled to FP -96 +; CHECK-NEXT: .byte 3 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short 29 +; CHECK-NEXT: .long -96 +; Loc 4: Arg3 spilled to FP - 88 +; CHECK-NEXT: .byte 3 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short 29 +; CHECK-NEXT: .long -88 +define i64 @patchpoint_spillargs(i64 %p1, i64 %p2, i64 %p3, i64 %p4) { +entry: + tail call void asm sideeffect "nop", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{x29},~{x30},~{x31}"() nounwind + %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 13, i32 16, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2, i64 %p3, i64 %p4) + ret i64 %result +} + +declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...) +declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...) diff --git a/test/CodeGen/ARM64/arith-saturating.ll b/test/CodeGen/ARM64/arith-saturating.ll new file mode 100644 index 0000000..437ebb8 --- /dev/null +++ b/test/CodeGen/ARM64/arith-saturating.ll @@ -0,0 +1,153 @@ +; RUN: llc < %s -march=arm64 | FileCheck %s + +define i32 @qadds(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp { +; CHECK-LABEL: qadds: +; CHECK: sqadd s0, s0, s1 + %vecext = extractelement <4 x i32> %b, i32 0 + %vecext1 = extractelement <4 x i32> %c, i32 0 + %vqadd.i = tail call i32 @llvm.arm64.neon.sqadd.i32(i32 %vecext, i32 %vecext1) nounwind + ret i32 %vqadd.i +} + +define i64 @qaddd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp { +; CHECK-LABEL: qaddd: +; CHECK: sqadd d0, d0, d1 + %vecext = extractelement <2 x i64> %b, i32 0 + %vecext1 = extractelement <2 x i64> %c, i32 0 + %vqadd.i = tail call i64 @llvm.arm64.neon.sqadd.i64(i64 %vecext, i64 %vecext1) nounwind + ret i64 %vqadd.i +} + +define i32 @uqadds(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp { +; CHECK-LABEL: uqadds: +; CHECK: uqadd s0, s0, s1 + %vecext = extractelement <4 x i32> %b, i32 0 + %vecext1 = extractelement <4 x i32> %c, i32 0 + %vqadd.i = tail call i32 @llvm.arm64.neon.uqadd.i32(i32 %vecext, i32 %vecext1) nounwind + ret i32 %vqadd.i +} + +define i64 @uqaddd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp { +; CHECK-LABEL: uqaddd: +; CHECK: uqadd d0, d0, d1 + %vecext = extractelement <2 x i64> %b, i32 0 + %vecext1 = extractelement <2 x i64> %c, i32 0 + %vqadd.i = tail call i64 @llvm.arm64.neon.uqadd.i64(i64 %vecext, i64 %vecext1) nounwind + ret i64 %vqadd.i +} + +declare i64 @llvm.arm64.neon.uqadd.i64(i64, i64) nounwind readnone +declare i32 @llvm.arm64.neon.uqadd.i32(i32, i32) nounwind readnone +declare i64 @llvm.arm64.neon.sqadd.i64(i64, i64) nounwind readnone +declare i32 @llvm.arm64.neon.sqadd.i32(i32, i32) nounwind readnone + +define i32 @qsubs(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp { +; CHECK-LABEL: qsubs: +; CHECK: sqsub s0, s0, s1 + %vecext = extractelement <4 
x i32> %b, i32 0 + %vecext1 = extractelement <4 x i32> %c, i32 0 + %vqsub.i = tail call i32 @llvm.arm64.neon.sqsub.i32(i32 %vecext, i32 %vecext1) nounwind + ret i32 %vqsub.i +} + +define i64 @qsubd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp { +; CHECK-LABEL: qsubd: +; CHECK: sqsub d0, d0, d1 + %vecext = extractelement <2 x i64> %b, i32 0 + %vecext1 = extractelement <2 x i64> %c, i32 0 + %vqsub.i = tail call i64 @llvm.arm64.neon.sqsub.i64(i64 %vecext, i64 %vecext1) nounwind + ret i64 %vqsub.i +} + +define i32 @uqsubs(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp { +; CHECK-LABEL: uqsubs: +; CHECK: uqsub s0, s0, s1 + %vecext = extractelement <4 x i32> %b, i32 0 + %vecext1 = extractelement <4 x i32> %c, i32 0 + %vqsub.i = tail call i32 @llvm.arm64.neon.uqsub.i32(i32 %vecext, i32 %vecext1) nounwind + ret i32 %vqsub.i +} + +define i64 @uqsubd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp { +; CHECK-LABEL: uqsubd: +; CHECK: uqsub d0, d0, d1 + %vecext = extractelement <2 x i64> %b, i32 0 + %vecext1 = extractelement <2 x i64> %c, i32 0 + %vqsub.i = tail call i64 @llvm.arm64.neon.uqsub.i64(i64 %vecext, i64 %vecext1) nounwind + ret i64 %vqsub.i +} + +declare i64 @llvm.arm64.neon.uqsub.i64(i64, i64) nounwind readnone +declare i32 @llvm.arm64.neon.uqsub.i32(i32, i32) nounwind readnone +declare i64 @llvm.arm64.neon.sqsub.i64(i64, i64) nounwind readnone +declare i32 @llvm.arm64.neon.sqsub.i32(i32, i32) nounwind readnone + +define i32 @qabss(<4 x i32> %b, <4 x i32> %c) nounwind readnone { +; CHECK-LABEL: qabss: +; CHECK: sqabs s0, s0 +; CHECK: ret + %vecext = extractelement <4 x i32> %b, i32 0 + %vqabs.i = tail call i32 @llvm.arm64.neon.sqabs.i32(i32 %vecext) nounwind + ret i32 %vqabs.i +} + +define i64 @qabsd(<2 x i64> %b, <2 x i64> %c) nounwind readnone { +; CHECK-LABEL: qabsd: +; CHECK: sqabs d0, d0 +; CHECK: ret + %vecext = extractelement <2 x i64> %b, i32 0 + %vqabs.i = tail call i64 @llvm.arm64.neon.sqabs.i64(i64 %vecext) nounwind + ret i64 %vqabs.i +} + +define i32 @qnegs(<4 x i32> %b, <4 x i32> %c) nounwind readnone { +; CHECK-LABEL: qnegs: +; CHECK: sqneg s0, s0 +; CHECK: ret + %vecext = extractelement <4 x i32> %b, i32 0 + %vqneg.i = tail call i32 @llvm.arm64.neon.sqneg.i32(i32 %vecext) nounwind + ret i32 %vqneg.i +} + +define i64 @qnegd(<2 x i64> %b, <2 x i64> %c) nounwind readnone { +; CHECK-LABEL: qnegd: +; CHECK: sqneg d0, d0 +; CHECK: ret + %vecext = extractelement <2 x i64> %b, i32 0 + %vqneg.i = tail call i64 @llvm.arm64.neon.sqneg.i64(i64 %vecext) nounwind + ret i64 %vqneg.i +} + +declare i64 @llvm.arm64.neon.sqneg.i64(i64) nounwind readnone +declare i32 @llvm.arm64.neon.sqneg.i32(i32) nounwind readnone +declare i64 @llvm.arm64.neon.sqabs.i64(i64) nounwind readnone +declare i32 @llvm.arm64.neon.sqabs.i32(i32) nounwind readnone + + +define i32 @vqmovund(<2 x i64> %b) nounwind readnone { +; CHECK-LABEL: vqmovund: +; CHECK: sqxtun s0, d0 + %vecext = extractelement <2 x i64> %b, i32 0 + %vqmovun.i = tail call i32 @llvm.arm64.neon.scalar.sqxtun.i32.i64(i64 %vecext) nounwind + ret i32 %vqmovun.i +} + +define i32 @vqmovnd_s(<2 x i64> %b) nounwind readnone { +; CHECK-LABEL: vqmovnd_s: +; CHECK: sqxtn s0, d0 + %vecext = extractelement <2 x i64> %b, i32 0 + %vqmovn.i = tail call i32 @llvm.arm64.neon.scalar.sqxtn.i32.i64(i64 %vecext) nounwind + ret i32 %vqmovn.i +} + +define i32 @vqmovnd_u(<2 x i64> %b) nounwind readnone { +; CHECK-LABEL: vqmovnd_u: +; CHECK: uqxtn s0, d0 + %vecext = extractelement <2 x i64> %b, i32 0 + %vqmovn.i = tail call i32 
@llvm.arm64.neon.scalar.uqxtn.i32.i64(i64 %vecext) nounwind + ret i32 %vqmovn.i +} + +declare i32 @llvm.arm64.neon.scalar.uqxtn.i32.i64(i64) nounwind readnone +declare i32 @llvm.arm64.neon.scalar.sqxtn.i32.i64(i64) nounwind readnone +declare i32 @llvm.arm64.neon.scalar.sqxtun.i32.i64(i64) nounwind readnone diff --git a/test/CodeGen/ARM64/arith.ll b/test/CodeGen/ARM64/arith.ll new file mode 100644 index 0000000..b6ff0da --- /dev/null +++ b/test/CodeGen/ARM64/arith.ll @@ -0,0 +1,262 @@ +; RUN: llc < %s -march=arm64 -asm-verbose=false | FileCheck %s + +define i32 @t1(i32 %a, i32 %b) nounwind readnone ssp { +entry: +; CHECK-LABEL: t1: +; CHECK: add w0, w1, w0 +; CHECK: ret + %add = add i32 %b, %a + ret i32 %add +} + +define i32 @t2(i32 %a, i32 %b) nounwind readnone ssp { +entry: +; CHECK-LABEL: t2: +; CHECK: udiv w0, w0, w1 +; CHECK: ret + %udiv = udiv i32 %a, %b + ret i32 %udiv +} + +define i64 @t3(i64 %a, i64 %b) nounwind readnone ssp { +entry: +; CHECK-LABEL: t3: +; CHECK: udiv x0, x0, x1 +; CHECK: ret + %udiv = udiv i64 %a, %b + ret i64 %udiv +} + +define i32 @t4(i32 %a, i32 %b) nounwind readnone ssp { +entry: +; CHECK-LABEL: t4: +; CHECK: sdiv w0, w0, w1 +; CHECK: ret + %sdiv = sdiv i32 %a, %b + ret i32 %sdiv +} + +define i64 @t5(i64 %a, i64 %b) nounwind readnone ssp { +entry: +; CHECK-LABEL: t5: +; CHECK: sdiv x0, x0, x1 +; CHECK: ret + %sdiv = sdiv i64 %a, %b + ret i64 %sdiv +} + +define i32 @t6(i32 %a, i32 %b) nounwind readnone ssp { +entry: +; CHECK-LABEL: t6: +; CHECK: lslv w0, w0, w1 +; CHECK: ret + %shl = shl i32 %a, %b + ret i32 %shl +} + +define i64 @t7(i64 %a, i64 %b) nounwind readnone ssp { +entry: +; CHECK-LABEL: t7: +; CHECK: lslv x0, x0, x1 +; CHECK: ret + %shl = shl i64 %a, %b + ret i64 %shl +} + +define i32 @t8(i32 %a, i32 %b) nounwind readnone ssp { +entry: +; CHECK-LABEL: t8: +; CHECK: lsrv w0, w0, w1 +; CHECK: ret + %lshr = lshr i32 %a, %b + ret i32 %lshr +} + +define i64 @t9(i64 %a, i64 %b) nounwind readnone ssp { +entry: +; CHECK-LABEL: t9: +; CHECK: lsrv x0, x0, x1 +; CHECK: ret + %lshr = lshr i64 %a, %b + ret i64 %lshr +} + +define i32 @t10(i32 %a, i32 %b) nounwind readnone ssp { +entry: +; CHECK-LABEL: t10: +; CHECK: asrv w0, w0, w1 +; CHECK: ret + %ashr = ashr i32 %a, %b + ret i32 %ashr +} + +define i64 @t11(i64 %a, i64 %b) nounwind readnone ssp { +entry: +; CHECK-LABEL: t11: +; CHECK: asrv x0, x0, x1 +; CHECK: ret + %ashr = ashr i64 %a, %b + ret i64 %ashr +} + +define i32 @t12(i16 %a, i32 %x) nounwind ssp { +entry: +; CHECK-LABEL: t12: +; CHECK: add w0, w1, w0, sxth +; CHECK: ret + %c = sext i16 %a to i32 + %e = add i32 %x, %c + ret i32 %e +} + +define i32 @t13(i16 %a, i32 %x) nounwind ssp { +entry: +; CHECK-LABEL: t13: +; CHECK: add w0, w1, w0, sxth #2 +; CHECK: ret + %c = sext i16 %a to i32 + %d = shl i32 %c, 2 + %e = add i32 %x, %d + ret i32 %e +} + +define i64 @t14(i16 %a, i64 %x) nounwind ssp { +entry: +; CHECK-LABEL: t14: +; CHECK: add x0, x1, w0, uxth #3 +; CHECK: ret + %c = zext i16 %a to i64 + %d = shl i64 %c, 3 + %e = add i64 %x, %d + ret i64 %e +} + +; rdar://9160598 +define i64 @t15(i64 %a, i64 %x) nounwind ssp { +entry: +; CHECK-LABEL: t15: +; CHECK: add x0, x1, w0, uxtw +; CHECK: ret + %b = and i64 %a, 4294967295 + %c = add i64 %x, %b + ret i64 %c +} + +define i64 @t16(i64 %x) nounwind ssp { +entry: +; CHECK-LABEL: t16: +; CHECK: lsl x0, x0, #1 +; CHECK: ret + %a = shl i64 %x, 1 + ret i64 %a +} + +; rdar://9166974 +define i64 @t17(i16 %a, i64 %x) nounwind ssp { +entry: +; CHECK-LABEL: t17: +; CHECK: sxth [[REG:x[0-9]+]], x0 +; CHECK: sub x0, xzr, 
[[REG]], lsl #32 +; CHECK: ret + %tmp16 = sext i16 %a to i64 + %tmp17 = mul i64 %tmp16, -4294967296 + ret i64 %tmp17 +} + +define i32 @t18(i32 %a, i32 %b) nounwind readnone ssp { +entry: +; CHECK-LABEL: t18: +; CHECK: sdiv w0, w0, w1 +; CHECK: ret + %sdiv = call i32 @llvm.arm64.sdiv.i32(i32 %a, i32 %b) + ret i32 %sdiv +} + +define i64 @t19(i64 %a, i64 %b) nounwind readnone ssp { +entry: +; CHECK-LABEL: t19: +; CHECK: sdiv x0, x0, x1 +; CHECK: ret + %sdiv = call i64 @llvm.arm64.sdiv.i64(i64 %a, i64 %b) + ret i64 %sdiv +} + +define i32 @t20(i32 %a, i32 %b) nounwind readnone ssp { +entry: +; CHECK-LABEL: t20: +; CHECK: udiv w0, w0, w1 +; CHECK: ret + %udiv = call i32 @llvm.arm64.udiv.i32(i32 %a, i32 %b) + ret i32 %udiv +} + +define i64 @t21(i64 %a, i64 %b) nounwind readnone ssp { +entry: +; CHECK-LABEL: t21: +; CHECK: udiv x0, x0, x1 +; CHECK: ret + %udiv = call i64 @llvm.arm64.udiv.i64(i64 %a, i64 %b) + ret i64 %udiv +} + +declare i32 @llvm.arm64.sdiv.i32(i32, i32) nounwind readnone +declare i64 @llvm.arm64.sdiv.i64(i64, i64) nounwind readnone +declare i32 @llvm.arm64.udiv.i32(i32, i32) nounwind readnone +declare i64 @llvm.arm64.udiv.i64(i64, i64) nounwind readnone + +; 32-bit not. +define i32 @inv_32(i32 %x) nounwind ssp { +entry: +; CHECK: inv_32 +; CHECK: mvn w0, w0 +; CHECK: ret + %inv = xor i32 %x, -1 + ret i32 %inv +} + +; 64-bit not. +define i64 @inv_64(i64 %x) nounwind ssp { +entry: +; CHECK: inv_64 +; CHECK: mvn x0, x0 +; CHECK: ret + %inv = xor i64 %x, -1 + ret i64 %inv +} + +; Multiplying by a power of two plus or minus one is better done via shift +; and add/sub rather than the madd/msub instructions. The latter are 4+ cycles, +; and the former are two (total for the two instruction sequence for subtract). +define i32 @f0(i32 %a) nounwind readnone ssp { +; CHECK-LABEL: f0: +; CHECK-NEXT: add w0, w0, w0, lsl #3 +; CHECK-NEXT: ret + %res = mul i32 %a, 9 + ret i32 %res +} + +define i64 @f1(i64 %a) nounwind readnone ssp { +; CHECK-LABEL: f1: +; CHECK-NEXT: lsl x8, x0, #4 +; CHECK-NEXT: sub x0, x8, x0 +; CHECK-NEXT: ret + %res = mul i64 %a, 15 + ret i64 %res +} + +define i32 @f2(i32 %a) nounwind readnone ssp { +; CHECK-LABEL: f2: +; CHECK-NEXT: lsl w8, w0, #3 +; CHECK-NEXT: sub w0, w8, w0 +; CHECK-NEXT: ret + %res = mul nsw i32 %a, 7 + ret i32 %res +} + +define i64 @f3(i64 %a) nounwind readnone ssp { +; CHECK-LABEL: f3: +; CHECK-NEXT: add x0, x0, x0, lsl #4 +; CHECK-NEXT: ret + %res = mul nsw i64 %a, 17 + ret i64 %res +} diff --git a/test/CodeGen/ARM64/atomic-128.ll b/test/CodeGen/ARM64/atomic-128.ll new file mode 100644 index 0000000..a0039a3 --- /dev/null +++ b/test/CodeGen/ARM64/atomic-128.ll @@ -0,0 +1,213 @@ +; RUN: llc < %s -march=arm64 -mtriple=arm64-linux-gnu -verify-machineinstrs | FileCheck %s + +@var = global i128 0 + +define i128 @val_compare_and_swap(i128* %p, i128 %oldval, i128 %newval) { +; CHECK-LABEL: val_compare_and_swap: +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldaxp [[RESULTLO:x[0-9]+]], [[RESULTHI:x[0-9]+]], [x0] +; CHECK: cmp [[RESULTLO]], x2 +; CHECK: sbc xzr, [[RESULTHI]], x3 +; CHECK: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]] +; CHECK: stxp [[SCRATCH_RES:w[0-9]+]], x4, x5, [x0] +; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]] +; CHECK: [[LABEL2]]: + %val = cmpxchg i128* %p, i128 %oldval, i128 %newval acquire acquire + ret i128 %val +} + +define void @fetch_and_nand(i128* %p, i128 %bits) { +; CHECK-LABEL: fetch_and_nand: +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0] +; CHECK: bic 
[[SCRATCH_REGLO:x[0-9]+]], x2, [[DEST_REGLO]] +; CHECK: bic [[SCRATCH_REGHI:x[0-9]+]], x3, [[DEST_REGHI]] +; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0] +; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]] + +; CHECK: str [[DEST_REGHI]] +; CHECK: str [[DEST_REGLO]] + %val = atomicrmw nand i128* %p, i128 %bits release + store i128 %val, i128* @var, align 16 + ret void +} + +define void @fetch_and_or(i128* %p, i128 %bits) { +; CHECK-LABEL: fetch_and_or: +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldaxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0] +; CHECK: orr [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2 +; CHECK: orr [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3 +; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0] +; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]] + +; CHECK: str [[DEST_REGHI]] +; CHECK: str [[DEST_REGLO]] + %val = atomicrmw or i128* %p, i128 %bits seq_cst + store i128 %val, i128* @var, align 16 + ret void +} + +define void @fetch_and_add(i128* %p, i128 %bits) { +; CHECK-LABEL: fetch_and_add: +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldaxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0] +; CHECK: adds [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2 +; CHECK: adc [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3 +; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0] +; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]] + +; CHECK: str [[DEST_REGHI]] +; CHECK: str [[DEST_REGLO]] + %val = atomicrmw add i128* %p, i128 %bits seq_cst + store i128 %val, i128* @var, align 16 + ret void +} + +define void @fetch_and_sub(i128* %p, i128 %bits) { +; CHECK-LABEL: fetch_and_sub: +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldaxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0] +; CHECK: subs [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2 +; CHECK: sbc [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3 +; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0] +; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]] + +; CHECK: str [[DEST_REGHI]] +; CHECK: str [[DEST_REGLO]] + %val = atomicrmw sub i128* %p, i128 %bits seq_cst + store i128 %val, i128* @var, align 16 + ret void +} + +define void @fetch_and_min(i128* %p, i128 %bits) { +; CHECK-LABEL: fetch_and_min: +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldaxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0] +; CHECK: cmp [[DEST_REGLO]], x2 +; CHECK: sbc xzr, [[DEST_REGHI]], x3 +; CHECK: csel [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, lt +; CHECK: csel [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, lt +; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0] +; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]] + +; CHECK: str [[DEST_REGHI]] +; CHECK: str [[DEST_REGLO]] + %val = atomicrmw min i128* %p, i128 %bits seq_cst + store i128 %val, i128* @var, align 16 + ret void +} + +define void @fetch_and_max(i128* %p, i128 %bits) { +; CHECK-LABEL: fetch_and_max: +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldaxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0] +; CHECK: cmp [[DEST_REGLO]], x2 +; CHECK: sbc xzr, [[DEST_REGHI]], x3 +; CHECK: csel [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, gt +; CHECK: csel [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, gt +; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0] +; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]] + +; CHECK: str [[DEST_REGHI]] +; CHECK: str [[DEST_REGLO]] + %val = atomicrmw max i128* %p, i128 %bits seq_cst + store i128 %val, 
i128* @var, align 16 + ret void +} + +define void @fetch_and_umin(i128* %p, i128 %bits) { +; CHECK-LABEL: fetch_and_umin: +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldaxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0] +; CHECK: cmp [[DEST_REGLO]], x2 +; CHECK: sbc xzr, [[DEST_REGHI]], x3 +; CHECK: csel [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, cc +; CHECK: csel [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, cc +; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0] +; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]] + +; CHECK: str [[DEST_REGHI]] +; CHECK: str [[DEST_REGLO]] + %val = atomicrmw umin i128* %p, i128 %bits seq_cst + store i128 %val, i128* @var, align 16 + ret void +} + +define void @fetch_and_umax(i128* %p, i128 %bits) { +; CHECK-LABEL: fetch_and_umax: +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldaxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0] +; CHECK: cmp [[DEST_REGLO]], x2 +; CHECK: sbc xzr, [[DEST_REGHI]], x3 +; CHECK: csel [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, hi +; CHECK: csel [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, hi +; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0] +; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]] + +; CHECK: str [[DEST_REGHI]] +; CHECK: str [[DEST_REGLO]] + %val = atomicrmw umax i128* %p, i128 %bits seq_cst + store i128 %val, i128* @var, align 16 + ret void +} + +define i128 @atomic_load_seq_cst(i128* %p) { +; CHECK-LABEL: atomic_load_seq_cst: +; CHECK-NOT: dmb +; CHECK-LABEL: ldaxp +; CHECK-NOT: dmb + %r = load atomic i128* %p seq_cst, align 16 + ret i128 %r +} + +define i128 @atomic_load_relaxed(i128* %p) { +; CHECK-LABEL: atomic_load_relaxed: +; CHECK-NOT: dmb +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldxp [[LO:x[0-9]+]], [[HI:x[0-9]+]], [x0] +; CHECK: orr [[SAMELO:x[0-9]+]], [[LO]], xzr +; CHECK: orr [[SAMEHI:x[0-9]+]], [[HI]], xzr +; CHECK: stxp [[SUCCESS:w[0-9]+]], [[SAMELO]], [[SAMEHI]], [x0] +; CHECK: cbnz [[SUCCESS]], [[LABEL]] +; CHECK-NOT: dmb + %r = load atomic i128* %p monotonic, align 16 + ret i128 %r +} + + +define void @atomic_store_seq_cst(i128 %in, i128* %p) { +; CHECK-LABEL: atomic_store_seq_cst: +; CHECK-NOT: dmb +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldaxp xzr, xzr, [x2] +; CHECK: stlxp [[SUCCESS:w[0-9]+]], x0, x1, [x2] +; CHECK: cbnz [[SUCCESS]], [[LABEL]] +; CHECK-NOT: dmb + store atomic i128 %in, i128* %p seq_cst, align 16 + ret void +} + +define void @atomic_store_release(i128 %in, i128* %p) { +; CHECK-LABEL: atomic_store_release: +; CHECK-NOT: dmb +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldxp xzr, xzr, [x2] +; CHECK: stlxp [[SUCCESS:w[0-9]+]], x0, x1, [x2] +; CHECK: cbnz [[SUCCESS]], [[LABEL]] +; CHECK-NOT: dmb + store atomic i128 %in, i128* %p release, align 16 + ret void +} + +define void @atomic_store_relaxed(i128 %in, i128* %p) { +; CHECK-LABEL: atomic_store_relaxed: +; CHECK-NOT: dmb +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldxp xzr, xzr, [x2] +; CHECK: stxp [[SUCCESS:w[0-9]+]], x0, x1, [x2] +; CHECK: cbnz [[SUCCESS]], [[LABEL]] +; CHECK-NOT: dmb + store atomic i128 %in, i128* %p unordered, align 16 + ret void +} diff --git a/test/CodeGen/ARM64/atomic.ll b/test/CodeGen/ARM64/atomic.ll new file mode 100644 index 0000000..cf8cf7d --- /dev/null +++ b/test/CodeGen/ARM64/atomic.ll @@ -0,0 +1,343 @@ +; RUN: llc < %s -march=arm64 -verify-machineinstrs | FileCheck %s + +define i32 @val_compare_and_swap(i32* %p) { +; CHECK-LABEL: val_compare_and_swap: +; CHECK: orr [[NEWVAL_REG:w[0-9]+]], wzr, #0x4 
+; CHECK: orr [[OLDVAL_REG:w[0-9]+]], wzr, #0x7 +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldaxr [[RESULT:w[0-9]+]], [x0] +; CHECK: cmp [[RESULT]], [[OLDVAL_REG]] +; CHECK: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]] +; CHECK: stxr [[SCRATCH_REG:w[0-9]+]], [[NEWVAL_REG]], [x0] +; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]] +; CHECK: [[LABEL2]]: + %val = cmpxchg i32* %p, i32 7, i32 4 acquire acquire + ret i32 %val +} + +define i64 @val_compare_and_swap_64(i64* %p) { +; CHECK-LABEL: val_compare_and_swap_64: +; CHECK: orr [[NEWVAL_REG:x[0-9]+]], xzr, #0x4 +; CHECK: orr [[OLDVAL_REG:x[0-9]+]], xzr, #0x7 +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldxr [[RESULT:x[0-9]+]], [x0] +; CHECK: cmp [[RESULT]], [[OLDVAL_REG]] +; CHECK: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]] +; CHECK-NOT: stxr [[NEWVAL_REG]], [[NEWVAL_REG]] +; CHECK: stxr [[SCRATCH_REG:w[0-9]+]], [[NEWVAL_REG]], [x0] +; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]] +; CHECK: [[LABEL2]]: + %val = cmpxchg i64* %p, i64 7, i64 4 monotonic monotonic + ret i64 %val +} + +define i32 @fetch_and_nand(i32* %p) { +; CHECK-LABEL: fetch_and_nand: +; CHECK: orr [[OLDVAL_REG:w[0-9]+]], wzr, #0x7 +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldxr w[[DEST_REG:[0-9]+]], [x0] +; CHECK: bic [[SCRATCH2_REG:w[0-9]+]], [[OLDVAL_REG]], w[[DEST_REG]] +; CHECK-NOT: stlxr [[SCRATCH2_REG]], [[SCRATCH2_REG]] +; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0] +; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]] +; CHECK: mov x0, x[[DEST_REG]] + %val = atomicrmw nand i32* %p, i32 7 release + ret i32 %val +} + +define i64 @fetch_and_nand_64(i64* %p) { +; CHECK-LABEL: fetch_and_nand_64: +; CHECK: orr [[OLDVAL_REG:x[0-9]+]], xzr, #0x7 +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldaxr [[DEST_REG:x[0-9]+]], [x0] +; CHECK: bic [[SCRATCH2_REG:x[0-9]+]], [[OLDVAL_REG]], [[DEST_REG]] +; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0] +; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]] +; CHECK: mov x0, [[DEST_REG]] + %val = atomicrmw nand i64* %p, i64 7 acq_rel + ret i64 %val +} + +define i32 @fetch_and_or(i32* %p) { +; CHECK-LABEL: fetch_and_or: +; CHECK: movz [[OLDVAL_REG:w[0-9]+]], #5 +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldaxr w[[DEST_REG:[0-9]+]], [x0] +; CHECK: orr [[SCRATCH2_REG:w[0-9]+]], w[[DEST_REG]], [[OLDVAL_REG]] +; CHECK-NOT: stlxr [[SCRATCH2_REG]], [[SCRATCH2_REG]] +; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0] +; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]] +; CHECK: mov x0, x[[DEST_REG]] + %val = atomicrmw or i32* %p, i32 5 seq_cst + ret i32 %val +} + +define i64 @fetch_and_or_64(i64* %p) { +; CHECK: fetch_and_or_64: +; CHECK: orr [[OLDVAL_REG:x[0-9]+]], xzr, #0x7 +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldxr [[DEST_REG:x[0-9]+]], [x0] +; CHECK: orr [[SCRATCH2_REG:x[0-9]+]], [[DEST_REG]], [[OLDVAL_REG]] +; CHECK: stxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0] +; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]] +; CHECK: mov x0, [[DEST_REG]] + %val = atomicrmw or i64* %p, i64 7 monotonic + ret i64 %val +} + +define void @acquire_fence() { + fence acquire + ret void + ; CHECK-LABEL: acquire_fence: + ; CHECK: dmb ishld +} + +define void @release_fence() { + fence release + ret void + ; CHECK-LABEL: release_fence: + ; CHECK: dmb ish{{$}} +} + +define void @seq_cst_fence() { + fence seq_cst + ret void + ; CHECK-LABEL: seq_cst_fence: + ; CHECK: dmb ish{{$}} +} + +define i32 @atomic_load(i32* %p) { + %r = load atomic i32* %p seq_cst, align 4 + ret i32 %r + ; CHECK-LABEL: atomic_load: + ; CHECK: ldar +} + +define i8 
@atomic_load_relaxed_8(i8* %p, i32 %off32) { +; CHECK-LABEL: atomic_load_relaxed_8: + %ptr_unsigned = getelementptr i8* %p, i32 4095 + %val_unsigned = load atomic i8* %ptr_unsigned monotonic, align 1 +; CHECK: ldrb {{w[0-9]+}}, [x0, #4095] + + %ptr_regoff = getelementptr i8* %p, i32 %off32 + %val_regoff = load atomic i8* %ptr_regoff unordered, align 1 + %tot1 = add i8 %val_unsigned, %val_regoff + ; FIXME: syntax is incorrect: "sxtw" should not be able to go with an x-reg. +; CHECK: ldrb {{w[0-9]+}}, [x0, x1, sxtw] + + %ptr_unscaled = getelementptr i8* %p, i32 -256 + %val_unscaled = load atomic i8* %ptr_unscaled monotonic, align 1 + %tot2 = add i8 %tot1, %val_unscaled +; CHECK: ldurb {{w[0-9]+}}, [x0, #-256] + + %ptr_random = getelementptr i8* %p, i32 1191936 ; 0x123000 (i.e. ADD imm) + %val_random = load atomic i8* %ptr_random unordered, align 1 + %tot3 = add i8 %tot2, %val_random +; CHECK: add x[[ADDR:[0-9]+]], x0, #1191936 +; CHECK: ldrb {{w[0-9]+}}, [x[[ADDR]]] + + ret i8 %tot3 +} + +define i16 @atomic_load_relaxed_16(i16* %p, i32 %off32) { +; CHECK-LABEL: atomic_load_relaxed_16: + %ptr_unsigned = getelementptr i16* %p, i32 4095 + %val_unsigned = load atomic i16* %ptr_unsigned monotonic, align 2 +; CHECK: ldrh {{w[0-9]+}}, [x0, #8190] + + %ptr_regoff = getelementptr i16* %p, i32 %off32 + %val_regoff = load atomic i16* %ptr_regoff unordered, align 2 + %tot1 = add i16 %val_unsigned, %val_regoff + ; FIXME: syntax is incorrect: "sxtw" should not be able to go with an x-reg. +; CHECK: ldrh {{w[0-9]+}}, [x0, x1, sxtw #1] + + %ptr_unscaled = getelementptr i16* %p, i32 -128 + %val_unscaled = load atomic i16* %ptr_unscaled monotonic, align 2 + %tot2 = add i16 %tot1, %val_unscaled +; CHECK: ldurh {{w[0-9]+}}, [x0, #-256] + + %ptr_random = getelementptr i16* %p, i32 595968 ; 0x123000/2 (i.e. ADD imm) + %val_random = load atomic i16* %ptr_random unordered, align 2 + %tot3 = add i16 %tot2, %val_random +; CHECK: add x[[ADDR:[0-9]+]], x0, #1191936 +; CHECK: ldrh {{w[0-9]+}}, [x[[ADDR]]] + + ret i16 %tot3 +} + +define i32 @atomic_load_relaxed_32(i32* %p, i32 %off32) { +; CHECK-LABEL: atomic_load_relaxed_32: + %ptr_unsigned = getelementptr i32* %p, i32 4095 + %val_unsigned = load atomic i32* %ptr_unsigned monotonic, align 4 +; CHECK: ldr {{w[0-9]+}}, [x0, #16380] + + %ptr_regoff = getelementptr i32* %p, i32 %off32 + %val_regoff = load atomic i32* %ptr_regoff unordered, align 4 + %tot1 = add i32 %val_unsigned, %val_regoff + ; FIXME: syntax is incorrect: "sxtw" should not be able to go with an x-reg. +; CHECK: ldr {{w[0-9]+}}, [x0, x1, sxtw #2] + + %ptr_unscaled = getelementptr i32* %p, i32 -64 + %val_unscaled = load atomic i32* %ptr_unscaled monotonic, align 4 + %tot2 = add i32 %tot1, %val_unscaled +; CHECK: ldur {{w[0-9]+}}, [x0, #-256] + + %ptr_random = getelementptr i32* %p, i32 297984 ; 0x123000/4 (i.e. 
ADD imm) + %val_random = load atomic i32* %ptr_random unordered, align 4 + %tot3 = add i32 %tot2, %val_random +; CHECK: add x[[ADDR:[0-9]+]], x0, #1191936 +; CHECK: ldr {{w[0-9]+}}, [x[[ADDR]]] + + ret i32 %tot3 +} + +define i64 @atomic_load_relaxed_64(i64* %p, i32 %off32) { +; CHECK-LABEL: atomic_load_relaxed_64: + %ptr_unsigned = getelementptr i64* %p, i32 4095 + %val_unsigned = load atomic i64* %ptr_unsigned monotonic, align 8 +; CHECK: ldr {{x[0-9]+}}, [x0, #32760] + + %ptr_regoff = getelementptr i64* %p, i32 %off32 + %val_regoff = load atomic i64* %ptr_regoff unordered, align 8 + %tot1 = add i64 %val_unsigned, %val_regoff + ; FIXME: syntax is incorrect: "sxtw" should not be able to go with an x-reg. +; CHECK: ldr {{x[0-9]+}}, [x0, x1, sxtw #3] + + %ptr_unscaled = getelementptr i64* %p, i32 -32 + %val_unscaled = load atomic i64* %ptr_unscaled monotonic, align 8 + %tot2 = add i64 %tot1, %val_unscaled +; CHECK: ldur {{x[0-9]+}}, [x0, #-256] + + %ptr_random = getelementptr i64* %p, i32 148992 ; 0x123000/8 (i.e. ADD imm) + %val_random = load atomic i64* %ptr_random unordered, align 8 + %tot3 = add i64 %tot2, %val_random +; CHECK: add x[[ADDR:[0-9]+]], x0, #1191936 +; CHECK: ldr {{x[0-9]+}}, [x[[ADDR]]] + + ret i64 %tot3 +} + + +define void @atomc_store(i32* %p) { + store atomic i32 4, i32* %p seq_cst, align 4 + ret void + ; CHECK-LABEL: atomc_store: + ; CHECK: stlr +} + +define void @atomic_store_relaxed_8(i8* %p, i32 %off32, i8 %val) { +; CHECK-LABEL: atomic_store_relaxed_8: + %ptr_unsigned = getelementptr i8* %p, i32 4095 + store atomic i8 %val, i8* %ptr_unsigned monotonic, align 1 +; CHECK: strb {{w[0-9]+}}, [x0, #4095] + + %ptr_regoff = getelementptr i8* %p, i32 %off32 + store atomic i8 %val, i8* %ptr_regoff unordered, align 1 + ; FIXME: syntax is incorrect: "sxtw" should not be able to go with an x-reg. +; CHECK: strb {{w[0-9]+}}, [x0, x1, sxtw] + + %ptr_unscaled = getelementptr i8* %p, i32 -256 + store atomic i8 %val, i8* %ptr_unscaled monotonic, align 1 +; CHECK: sturb {{w[0-9]+}}, [x0, #-256] + + %ptr_random = getelementptr i8* %p, i32 1191936 ; 0x123000 (i.e. ADD imm) + store atomic i8 %val, i8* %ptr_random unordered, align 1 +; CHECK: add x[[ADDR:[0-9]+]], x0, #1191936 +; CHECK: strb {{w[0-9]+}}, [x[[ADDR]]] + + ret void +} + +define void @atomic_store_relaxed_16(i16* %p, i32 %off32, i16 %val) { +; CHECK-LABEL: atomic_store_relaxed_16: + %ptr_unsigned = getelementptr i16* %p, i32 4095 + store atomic i16 %val, i16* %ptr_unsigned monotonic, align 2 +; CHECK: strh {{w[0-9]+}}, [x0, #8190] + + %ptr_regoff = getelementptr i16* %p, i32 %off32 + store atomic i16 %val, i16* %ptr_regoff unordered, align 2 + ; FIXME: syntax is incorrect: "sxtw" should not be able to go with an x-reg. +; CHECK: strh {{w[0-9]+}}, [x0, x1, sxtw #1] + + %ptr_unscaled = getelementptr i16* %p, i32 -128 + store atomic i16 %val, i16* %ptr_unscaled monotonic, align 2 +; CHECK: sturh {{w[0-9]+}}, [x0, #-256] + + %ptr_random = getelementptr i16* %p, i32 595968 ; 0x123000/2 (i.e. 
ADD imm) + store atomic i16 %val, i16* %ptr_random unordered, align 2 +; CHECK: add x[[ADDR:[0-9]+]], x0, #1191936 +; CHECK: strh {{w[0-9]+}}, [x[[ADDR]]] + + ret void +} + +define void @atomic_store_relaxed_32(i32* %p, i32 %off32, i32 %val) { +; CHECK-LABEL: atomic_store_relaxed_32: + %ptr_unsigned = getelementptr i32* %p, i32 4095 + store atomic i32 %val, i32* %ptr_unsigned monotonic, align 4 +; CHECK: str {{w[0-9]+}}, [x0, #16380] + + %ptr_regoff = getelementptr i32* %p, i32 %off32 + store atomic i32 %val, i32* %ptr_regoff unordered, align 4 + ; FIXME: syntax is incorrect: "sxtw" should not be able to go with an x-reg. +; CHECK: str {{w[0-9]+}}, [x0, x1, sxtw #2] + + %ptr_unscaled = getelementptr i32* %p, i32 -64 + store atomic i32 %val, i32* %ptr_unscaled monotonic, align 4 +; CHECK: stur {{w[0-9]+}}, [x0, #-256] + + %ptr_random = getelementptr i32* %p, i32 297984 ; 0x123000/4 (i.e. ADD imm) + store atomic i32 %val, i32* %ptr_random unordered, align 4 +; CHECK: add x[[ADDR:[0-9]+]], x0, #1191936 +; CHECK: str {{w[0-9]+}}, [x[[ADDR]]] + + ret void +} + +define void @atomic_store_relaxed_64(i64* %p, i32 %off32, i64 %val) { +; CHECK-LABEL: atomic_store_relaxed_64: + %ptr_unsigned = getelementptr i64* %p, i32 4095 + store atomic i64 %val, i64* %ptr_unsigned monotonic, align 8 +; CHECK: str {{x[0-9]+}}, [x0, #32760] + + %ptr_regoff = getelementptr i64* %p, i32 %off32 + store atomic i64 %val, i64* %ptr_regoff unordered, align 8 + ; FIXME: syntax is incorrect: "sxtw" should not be able to go with an x-reg. +; CHECK: str {{x[0-9]+}}, [x0, x1, sxtw #3] + + %ptr_unscaled = getelementptr i64* %p, i32 -32 + store atomic i64 %val, i64* %ptr_unscaled monotonic, align 8 +; CHECK: stur {{x[0-9]+}}, [x0, #-256] + + %ptr_random = getelementptr i64* %p, i32 148992 ; 0x123000/8 (i.e. 
ADD imm) + store atomic i64 %val, i64* %ptr_random unordered, align 8 +; CHECK: add x[[ADDR:[0-9]+]], x0, #1191936 +; CHECK: str {{x[0-9]+}}, [x[[ADDR]]] + + ret void +} + +; rdar://11531169 +; rdar://11531308 + +%"class.X::Atomic" = type { %struct.x_atomic_t } +%struct.x_atomic_t = type { i32 } + +@counter = external hidden global %"class.X::Atomic", align 4 + +define i32 @next_id() nounwind optsize ssp align 2 { +entry: + %0 = atomicrmw add i32* getelementptr inbounds (%"class.X::Atomic"* @counter, i64 0, i32 0, i32 0), i32 1 seq_cst + %add.i = add i32 %0, 1 + %tobool = icmp eq i32 %add.i, 0 + br i1 %tobool, label %if.else, label %return + +if.else: ; preds = %entry + %1 = atomicrmw add i32* getelementptr inbounds (%"class.X::Atomic"* @counter, i64 0, i32 0, i32 0), i32 1 seq_cst + %add.i2 = add i32 %1, 1 + br label %return + +return: ; preds = %if.else, %entry + %retval.0 = phi i32 [ %add.i2, %if.else ], [ %add.i, %entry ] + ret i32 %retval.0 +} diff --git a/test/CodeGen/ARM64/basic-pic.ll b/test/CodeGen/ARM64/basic-pic.ll new file mode 100644 index 0000000..9fdb1e9 --- /dev/null +++ b/test/CodeGen/ARM64/basic-pic.ll @@ -0,0 +1,54 @@ +; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -relocation-model=pic %s -o - | FileCheck %s + +@var = global i32 0 + +define i32 @get_globalvar() { +; CHECK-LABEL: get_globalvar: + + %val = load i32* @var +; CHECK: adrp x[[GOTHI:[0-9]+]], :got:var +; CHECK: ldr x[[GOTLOC:[0-9]+]], [x[[GOTHI]], :got_lo12:var] +; CHECK: ldr w0, [x[[GOTLOC]]] + + ret i32 %val +} + +define i32* @get_globalvaraddr() { +; CHECK-LABEL: get_globalvaraddr: + + %val = load i32* @var +; CHECK: adrp x[[GOTHI:[0-9]+]], :got:var +; CHECK: ldr x0, [x[[GOTHI]], :got_lo12:var] + + ret i32* @var +} + +@hiddenvar = hidden global i32 0 + +define i32 @get_hiddenvar() { +; CHECK-LABEL: get_hiddenvar: + + %val = load i32* @hiddenvar +; CHECK: adrp x[[HI:[0-9]+]], hiddenvar +; CHECK: ldr w0, [x[[HI]], :lo12:hiddenvar] + + ret i32 %val +} + +define i32* @get_hiddenvaraddr() { +; CHECK-LABEL: get_hiddenvaraddr: + + %val = load i32* @hiddenvar +; CHECK: adrp [[HI:x[0-9]+]], hiddenvar +; CHECK: add x0, [[HI]], :lo12:hiddenvar + + ret i32* @hiddenvar +} + +define void()* @get_func() { +; CHECK-LABEL: get_func: + + ret void()* bitcast(void()*()* @get_func to void()*) +; CHECK: adrp x[[GOTHI:[0-9]+]], :got:get_func +; CHECK: ldr x0, [x[[GOTHI]], :got_lo12:get_func] +} diff --git a/test/CodeGen/ARM64/big-imm-offsets.ll b/test/CodeGen/ARM64/big-imm-offsets.ll new file mode 100644 index 0000000..a56df07 --- /dev/null +++ b/test/CodeGen/ARM64/big-imm-offsets.ll @@ -0,0 +1,14 @@ +; RUN: llc -march=arm64 < %s + + +; Make sure large offsets aren't mistaken for valid immediate offsets. +; +define void @f(i32* nocapture %p) { +entry: + %a = ptrtoint i32* %p to i64 + %ao = add i64 %a, 25769803792 + %b = inttoptr i64 %ao to i32* + store volatile i32 0, i32* %b, align 4 + store volatile i32 0, i32* %b, align 4 + ret void +} diff --git a/test/CodeGen/ARM64/big-stack.ll b/test/CodeGen/ARM64/big-stack.ll new file mode 100644 index 0000000..56ca30c --- /dev/null +++ b/test/CodeGen/ARM64/big-stack.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s | FileCheck %s +target triple = "arm64-apple-macosx10" + +; Check that big stacks are generated correctly. +; Currently, this is done by a sequence of sub instructions, +; which can encode immediate with a 12 bits mask an optionally +; shift left (up to 12). I.e., 16773120 is the biggest value. 
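+; For the 33554432-byte buffer below: 33554432 = 16773120 + 16773120 + 8192, hence the three subs checked for.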
+; +; CHECK-LABEL: foo: +; CHECK: sub sp, sp, #16773120 +; CHECK: sub sp, sp, #16773120 +; CHECK: sub sp, sp, #8192 +define void @foo() nounwind ssp { +entry: + %buffer = alloca [33554432 x i8], align 1 + %arraydecay = getelementptr inbounds [33554432 x i8]* %buffer, i64 0, i64 0 + call void @doit(i8* %arraydecay) nounwind + ret void +} + +declare void @doit(i8*) diff --git a/test/CodeGen/ARM64/bitfield-extract.ll b/test/CodeGen/ARM64/bitfield-extract.ll new file mode 100644 index 0000000..96b6967 --- /dev/null +++ b/test/CodeGen/ARM64/bitfield-extract.ll @@ -0,0 +1,406 @@ +; RUN: llc < %s -march=arm64 | FileCheck %s +%struct.X = type { i8, i8, [2 x i8] } +%struct.Y = type { i32, i8 } +%struct.Z = type { i8, i8, [2 x i8], i16 } +%struct.A = type { i64, i8 } + +define void @foo(%struct.X* nocapture %x, %struct.Y* nocapture %y) nounwind optsize ssp { +; CHECK-LABEL: foo: +; CHECK: ubfm +; CHECK-NOT: and +; CHECK: ret + + %tmp = bitcast %struct.X* %x to i32* + %tmp1 = load i32* %tmp, align 4 + %b = getelementptr inbounds %struct.Y* %y, i64 0, i32 1 + %bf.clear = lshr i32 %tmp1, 3 + %bf.clear.lobit = and i32 %bf.clear, 1 + %frombool = trunc i32 %bf.clear.lobit to i8 + store i8 %frombool, i8* %b, align 1 + ret void +} + +define i32 @baz(i64 %cav1.coerce) nounwind { +; CHECK-LABEL: baz: +; CHECK: sbfm w0, w0, #0, #3 + %tmp = trunc i64 %cav1.coerce to i32 + %tmp1 = shl i32 %tmp, 28 + %bf.val.sext = ashr exact i32 %tmp1, 28 + ret i32 %bf.val.sext +} + +define i32 @bar(i64 %cav1.coerce) nounwind { +; CHECK-LABEL: bar: +; CHECK: sbfm w0, w0, #4, #9 + %tmp = trunc i64 %cav1.coerce to i32 + %cav1.sroa.0.1.insert = shl i32 %tmp, 22 + %tmp1 = ashr i32 %cav1.sroa.0.1.insert, 26 + ret i32 %tmp1 +} + +define void @fct1(%struct.Z* nocapture %x, %struct.A* nocapture %y) nounwind optsize ssp { +; CHECK-LABEL: fct1: +; CHECK: ubfm +; CHECK-NOT: and +; CHECK: ret + + %tmp = bitcast %struct.Z* %x to i64* + %tmp1 = load i64* %tmp, align 4 + %b = getelementptr inbounds %struct.A* %y, i64 0, i32 0 + %bf.clear = lshr i64 %tmp1, 3 + %bf.clear.lobit = and i64 %bf.clear, 1 + store i64 %bf.clear.lobit, i64* %b, align 8 + ret void +} + +define i64 @fct2(i64 %cav1.coerce) nounwind { +; CHECK-LABEL: fct2: +; CHECK: sbfm x0, x0, #0, #35 + %tmp = shl i64 %cav1.coerce, 28 + %bf.val.sext = ashr exact i64 %tmp, 28 + ret i64 %bf.val.sext +} + +define i64 @fct3(i64 %cav1.coerce) nounwind { +; CHECK-LABEL: fct3: +; CHECK: sbfm x0, x0, #4, #41 + %cav1.sroa.0.1.insert = shl i64 %cav1.coerce, 22 + %tmp1 = ashr i64 %cav1.sroa.0.1.insert, 26 + ret i64 %tmp1 +} + +define void @fct4(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp { +entry: +; CHECK-LABEL: fct4: +; CHECK: ldr [[REG1:x[0-9]+]], +; CHECK-NEXT: bfm [[REG1]], x1, #16, #39 +; CHECK-NEXT: str [[REG1]], +; CHECK-NEXT: ret + %0 = load i64* %y, align 8 + %and = and i64 %0, -16777216 + %shr = lshr i64 %x, 16 + %and1 = and i64 %shr, 16777215 + %or = or i64 %and, %and1 + store i64 %or, i64* %y, align 8 + ret void +} + +define void @fct5(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp { +entry: +; CHECK-LABEL: fct5: +; CHECK: ldr [[REG1:w[0-9]+]], +; CHECK-NEXT: bfm [[REG1]], w1, #16, #18 +; CHECK-NEXT: str [[REG1]], +; CHECK-NEXT: ret + %0 = load i32* %y, align 8 + %and = and i32 %0, -8 + %shr = lshr i32 %x, 16 + %and1 = and i32 %shr, 7 + %or = or i32 %and, %and1 + store i32 %or, i32* %y, align 8 + ret void +} + +; Check if we can still catch bfm instruction when we drop some low bits +define void @fct6(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp 
{ +entry: +; CHECK-LABEL: fct6: +; CHECK: ldr [[REG1:w[0-9]+]], +; CHECK-NEXT: bfm [[REG1]], w1, #16, #18 +; lsr is an alias of ubfm +; CHECK-NEXT: lsr [[REG2:w[0-9]+]], [[REG1]], #2 +; CHECK-NEXT: str [[REG2]], +; CHECK-NEXT: ret + %0 = load i32* %y, align 8 + %and = and i32 %0, -8 + %shr = lshr i32 %x, 16 + %and1 = and i32 %shr, 7 + %or = or i32 %and, %and1 + %shr1 = lshr i32 %or, 2 + store i32 %shr1, i32* %y, align 8 + ret void +} + + +; Check if we can still catch bfm instruction when we drop some high bits +define void @fct7(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp { +entry: +; CHECK-LABEL: fct7: +; CHECK: ldr [[REG1:w[0-9]+]], +; CHECK-NEXT: bfm [[REG1]], w1, #16, #18 +; lsl is an alias of ubfm +; CHECK-NEXT: lsl [[REG2:w[0-9]+]], [[REG1]], #2 +; CHECK-NEXT: str [[REG2]], +; CHECK-NEXT: ret + %0 = load i32* %y, align 8 + %and = and i32 %0, -8 + %shr = lshr i32 %x, 16 + %and1 = and i32 %shr, 7 + %or = or i32 %and, %and1 + %shl = shl i32 %or, 2 + store i32 %shl, i32* %y, align 8 + ret void +} + + +; Check if we can still catch bfm instruction when we drop some low bits +; (i64 version) +define void @fct8(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp { +entry: +; CHECK-LABEL: fct8: +; CHECK: ldr [[REG1:x[0-9]+]], +; CHECK-NEXT: bfm [[REG1]], x1, #16, #18 +; lsr is an alias of ubfm +; CHECK-NEXT: lsr [[REG2:x[0-9]+]], [[REG1]], #2 +; CHECK-NEXT: str [[REG2]], +; CHECK-NEXT: ret + %0 = load i64* %y, align 8 + %and = and i64 %0, -8 + %shr = lshr i64 %x, 16 + %and1 = and i64 %shr, 7 + %or = or i64 %and, %and1 + %shr1 = lshr i64 %or, 2 + store i64 %shr1, i64* %y, align 8 + ret void +} + + +; Check if we can still catch bfm instruction when we drop some high bits +; (i64 version) +define void @fct9(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp { +entry: +; CHECK-LABEL: fct9: +; CHECK: ldr [[REG1:x[0-9]+]], +; CHECK-NEXT: bfm [[REG1]], x1, #16, #18 +; lsr is an alias of ubfm +; CHECK-NEXT: lsl [[REG2:x[0-9]+]], [[REG1]], #2 +; CHECK-NEXT: str [[REG2]], +; CHECK-NEXT: ret + %0 = load i64* %y, align 8 + %and = and i64 %0, -8 + %shr = lshr i64 %x, 16 + %and1 = and i64 %shr, 7 + %or = or i64 %and, %and1 + %shl = shl i64 %or, 2 + store i64 %shl, i64* %y, align 8 + ret void +} + +; Check if we can catch bfm instruction when lsb is 0 (i.e., no lshr) +; (i32 version) +define void @fct10(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp { +entry: +; CHECK-LABEL: fct10: +; CHECK: ldr [[REG1:w[0-9]+]], +; CHECK-NEXT: bfm [[REG1]], w1, #0, #2 +; lsl is an alias of ubfm +; CHECK-NEXT: lsl [[REG2:w[0-9]+]], [[REG1]], #2 +; CHECK-NEXT: str [[REG2]], +; CHECK-NEXT: ret + %0 = load i32* %y, align 8 + %and = and i32 %0, -8 + %and1 = and i32 %x, 7 + %or = or i32 %and, %and1 + %shl = shl i32 %or, 2 + store i32 %shl, i32* %y, align 8 + ret void +} + +; Check if we can catch bfm instruction when lsb is 0 (i.e., no lshr) +; (i64 version) +define void @fct11(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp { +entry: +; CHECK-LABEL: fct11: +; CHECK: ldr [[REG1:x[0-9]+]], +; CHECK-NEXT: bfm [[REG1]], x1, #0, #2 +; lsl is an alias of ubfm +; CHECK-NEXT: lsl [[REG2:x[0-9]+]], [[REG1]], #2 +; CHECK-NEXT: str [[REG2]], +; CHECK-NEXT: ret + %0 = load i64* %y, align 8 + %and = and i64 %0, -8 + %and1 = and i64 %x, 7 + %or = or i64 %and, %and1 + %shl = shl i64 %or, 2 + store i64 %shl, i64* %y, align 8 + ret void +} + +define zeroext i1 @fct12bis(i32 %tmp2) unnamed_addr nounwind ssp align 2 { +; CHECK-LABEL: fct12bis: +; CHECK-NOT: and +; CHECK: ubfm w0, w0, #11, 
#11 + %and.i.i = and i32 %tmp2, 2048 + %tobool.i.i = icmp ne i32 %and.i.i, 0 + ret i1 %tobool.i.i +} + +; Check if we can still catch bfm instruction when we drop some high bits +; and some low bits +define void @fct12(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp { +entry: +; CHECK-LABEL: fct12: +; CHECK: ldr [[REG1:w[0-9]+]], +; CHECK-NEXT: bfm [[REG1]], w1, #16, #18 +; lsr is an alias of ubfm +; CHECK-NEXT: ubfm [[REG2:w[0-9]+]], [[REG1]], #2, #29 +; CHECK-NEXT: str [[REG2]], +; CHECK-NEXT: ret + %0 = load i32* %y, align 8 + %and = and i32 %0, -8 + %shr = lshr i32 %x, 16 + %and1 = and i32 %shr, 7 + %or = or i32 %and, %and1 + %shl = shl i32 %or, 2 + %shr2 = lshr i32 %shl, 4 + store i32 %shr2, i32* %y, align 8 + ret void +} + +; Check if we can still catch bfm instruction when we drop some high bits +; and some low bits +; (i64 version) +define void @fct13(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp { +entry: +; CHECK-LABEL: fct13: +; CHECK: ldr [[REG1:x[0-9]+]], +; CHECK-NEXT: bfm [[REG1]], x1, #16, #18 +; lsr is an alias of ubfm +; CHECK-NEXT: ubfm [[REG2:x[0-9]+]], [[REG1]], #2, #61 +; CHECK-NEXT: str [[REG2]], +; CHECK-NEXT: ret + %0 = load i64* %y, align 8 + %and = and i64 %0, -8 + %shr = lshr i64 %x, 16 + %and1 = and i64 %shr, 7 + %or = or i64 %and, %and1 + %shl = shl i64 %or, 2 + %shr2 = lshr i64 %shl, 4 + store i64 %shr2, i64* %y, align 8 + ret void +} + + +; Check if we can still catch bfm instruction when we drop some high bits +; and some low bits +define void @fct14(i32* nocapture %y, i32 %x, i32 %x1) nounwind optsize inlinehint ssp { +entry: +; CHECK-LABEL: fct14: +; CHECK: ldr [[REG1:w[0-9]+]], +; CHECK-NEXT: bfm [[REG1]], w1, #16, #23 +; lsr is an alias of ubfm +; CHECK-NEXT: lsr [[REG2:w[0-9]+]], [[REG1]], #4 +; CHECK-NEXT: bfm [[REG2]], w2, #5, #7 +; lsl is an alias of ubfm +; CHECK-NEXT: lsl [[REG3:w[0-9]+]], [[REG2]], #2 +; CHECK-NEXT: str [[REG3]], +; CHECK-NEXT: ret + %0 = load i32* %y, align 8 + %and = and i32 %0, -256 + %shr = lshr i32 %x, 16 + %and1 = and i32 %shr, 255 + %or = or i32 %and, %and1 + %shl = lshr i32 %or, 4 + %and2 = and i32 %shl, -8 + %shr1 = lshr i32 %x1, 5 + %and3 = and i32 %shr1, 7 + %or1 = or i32 %and2, %and3 + %shl1 = shl i32 %or1, 2 + store i32 %shl1, i32* %y, align 8 + ret void +} + +; Check if we can still catch bfm instruction when we drop some high bits +; and some low bits +; (i64 version) +define void @fct15(i64* nocapture %y, i64 %x, i64 %x1) nounwind optsize inlinehint ssp { +entry: +; CHECK-LABEL: fct15: +; CHECK: ldr [[REG1:x[0-9]+]], +; CHECK-NEXT: bfm [[REG1]], x1, #16, #23 +; lsr is an alias of ubfm +; CHECK-NEXT: lsr [[REG2:x[0-9]+]], [[REG1]], #4 +; CHECK-NEXT: bfm [[REG2]], x2, #5, #7 +; lsl is an alias of ubfm +; CHECK-NEXT: lsl [[REG3:x[0-9]+]], [[REG2]], #2 +; CHECK-NEXT: str [[REG3]], +; CHECK-NEXT: ret + %0 = load i64* %y, align 8 + %and = and i64 %0, -256 + %shr = lshr i64 %x, 16 + %and1 = and i64 %shr, 255 + %or = or i64 %and, %and1 + %shl = lshr i64 %or, 4 + %and2 = and i64 %shl, -8 + %shr1 = lshr i64 %x1, 5 + %and3 = and i64 %shr1, 7 + %or1 = or i64 %and2, %and3 + %shl1 = shl i64 %or1, 2 + store i64 %shl1, i64* %y, align 8 + ret void +} + +; Check if we can still catch bfm instruction when we drop some high bits +; and some low bits and a masking operation has to be kept +define void @fct16(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp { +entry: +; CHECK-LABEL: fct16: +; CHECK: ldr [[REG1:w[0-9]+]], +; Create the constant +; CHECK: movz [[REGCST:w[0-9]+]], #26, lsl #16 +; CHECK: 
movk [[REGCST]], #33120 +; Do the masking +; CHECK: and [[REG2:w[0-9]+]], [[REG1]], [[REGCST]] +; CHECK-NEXT: bfm [[REG2]], w1, #16, #18 +; lsr is an alias of ubfm +; CHECK-NEXT: ubfm [[REG3:w[0-9]+]], [[REG2]], #2, #29 +; CHECK-NEXT: str [[REG3]], +; CHECK-NEXT: ret + %0 = load i32* %y, align 8 + %and = and i32 %0, 1737056 + %shr = lshr i32 %x, 16 + %and1 = and i32 %shr, 7 + %or = or i32 %and, %and1 + %shl = shl i32 %or, 2 + %shr2 = lshr i32 %shl, 4 + store i32 %shr2, i32* %y, align 8 + ret void +} + + +; Check if we can still catch bfm instruction when we drop some high bits +; and some low bits and a masking operation has to be kept +; (i64 version) +define void @fct17(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp { +entry: +; CHECK-LABEL: fct17: +; CHECK: ldr [[REG1:x[0-9]+]], +; Create the constant +; CHECK: movz [[REGCST:x[0-9]+]], #26, lsl #16 +; CHECK: movk [[REGCST]], #33120 +; Do the masking +; CHECK: and [[REG2:x[0-9]+]], [[REG1]], [[REGCST]] +; CHECK-NEXT: bfm [[REG2]], x1, #16, #18 +; lsr is an alias of ubfm +; CHECK-NEXT: ubfm [[REG3:x[0-9]+]], [[REG2]], #2, #61 +; CHECK-NEXT: str [[REG3]], +; CHECK-NEXT: ret + %0 = load i64* %y, align 8 + %and = and i64 %0, 1737056 + %shr = lshr i64 %x, 16 + %and1 = and i64 %shr, 7 + %or = or i64 %and, %and1 + %shl = shl i64 %or, 2 + %shr2 = lshr i64 %shl, 4 + store i64 %shr2, i64* %y, align 8 + ret void +} + +define i64 @fct18(i32 %xor72) nounwind ssp { +; CHECK-LABEL: fct18: +; CHECK: ubfm x0, x0, #9, #16 + %shr81 = lshr i32 %xor72, 9 + %conv82 = zext i32 %shr81 to i64 + %result = and i64 %conv82, 255 + ret i64 %result +} diff --git a/test/CodeGen/ARM64/blockaddress.ll b/test/CodeGen/ARM64/blockaddress.ll new file mode 100644 index 0000000..ac4f19e --- /dev/null +++ b/test/CodeGen/ARM64/blockaddress.ll @@ -0,0 +1,30 @@ +; RUN: llc < %s -mtriple=arm64-apple-ios | FileCheck %s +; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s --check-prefix=CHECK-LINUX +; RUN: llc < %s -mtriple=arm64-linux-gnu -code-model=large| FileCheck %s --check-prefix=CHECK-LARGE + +; rdar://9188695 + +define i64 @t() nounwind ssp { +entry: +; CHECK-LABEL: t: +; CHECK: adrp [[REG:x[0-9]+]], Ltmp1@PAGE +; CHECK: add {{x[0-9]+}}, [[REG]], Ltmp1@PAGEOFF + +; CHECK-LINUX-LABEL: t: +; CHECK-LINUX: adrp [[REG:x[0-9]+]], .Ltmp1 +; CHECK-LINUX: add {{x[0-9]+}}, [[REG]], :lo12:.Ltmp1 + +; CHECK-LARGE-LABEL: t: +; CHECK-LARGE: movz [[ADDR_REG:x[0-9]+]], #:abs_g3:[[DEST_LBL:.Ltmp[0-9]+]] +; CHECK-LARGE: movk [[ADDR_REG]], #:abs_g2_nc:[[DEST_LBL]] +; CHECK-LARGE: movk [[ADDR_REG]], #:abs_g1_nc:[[DEST_LBL]] +; CHECK-LARGE: movk [[ADDR_REG]], #:abs_g0_nc:[[DEST_LBL]] + + %recover = alloca i64, align 8 + store volatile i64 ptrtoint (i8* blockaddress(@t, %mylabel) to i64), i64* %recover, align 8 + br label %mylabel + +mylabel: + %tmp = load volatile i64* %recover, align 8 + ret i64 %tmp +} diff --git a/test/CodeGen/ARM64/build-vector.ll b/test/CodeGen/ARM64/build-vector.ll new file mode 100644 index 0000000..1d137ae --- /dev/null +++ b/test/CodeGen/ARM64/build-vector.ll @@ -0,0 +1,35 @@ +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s + +; Check that building up a vector w/ only one non-zero lane initializes +; intelligently. 
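+; (Here that means a zeroing dup of the whole vector followed by a single byte-lane insert, as checked below.)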
+define void @one_lane(i32* nocapture %out_int, i32 %skip0) nounwind { +; CHECK-LABEL: one_lane: +; CHECK: dup.16b v[[REG:[0-9]+]], wzr +; CHECK-NEXT: ins.b v[[REG]][0], w1 +; v and q are aliases, and str is prefered against st.16b when possible +; rdar://11246289 +; CHECK: str q[[REG]], [x0] +; CHECK: ret + %conv = trunc i32 %skip0 to i8 + %vset_lane = insertelement <16 x i8> , i8 %conv, i32 0 + %tmp = bitcast i32* %out_int to <4 x i32>* + %tmp1 = bitcast <16 x i8> %vset_lane to <4 x i32> + store <4 x i32> %tmp1, <4 x i32>* %tmp, align 16 + ret void +} + +; Check that building a vector from floats doesn't insert an unnecessary +; copy for lane zero. +define <4 x float> @foo(float %a, float %b, float %c, float %d) nounwind { +; CHECK-LABEL: foo: +; CHECK-NOT: ins.s v0[0], v0[0] +; CHECK: ins.s v0[1], v1[0] +; CHECK: ins.s v0[2], v2[0] +; CHECK: ins.s v0[3], v3[0] +; CHECK: ret + %1 = insertelement <4 x float> undef, float %a, i32 0 + %2 = insertelement <4 x float> %1, float %b, i32 1 + %3 = insertelement <4 x float> %2, float %c, i32 2 + %4 = insertelement <4 x float> %3, float %d, i32 3 + ret <4 x float> %4 +} diff --git a/test/CodeGen/ARM64/call-tailcalls.ll b/test/CodeGen/ARM64/call-tailcalls.ll new file mode 100644 index 0000000..487c1d9 --- /dev/null +++ b/test/CodeGen/ARM64/call-tailcalls.ll @@ -0,0 +1,91 @@ +; RUN: llc < %s -mtriple=arm64-apple-ios7.0 | FileCheck %s + +@t = weak global i32 ()* null +@x = external global i32, align 4 + +define void @t2() { +; CHECK-LABEL: t2: +; CHECK: adrp x[[GOTADDR:[0-9]+]], _t@GOTPAGE +; CHECK: ldr x[[ADDR:[0-9]+]], [x[[GOTADDR]], _t@GOTPAGEOFF] +; CHECK: ldr x[[DEST:[0-9]+]], [x[[ADDR]]] +; CHECK: br x[[DEST]] + %tmp = load i32 ()** @t + %tmp.upgrd.2 = tail call i32 %tmp() + ret void +} + +define void @t3() { +; CHECK-LABEL: t3: +; CHECK: b _t2 + tail call void @t2() + ret void +} + +define double @t4(double %a) nounwind readonly ssp { +; CHECK-LABEL: t4: +; CHECK: b _sin + %tmp = tail call double @sin(double %a) nounwind readonly + ret double %tmp +} + +define float @t5(float %a) nounwind readonly ssp { +; CHECK-LABEL: t5: +; CHECK: b _sinf + %tmp = tail call float @sinf(float %a) nounwind readonly + ret float %tmp +} + +define void @t7() nounwind { +; CHECK-LABEL: t7: +; CHECK: b _foo +; CHECK: b _bar + + br i1 undef, label %bb, label %bb1.lr.ph + +bb1.lr.ph: ; preds = %entry + tail call void @bar() nounwind + ret void + +bb: ; preds = %entry + tail call void @foo() nounwind + ret void +} + +define i32 @t8(i32 %x) nounwind ssp { +; CHECK-LABEL: t8: +; CHECK: b _a +; CHECK: b _b +; CHECK: b _c + %and = and i32 %x, 1 + %tobool = icmp eq i32 %and, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + %call = tail call i32 @a(i32 %x) nounwind + br label %return + +if.end: ; preds = %entry + %and1 = and i32 %x, 2 + %tobool2 = icmp eq i32 %and1, 0 + br i1 %tobool2, label %if.end5, label %if.then3 + +if.then3: ; preds = %if.end + %call4 = tail call i32 @b(i32 %x) nounwind + br label %return + +if.end5: ; preds = %if.end + %call6 = tail call i32 @c(i32 %x) nounwind + br label %return + +return: ; preds = %if.end5, %if.then3, %if.then + %retval.0 = phi i32 [ %call, %if.then ], [ %call4, %if.then3 ], [ %call6, %if.end5 ] + ret i32 %retval.0 +} + +declare float @sinf(float) nounwind readonly +declare double @sin(double) nounwind readonly +declare void @bar() nounwind +declare void @foo() nounwind +declare i32 @a(i32) +declare i32 @b(i32) +declare i32 @c(i32) diff --git a/test/CodeGen/ARM64/cast-opt.ll 
b/test/CodeGen/ARM64/cast-opt.ll new file mode 100644 index 0000000..3d7f257 --- /dev/null +++ b/test/CodeGen/ARM64/cast-opt.ll @@ -0,0 +1,31 @@ +; RUN: llc -O3 -march=arm64 -mtriple arm64-apple-ios5.0.0 < %s | FileCheck %s +; +; Zero truncation is not necessary when the values are extended properly +; already. + +@block = common global i8* null, align 8 + +define zeroext i8 @foo(i32 %i1, i32 %i2) { +; CHECK-LABEL: foo: +; CHECK: csinc +; CHECK-NOT: and +entry: + %idxprom = sext i32 %i1 to i64 + %0 = load i8** @block, align 8 + %arrayidx = getelementptr inbounds i8* %0, i64 %idxprom + %1 = load i8* %arrayidx, align 1 + %idxprom1 = sext i32 %i2 to i64 + %arrayidx2 = getelementptr inbounds i8* %0, i64 %idxprom1 + %2 = load i8* %arrayidx2, align 1 + %cmp = icmp eq i8 %1, %2 + br i1 %cmp, label %return, label %if.then + +if.then: ; preds = %entry + %cmp7 = icmp ugt i8 %1, %2 + %conv9 = zext i1 %cmp7 to i8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i8 [ %conv9, %if.then ], [ 1, %entry ] + ret i8 %retval.0 +} diff --git a/test/CodeGen/ARM64/ccmp-heuristics.ll b/test/CodeGen/ARM64/ccmp-heuristics.ll new file mode 100644 index 0000000..5575997 --- /dev/null +++ b/test/CodeGen/ARM64/ccmp-heuristics.ll @@ -0,0 +1,190 @@ +; RUN: llc < %s -mcpu=cyclone -verify-machineinstrs -arm64-ccmp | FileCheck %s +target triple = "arm64-apple-ios7.0.0" + +@channelColumns = external global i64 +@channelTracks = external global i64 +@mazeRoute = external hidden unnamed_addr global i8*, align 8 +@TOP = external global i64* +@BOT = external global i64* +@netsAssign = external global i64* + +; Function from yacr2/maze.c +; The branch at the end of %if.then is driven by %cmp5 and %cmp6. +; Isel converts the and i1 into two branches, and arm64-ccmp should not convert +; it back again. %cmp6 has much higher latency than %cmp5. 
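+; (%cmp5 only tests the loop counter, while %cmp6 depends on a chain of loads through @BOT and @netsAssign.)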
+; CHECK: Maze1 +; CHECK: %if.then +; CHECK: cmp x{{[0-9]+}}, #2 +; CHECK-NEXT b.cc +; CHECK: %if.then +; CHECK: cmp x{{[0-9]+}}, #2 +; CHECK-NEXT b.cc +define i32 @Maze1() nounwind ssp { +entry: + %0 = load i64* @channelColumns, align 8, !tbaa !0 + %cmp90 = icmp eq i64 %0, 0 + br i1 %cmp90, label %for.end, label %for.body + +for.body: ; preds = %for.inc, %entry + %1 = phi i64 [ %0, %entry ], [ %37, %for.inc ] + %i.092 = phi i64 [ 1, %entry ], [ %inc53, %for.inc ] + %numLeft.091 = phi i32 [ 0, %entry ], [ %numLeft.1, %for.inc ] + %2 = load i8** @mazeRoute, align 8, !tbaa !3 + %arrayidx = getelementptr inbounds i8* %2, i64 %i.092 + %3 = load i8* %arrayidx, align 1, !tbaa !1 + %tobool = icmp eq i8 %3, 0 + br i1 %tobool, label %for.inc, label %if.then + +if.then: ; preds = %for.body + %4 = load i64** @TOP, align 8, !tbaa !3 + %arrayidx1 = getelementptr inbounds i64* %4, i64 %i.092 + %5 = load i64* %arrayidx1, align 8, !tbaa !0 + %6 = load i64** @netsAssign, align 8, !tbaa !3 + %arrayidx2 = getelementptr inbounds i64* %6, i64 %5 + %7 = load i64* %arrayidx2, align 8, !tbaa !0 + %8 = load i64** @BOT, align 8, !tbaa !3 + %arrayidx3 = getelementptr inbounds i64* %8, i64 %i.092 + %9 = load i64* %arrayidx3, align 8, !tbaa !0 + %arrayidx4 = getelementptr inbounds i64* %6, i64 %9 + %10 = load i64* %arrayidx4, align 8, !tbaa !0 + %cmp5 = icmp ugt i64 %i.092, 1 + %cmp6 = icmp ugt i64 %10, 1 + %or.cond = and i1 %cmp5, %cmp6 + br i1 %or.cond, label %land.lhs.true7, label %if.else + +land.lhs.true7: ; preds = %if.then + %11 = load i64* @channelTracks, align 8, !tbaa !0 + %add = add i64 %11, 1 + %call = tail call fastcc i32 @Maze1Mech(i64 %i.092, i64 %add, i64 %10, i64 0, i64 %7, i32 -1, i32 -1) + %tobool8 = icmp eq i32 %call, 0 + br i1 %tobool8, label %land.lhs.true7.if.else_crit_edge, label %if.then9 + +land.lhs.true7.if.else_crit_edge: ; preds = %land.lhs.true7 + %.pre = load i64* @channelColumns, align 8, !tbaa !0 + br label %if.else + +if.then9: ; preds = %land.lhs.true7 + %12 = load i8** @mazeRoute, align 8, !tbaa !3 + %arrayidx10 = getelementptr inbounds i8* %12, i64 %i.092 + store i8 0, i8* %arrayidx10, align 1, !tbaa !1 + %13 = load i64** @TOP, align 8, !tbaa !3 + %arrayidx11 = getelementptr inbounds i64* %13, i64 %i.092 + %14 = load i64* %arrayidx11, align 8, !tbaa !0 + tail call fastcc void @CleanNet(i64 %14) + %15 = load i64** @BOT, align 8, !tbaa !3 + %arrayidx12 = getelementptr inbounds i64* %15, i64 %i.092 + %16 = load i64* %arrayidx12, align 8, !tbaa !0 + tail call fastcc void @CleanNet(i64 %16) + br label %for.inc + +if.else: ; preds = %land.lhs.true7.if.else_crit_edge, %if.then + %17 = phi i64 [ %.pre, %land.lhs.true7.if.else_crit_edge ], [ %1, %if.then ] + %cmp13 = icmp ult i64 %i.092, %17 + %or.cond89 = and i1 %cmp13, %cmp6 + br i1 %or.cond89, label %land.lhs.true16, label %if.else24 + +land.lhs.true16: ; preds = %if.else + %18 = load i64* @channelTracks, align 8, !tbaa !0 + %add17 = add i64 %18, 1 + %call18 = tail call fastcc i32 @Maze1Mech(i64 %i.092, i64 %add17, i64 %10, i64 0, i64 %7, i32 1, i32 -1) + %tobool19 = icmp eq i32 %call18, 0 + br i1 %tobool19, label %if.else24, label %if.then20 + +if.then20: ; preds = %land.lhs.true16 + %19 = load i8** @mazeRoute, align 8, !tbaa !3 + %arrayidx21 = getelementptr inbounds i8* %19, i64 %i.092 + store i8 0, i8* %arrayidx21, align 1, !tbaa !1 + %20 = load i64** @TOP, align 8, !tbaa !3 + %arrayidx22 = getelementptr inbounds i64* %20, i64 %i.092 + %21 = load i64* %arrayidx22, align 8, !tbaa !0 + tail call fastcc void @CleanNet(i64 %21) + %22 = 
load i64** @BOT, align 8, !tbaa !3 + %arrayidx23 = getelementptr inbounds i64* %22, i64 %i.092 + %23 = load i64* %arrayidx23, align 8, !tbaa !0 + tail call fastcc void @CleanNet(i64 %23) + br label %for.inc + +if.else24: ; preds = %land.lhs.true16, %if.else + br i1 %cmp5, label %land.lhs.true26, label %if.else36 + +land.lhs.true26: ; preds = %if.else24 + %24 = load i64* @channelTracks, align 8, !tbaa !0 + %cmp27 = icmp ult i64 %7, %24 + br i1 %cmp27, label %land.lhs.true28, label %if.else36 + +land.lhs.true28: ; preds = %land.lhs.true26 + %add29 = add i64 %24, 1 + %call30 = tail call fastcc i32 @Maze1Mech(i64 %i.092, i64 0, i64 %7, i64 %add29, i64 %10, i32 -1, i32 1) + %tobool31 = icmp eq i32 %call30, 0 + br i1 %tobool31, label %if.else36, label %if.then32 + +if.then32: ; preds = %land.lhs.true28 + %25 = load i8** @mazeRoute, align 8, !tbaa !3 + %arrayidx33 = getelementptr inbounds i8* %25, i64 %i.092 + store i8 0, i8* %arrayidx33, align 1, !tbaa !1 + %26 = load i64** @TOP, align 8, !tbaa !3 + %arrayidx34 = getelementptr inbounds i64* %26, i64 %i.092 + %27 = load i64* %arrayidx34, align 8, !tbaa !0 + tail call fastcc void @CleanNet(i64 %27) + %28 = load i64** @BOT, align 8, !tbaa !3 + %arrayidx35 = getelementptr inbounds i64* %28, i64 %i.092 + %29 = load i64* %arrayidx35, align 8, !tbaa !0 + tail call fastcc void @CleanNet(i64 %29) + br label %for.inc + +if.else36: ; preds = %land.lhs.true28, %land.lhs.true26, %if.else24 + %30 = load i64* @channelColumns, align 8, !tbaa !0 + %cmp37 = icmp ult i64 %i.092, %30 + br i1 %cmp37, label %land.lhs.true38, label %if.else48 + +land.lhs.true38: ; preds = %if.else36 + %31 = load i64* @channelTracks, align 8, !tbaa !0 + %cmp39 = icmp ult i64 %7, %31 + br i1 %cmp39, label %land.lhs.true40, label %if.else48 + +land.lhs.true40: ; preds = %land.lhs.true38 + %add41 = add i64 %31, 1 + %call42 = tail call fastcc i32 @Maze1Mech(i64 %i.092, i64 0, i64 %7, i64 %add41, i64 %10, i32 1, i32 1) + %tobool43 = icmp eq i32 %call42, 0 + br i1 %tobool43, label %if.else48, label %if.then44 + +if.then44: ; preds = %land.lhs.true40 + %32 = load i8** @mazeRoute, align 8, !tbaa !3 + %arrayidx45 = getelementptr inbounds i8* %32, i64 %i.092 + store i8 0, i8* %arrayidx45, align 1, !tbaa !1 + %33 = load i64** @TOP, align 8, !tbaa !3 + %arrayidx46 = getelementptr inbounds i64* %33, i64 %i.092 + %34 = load i64* %arrayidx46, align 8, !tbaa !0 + tail call fastcc void @CleanNet(i64 %34) + %35 = load i64** @BOT, align 8, !tbaa !3 + %arrayidx47 = getelementptr inbounds i64* %35, i64 %i.092 + %36 = load i64* %arrayidx47, align 8, !tbaa !0 + tail call fastcc void @CleanNet(i64 %36) + br label %for.inc + +if.else48: ; preds = %land.lhs.true40, %land.lhs.true38, %if.else36 + %inc = add nsw i32 %numLeft.091, 1 + br label %for.inc + +for.inc: ; preds = %if.else48, %if.then44, %if.then32, %if.then20, %if.then9, %for.body + %numLeft.1 = phi i32 [ %numLeft.091, %if.then9 ], [ %numLeft.091, %if.then20 ], [ %numLeft.091, %if.then32 ], [ %numLeft.091, %if.then44 ], [ %inc, %if.else48 ], [ %numLeft.091, %for.body ] + %inc53 = add i64 %i.092, 1 + %37 = load i64* @channelColumns, align 8, !tbaa !0 + %cmp = icmp ugt i64 %inc53, %37 + br i1 %cmp, label %for.end, label %for.body + +for.end: ; preds = %for.inc, %entry + %numLeft.0.lcssa = phi i32 [ 0, %entry ], [ %numLeft.1, %for.inc ] + ret i32 %numLeft.0.lcssa +} + +; Materializable +declare hidden fastcc i32 @Maze1Mech(i64, i64, i64, i64, i64, i32, i32) nounwind ssp + +; Materializable +declare hidden fastcc void @CleanNet(i64) nounwind ssp + +!0 = 
metadata !{metadata !"long", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} +!3 = metadata !{metadata !"any pointer", metadata !1} diff --git a/test/CodeGen/ARM64/ccmp.ll b/test/CodeGen/ARM64/ccmp.ll new file mode 100644 index 0000000..79e6f94 --- /dev/null +++ b/test/CodeGen/ARM64/ccmp.ll @@ -0,0 +1,289 @@ +; RUN: llc < %s -mcpu=cyclone -verify-machineinstrs -arm64-ccmp -arm64-stress-ccmp | FileCheck %s +target triple = "arm64-apple-ios" + +; CHECK: single_same +; CHECK: cmp w0, #5 +; CHECK-NEXT: ccmp w1, #17, #4, ne +; CHECK-NEXT: b.ne +; CHECK: %if.then +; CHECK: bl _foo +; CHECK: %if.end +define i32 @single_same(i32 %a, i32 %b) nounwind ssp { +entry: + %cmp = icmp eq i32 %a, 5 + %cmp1 = icmp eq i32 %b, 17 + %or.cond = or i1 %cmp, %cmp1 + br i1 %or.cond, label %if.then, label %if.end + +if.then: + %call = tail call i32 @foo() nounwind + br label %if.end + +if.end: + ret i32 7 +} + +; Different condition codes for the two compares. +; CHECK: single_different +; CHECK: cmp w0, #6 +; CHECK-NEXT: ccmp w1, #17, #0, ge +; CHECK-NEXT: b.eq +; CHECK: %if.then +; CHECK: bl _foo +; CHECK: %if.end +define i32 @single_different(i32 %a, i32 %b) nounwind ssp { +entry: + %cmp = icmp sle i32 %a, 5 + %cmp1 = icmp ne i32 %b, 17 + %or.cond = or i1 %cmp, %cmp1 + br i1 %or.cond, label %if.then, label %if.end + +if.then: + %call = tail call i32 @foo() nounwind + br label %if.end + +if.end: + ret i32 7 +} + +; Second block clobbers the flags, can't convert (easily). +; CHECK: single_flagclobber +; CHECK: cmp +; CHECK: b.eq +; CHECK: cmp +; CHECK: b.gt +define i32 @single_flagclobber(i32 %a, i32 %b) nounwind ssp { +entry: + %cmp = icmp eq i32 %a, 5 + br i1 %cmp, label %if.then, label %lor.lhs.false + +lor.lhs.false: ; preds = %entry + %cmp1 = icmp slt i32 %b, 7 + %mul = shl nsw i32 %b, 1 + %add = add nsw i32 %b, 1 + %cond = select i1 %cmp1, i32 %mul, i32 %add + %cmp2 = icmp slt i32 %cond, 17 + br i1 %cmp2, label %if.then, label %if.end + +if.then: ; preds = %lor.lhs.false, %entry + %call = tail call i32 @foo() nounwind + br label %if.end + +if.end: ; preds = %if.then, %lor.lhs.false + ret i32 7 +} + +; Second block clobbers the flags and ends with a tbz terminator. +; CHECK: single_flagclobber_tbz +; CHECK: cmp +; CHECK: b.eq +; CHECK: cmp +; CHECK: tbz +define i32 @single_flagclobber_tbz(i32 %a, i32 %b) nounwind ssp { +entry: + %cmp = icmp eq i32 %a, 5 + br i1 %cmp, label %if.then, label %lor.lhs.false + +lor.lhs.false: ; preds = %entry + %cmp1 = icmp slt i32 %b, 7 + %mul = shl nsw i32 %b, 1 + %add = add nsw i32 %b, 1 + %cond = select i1 %cmp1, i32 %mul, i32 %add + %and = and i32 %cond, 8 + %cmp2 = icmp ne i32 %and, 0 + br i1 %cmp2, label %if.then, label %if.end + +if.then: ; preds = %lor.lhs.false, %entry + %call = tail call i32 @foo() nounwind + br label %if.end + +if.end: ; preds = %if.then, %lor.lhs.false + ret i32 7 +} + +; Speculatively execute division by zero. +; The sdiv/udiv instructions do not trap when the divisor is zero, so they are +; safe to speculate. 
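+; (AArch64 integer division by zero yields zero instead of trapping.)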
+; CHECK: speculate_division +; CHECK-NOT: cmp +; CHECK: sdiv +; CHECK: cmp +; CHECK-NEXT: ccmp +define i32 @speculate_division(i32 %a, i32 %b) nounwind ssp { +entry: + %cmp = icmp sgt i32 %a, 0 + br i1 %cmp, label %land.lhs.true, label %if.end + +land.lhs.true: + %div = sdiv i32 %b, %a + %cmp1 = icmp slt i32 %div, 17 + br i1 %cmp1, label %if.then, label %if.end + +if.then: + %call = tail call i32 @foo() nounwind + br label %if.end + +if.end: + ret i32 7 +} + +; Floating point compare. +; CHECK: single_fcmp +; CHECK: cmp +; CHECK-NOT: b. +; CHECK: fccmp {{.*}}, #8, ge +; CHECK: b.lt +define i32 @single_fcmp(i32 %a, float %b) nounwind ssp { +entry: + %cmp = icmp sgt i32 %a, 0 + br i1 %cmp, label %land.lhs.true, label %if.end + +land.lhs.true: + %conv = sitofp i32 %a to float + %div = fdiv float %b, %conv + %cmp1 = fcmp oge float %div, 1.700000e+01 + br i1 %cmp1, label %if.then, label %if.end + +if.then: + %call = tail call i32 @foo() nounwind + br label %if.end + +if.end: + ret i32 7 +} + +; Chain multiple compares. +; CHECK: multi_different +; CHECK: cmp +; CHECK: ccmp +; CHECK: ccmp +; CHECK: b. +define void @multi_different(i32 %a, i32 %b, i32 %c) nounwind ssp { +entry: + %cmp = icmp sgt i32 %a, %b + br i1 %cmp, label %land.lhs.true, label %if.end + +land.lhs.true: + %div = sdiv i32 %b, %a + %cmp1 = icmp eq i32 %div, 5 + %cmp4 = icmp sgt i32 %div, %c + %or.cond = and i1 %cmp1, %cmp4 + br i1 %or.cond, label %if.then, label %if.end + +if.then: + %call = tail call i32 @foo() nounwind + br label %if.end + +if.end: + ret void +} + +; Convert a cbz in the head block. +; CHECK: cbz_head +; CHECK: cmp w0, #0 +; CHECK: ccmp +define i32 @cbz_head(i32 %a, i32 %b) nounwind ssp { +entry: + %cmp = icmp eq i32 %a, 0 + %cmp1 = icmp ne i32 %b, 17 + %or.cond = or i1 %cmp, %cmp1 + br i1 %or.cond, label %if.then, label %if.end + +if.then: + %call = tail call i32 @foo() nounwind + br label %if.end + +if.end: + ret i32 7 +} + +; Check that the immediate operand is in range. The ccmp instruction encodes a +; smaller range of immediates than subs/adds. +; The ccmp immediates must be in the range 0-31. +; CHECK: immediate_range +; CHECK-NOT: ccmp +define i32 @immediate_range(i32 %a, i32 %b) nounwind ssp { +entry: + %cmp = icmp eq i32 %a, 5 + %cmp1 = icmp eq i32 %b, 32 + %or.cond = or i1 %cmp, %cmp1 + br i1 %or.cond, label %if.then, label %if.end + +if.then: + %call = tail call i32 @foo() nounwind + br label %if.end + +if.end: + ret i32 7 +} + +; Convert a cbz in the second block. +; CHECK: cbz_second +; CHECK: cmp w0, #0 +; CHECK: ccmp w1, #0, #0, ne +; CHECK: b.eq +define i32 @cbz_second(i32 %a, i32 %b) nounwind ssp { +entry: + %cmp = icmp eq i32 %a, 0 + %cmp1 = icmp ne i32 %b, 0 + %or.cond = or i1 %cmp, %cmp1 + br i1 %or.cond, label %if.then, label %if.end + +if.then: + %call = tail call i32 @foo() nounwind + br label %if.end + +if.end: + ret i32 7 +} + +; Convert a cbnz in the second block. +; CHECK: cbnz_second +; CHECK: cmp w0, #0 +; CHECK: ccmp w1, #0, #4, ne +; CHECK: b.ne +define i32 @cbnz_second(i32 %a, i32 %b) nounwind ssp { +entry: + %cmp = icmp eq i32 %a, 0 + %cmp1 = icmp eq i32 %b, 0 + %or.cond = or i1 %cmp, %cmp1 + br i1 %or.cond, label %if.then, label %if.end + +if.then: + %call = tail call i32 @foo() nounwind + br label %if.end + +if.end: + ret i32 7 +} +declare i32 @foo() + +%str1 = type { %str2 } +%str2 = type { [24 x i8], i8*, i32, %str1*, i32, [4 x i8], %str1*, %str1*, %str1*, %str1*, %str1*, %str1*, %str1*, %str1*, %str1*, i8*, i8, i8*, %str1*, i8* } + +; Test case distilled from 126.gcc. 
+; The phi in sw.bb.i.i gets multiple operands for the %entry predecessor. +; CHECK: build_modify_expr +define void @build_modify_expr() nounwind ssp { +entry: + switch i32 undef, label %sw.bb.i.i [ + i32 69, label %if.end85 + i32 70, label %if.end85 + i32 71, label %if.end85 + i32 72, label %if.end85 + i32 73, label %if.end85 + i32 105, label %if.end85 + i32 106, label %if.end85 + ] + +if.end85: + ret void + +sw.bb.i.i: + %ref.tr.i.i = phi %str1* [ %0, %sw.bb.i.i ], [ undef, %entry ] + %operands.i.i = getelementptr inbounds %str1* %ref.tr.i.i, i64 0, i32 0, i32 2 + %arrayidx.i.i = bitcast i32* %operands.i.i to %str1** + %0 = load %str1** %arrayidx.i.i, align 8 + %code1.i.i.phi.trans.insert = getelementptr inbounds %str1* %0, i64 0, i32 0, i32 0, i64 16 + br label %sw.bb.i.i +} diff --git a/test/CodeGen/ARM64/coalesce-ext.ll b/test/CodeGen/ARM64/coalesce-ext.ll new file mode 100644 index 0000000..9e8d08e --- /dev/null +++ b/test/CodeGen/ARM64/coalesce-ext.ll @@ -0,0 +1,17 @@ +; RUN: llc -march=arm64 -mtriple=arm64-apple-darwin < %s | FileCheck %s +; Check that the peephole optimizer knows about sext and zext instructions. +; CHECK: test1sext +define i32 @test1sext(i64 %A, i64 %B, i32* %P, i64 *%P2) nounwind { + %C = add i64 %A, %B + ; CHECK: add x[[SUM:[0-9]+]], x0, x1 + %D = trunc i64 %C to i32 + %E = shl i64 %C, 32 + %F = ashr i64 %E, 32 + ; CHECK: sxtw x[[EXT:[0-9]+]], x[[SUM]] + store volatile i64 %F, i64 *%P2 + ; CHECK: str x[[EXT]] + store volatile i32 %D, i32* %P + ; Reuse low bits of extended register, don't extend live range of SUM. + ; CHECK: str w[[SUM]] + ret i32 %D +} diff --git a/test/CodeGen/ARM64/code-model-large-abs.ll b/test/CodeGen/ARM64/code-model-large-abs.ll new file mode 100644 index 0000000..264da2d --- /dev/null +++ b/test/CodeGen/ARM64/code-model-large-abs.ll @@ -0,0 +1,72 @@ +; RUN: llc -mtriple=arm64-none-linux-gnu -code-model=large < %s | FileCheck %s + +@var8 = global i8 0 +@var16 = global i16 0 +@var32 = global i32 0 +@var64 = global i64 0 + +define i8* @global_addr() { +; CHECK-LABEL: global_addr: + ret i8* @var8 + ; The movz/movk calculation should end up returned directly in x0. 
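+; (:abs_g3: through :abs_g0_nc: build the 64-bit absolute address 16 bits at a time, as the large code model requires.)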
+; CHECK: movz x0, #:abs_g3:var8 +; CHECK: movk x0, #:abs_g2_nc:var8 +; CHECK: movk x0, #:abs_g1_nc:var8 +; CHECK: movk x0, #:abs_g0_nc:var8 +; CHECK-NEXT: ret +} + +define i8 @global_i8() { +; CHECK-LABEL: global_i8: + %val = load i8* @var8 + ret i8 %val +; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var8 +; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var8 +; CHECK: movk x[[ADDR_REG]], #:abs_g1_nc:var8 +; CHECK: movk x[[ADDR_REG]], #:abs_g0_nc:var8 +; CHECK: ldrb w0, [x[[ADDR_REG]]] +} + +define i16 @global_i16() { +; CHECK-LABEL: global_i16: + %val = load i16* @var16 + ret i16 %val +; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var16 +; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var16 +; CHECK: movk x[[ADDR_REG]], #:abs_g1_nc:var16 +; CHECK: movk x[[ADDR_REG]], #:abs_g0_nc:var16 +; CHECK: ldrh w0, [x[[ADDR_REG]]] +} + +define i32 @global_i32() { +; CHECK-LABEL: global_i32: + %val = load i32* @var32 + ret i32 %val +; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var32 +; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var32 +; CHECK: movk x[[ADDR_REG]], #:abs_g1_nc:var32 +; CHECK: movk x[[ADDR_REG]], #:abs_g0_nc:var32 +; CHECK: ldr w0, [x[[ADDR_REG]]] +} + +define i64 @global_i64() { +; CHECK-LABEL: global_i64: + %val = load i64* @var64 + ret i64 %val +; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var64 +; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var64 +; CHECK: movk x[[ADDR_REG]], #:abs_g1_nc:var64 +; CHECK: movk x[[ADDR_REG]], #:abs_g0_nc:var64 +; CHECK: ldr x0, [x[[ADDR_REG]]] +} + +define <2 x i64> @constpool() { +; CHECK-LABEL: constpool: + ret <2 x i64> + +; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:[[CPADDR:.LCPI[0-9]+_[0-9]+]] +; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:[[CPADDR]] +; CHECK: movk x[[ADDR_REG]], #:abs_g1_nc:[[CPADDR]] +; CHECK: movk x[[ADDR_REG]], #:abs_g0_nc:[[CPADDR]] +; CHECK: ldr q0, [x[[ADDR_REG]]] +} diff --git a/test/CodeGen/ARM64/collect-loh-garbage-crash.ll b/test/CodeGen/ARM64/collect-loh-garbage-crash.ll new file mode 100644 index 0000000..98cb625 --- /dev/null +++ b/test/CodeGen/ARM64/collect-loh-garbage-crash.ll @@ -0,0 +1,37 @@ +; RUN: llc -mtriple=arm64-apple-ios -O3 -arm64-collect-loh -arm64-collect-loh-bb-only=true -arm64-collect-loh-pre-collect-register=false < %s -o - | FileCheck %s +; Check that the LOH analysis does not crash when the analysed chained +; contains instructions that are filtered out. +; +; Before the fix for , these cases were removed +; from the main container. Now, the deterministic container does not allow +; to remove arbitrary values, so we have to live with garbage values. 
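+; (LOH = Linker Optimization Hints, such as the AdrpLdrGotLdr directive in the CHECK-NOT line below.)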
+; + +%"class.H4ISP::H4ISPDevice" = type { i32 (%"class.H4ISP::H4ISPDevice"*, i32, i8*, i8*)*, i8*, i32*, %"class.H4ISP::H4ISPCameraManager"* } + +%"class.H4ISP::H4ISPCameraManager" = type opaque + +declare i32 @_ZN5H4ISP11H4ISPDevice32ISP_SelectBestMIPIFrequencyIndexEjPj(%"class.H4ISP::H4ISPDevice"*) + +@pH4ISPDevice = hidden global %"class.H4ISP::H4ISPDevice"* null, align 8 + +; CHECK-LABEL: _foo: +; CHECK: ret +; CHECK-NOT: .loh AdrpLdrGotLdr +define void @foo() { +entry: + br label %if.then83 +if.then83: ; preds = %if.end81 + %tmp = load %"class.H4ISP::H4ISPDevice"** @pH4ISPDevice, align 8 + %call84 = call i32 @_ZN5H4ISP11H4ISPDevice32ISP_SelectBestMIPIFrequencyIndexEjPj(%"class.H4ISP::H4ISPDevice"* %tmp) #19 + tail call void asm sideeffect "", "~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27}"() + %tmp2 = load %"class.H4ISP::H4ISPDevice"** @pH4ISPDevice, align 8 + tail call void asm sideeffect "", "~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x28}"() + %pCameraManager.i268 = getelementptr inbounds %"class.H4ISP::H4ISPDevice"* %tmp2, i64 0, i32 3 + %tmp3 = load %"class.H4ISP::H4ISPCameraManager"** %pCameraManager.i268, align 8 + %tobool.i269 = icmp eq %"class.H4ISP::H4ISPCameraManager"* %tmp3, null + br i1 %tobool.i269, label %if.then83, label %end +end: + ret void +} + diff --git a/test/CodeGen/ARM64/collect-loh-str.ll b/test/CodeGen/ARM64/collect-loh-str.ll new file mode 100644 index 0000000..fc63f8b --- /dev/null +++ b/test/CodeGen/ARM64/collect-loh-str.ll @@ -0,0 +1,23 @@ +; RUN: llc -mtriple=arm64-apple-ios -O2 -arm64-collect-loh -arm64-collect-loh-bb-only=false < %s -o - | FileCheck %s +; Test case for . +; AdrpAddStr cannot be used when the store uses same +; register as address and value. Indeed, the related +; if applied, may completely remove the definition or +; at least provide a wrong one (with the offset folded +; into the definition). + +%struct.anon = type { i32*, i32** } + +@pptp_wan_head = internal global %struct.anon zeroinitializer, align 8 + +; CHECK-LABEL: _pptp_wan_init +; CHECK: ret +; CHECK-NOT: AdrpAddStr +define i32 @pptp_wan_init() { +entry: + store i32* null, i32** getelementptr inbounds (%struct.anon* @pptp_wan_head, i64 0, i32 0), align 8 + store i32** getelementptr inbounds (%struct.anon* @pptp_wan_head, i64 0, i32 0), i32*** getelementptr inbounds (%struct.anon* @pptp_wan_head, i64 0, i32 1), align 8 + ret i32 0 +} + + diff --git a/test/CodeGen/ARM64/collect-loh.ll b/test/CodeGen/ARM64/collect-loh.ll new file mode 100644 index 0000000..08ab062 --- /dev/null +++ b/test/CodeGen/ARM64/collect-loh.ll @@ -0,0 +1,47 @@ +; RUN: llc -mtriple=arm64-apple-ios -O2 -arm64-collect-loh -arm64-collect-loh-bb-only=false < %s -o - | FileCheck %s + +@a = internal unnamed_addr global i32 0, align 4 +@b = external global i32 + +; Function Attrs: noinline nounwind ssp +define void @foo(i32 %t) { +entry: + %tmp = load i32* @a, align 4 + %add = add nsw i32 %tmp, %t + store i32 %add, i32* @a, align 4 + ret void +} + +; Function Attrs: nounwind ssp +; Testcase for , AdrpAdrp reuse is valid only when the first adrp +; dominates the second. +; The first adrp comes from the loading of 'a' and the second the loading of 'b'. +; 'a' is loaded in if.then, 'b' in if.end4, if.then does not dominates if.end4. 
+; CHECK-LABEL: _test +; CHECK: ret +; CHECK-NOT: .loh AdrpAdrp +define i32 @test(i32 %t) { +entry: + %cmp = icmp sgt i32 %t, 5 + br i1 %cmp, label %if.then, label %if.end4 + +if.then: ; preds = %entry + %tmp = load i32* @a, align 4 + %add = add nsw i32 %tmp, %t + %cmp1 = icmp sgt i32 %add, 12 + br i1 %cmp1, label %if.then2, label %if.end4 + +if.then2: ; preds = %if.then + tail call void @foo(i32 %add) + %tmp1 = load i32* @a, align 4 + br label %if.end4 + +if.end4: ; preds = %if.then2, %if.then, %entry + %t.addr.0 = phi i32 [ %tmp1, %if.then2 ], [ %t, %if.then ], [ %t, %entry ] + %tmp2 = load i32* @b, align 4 + %add5 = add nsw i32 %tmp2, %t.addr.0 + tail call void @foo(i32 %add5) + %tmp3 = load i32* @b, align 4 + %add6 = add nsw i32 %tmp3, %t.addr.0 + ret i32 %add6 +} diff --git a/test/CodeGen/ARM64/compact-unwind-unhandled-cfi.S b/test/CodeGen/ARM64/compact-unwind-unhandled-cfi.S new file mode 100644 index 0000000..250732d --- /dev/null +++ b/test/CodeGen/ARM64/compact-unwind-unhandled-cfi.S @@ -0,0 +1,17 @@ +; RUN: llvm-mc -triple arm64-apple-darwin -filetype=obj -o /dev/null %s + + .text + .globl _foo + .cfi_startproc +_foo: + stp x29, x30, [sp, #-16]! + .cfi_adjust_cfa_offset 16 + + ldp x29, x30, [sp], #16 + .cfi_adjust_cfa_offset -16 + .cfi_restore x29 + .cfi_restore x30 + + ret + + .cfi_endproc diff --git a/test/CodeGen/ARM64/complex-ret.ll b/test/CodeGen/ARM64/complex-ret.ll new file mode 100644 index 0000000..93d50a5 --- /dev/null +++ b/test/CodeGen/ARM64/complex-ret.ll @@ -0,0 +1,7 @@ +; RUN: llc -march=arm64 -o - %s | FileCheck %s + +define { i192, i192, i21, i192 } @foo(i192) { +; CHECK-LABEL: foo: +; CHECK: stp xzr, xzr, [x8] + ret { i192, i192, i21, i192 } {i192 0, i192 1, i21 2, i192 3} +} diff --git a/test/CodeGen/ARM64/convert-v2f64-v2i32.ll b/test/CodeGen/ARM64/convert-v2f64-v2i32.ll new file mode 100644 index 0000000..1a07c98 --- /dev/null +++ b/test/CodeGen/ARM64/convert-v2f64-v2i32.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s + +; CHECK: fptosi_1 +; CHECK: fcvtzs.2d +; CHECK: xtn.2s +; CHECK: ret +define void @fptosi_1() nounwind noinline ssp { +entry: + %0 = fptosi <2 x double> undef to <2 x i32> + store <2 x i32> %0, <2 x i32>* undef, align 8 + ret void +} + +; CHECK: fptoui_1 +; CHECK: fcvtzu.2d +; CHECK: xtn.2s +; CHECK: ret +define void @fptoui_1() nounwind noinline ssp { +entry: + %0 = fptoui <2 x double> undef to <2 x i32> + store <2 x i32> %0, <2 x i32>* undef, align 8 + ret void +} + diff --git a/test/CodeGen/ARM64/convert-v2i32-v2f64.ll b/test/CodeGen/ARM64/convert-v2i32-v2f64.ll new file mode 100644 index 0000000..63129a4 --- /dev/null +++ b/test/CodeGen/ARM64/convert-v2i32-v2f64.ll @@ -0,0 +1,29 @@ +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s + +define <2 x double> @f1(<2 x i32> %v) nounwind readnone { +; CHECK-LABEL: f1: +; CHECK: sshll.2d v0, v0, #0 +; CHECK-NEXT: scvtf.2d v0, v0 +; CHECK-NEXT: ret + %conv = sitofp <2 x i32> %v to <2 x double> + ret <2 x double> %conv +} +define <2 x double> @f2(<2 x i32> %v) nounwind readnone { +; CHECK-LABEL: f2: +; CHECK: ushll.2d v0, v0, #0 +; CHECK-NEXT: ucvtf.2d v0, v0 +; CHECK-NEXT: ret + %conv = uitofp <2 x i32> %v to <2 x double> + ret <2 x double> %conv +} + +; CHECK: autogen_SD19655 +; CHECK: scvtf +; CHECK: ret +define void @autogen_SD19655() { + %T = load <2 x i64>* undef + %F = sitofp <2 x i64> undef to <2 x float> + store <2 x float> %F, <2 x float>* undef + ret void +} + diff --git a/test/CodeGen/ARM64/copy-tuple.ll 
b/test/CodeGen/ARM64/copy-tuple.ll new file mode 100644 index 0000000..6325c3f --- /dev/null +++ b/test/CodeGen/ARM64/copy-tuple.ll @@ -0,0 +1,146 @@ +; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s + +; The main purpose of this test is to find out whether copyPhysReg can deal with +; the memmove-like situation arising in tuples, where an early copy can clobber +; the value needed by a later one if the tuples overlap. + +; We use dummy inline asm to force LLVM to generate a COPY between the registers +; we want by clobbering all the others. + +define void @test_D1D2_from_D0D1(i8* %addr) #0 { +; CHECK-LABEL: test_D1D2_from_D0D1: +; CHECK: orr.8b v2, v1 +; CHECK: orr.8b v1, v0 +entry: + %addr_v8i8 = bitcast i8* %addr to <8 x i8>* + %vec = tail call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) + %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0 + %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1 + tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) + + tail call void asm sideeffect "", "~{v0},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) + ret void +} + +define void @test_D0D1_from_D1D2(i8* %addr) #0 { +; CHECK-LABEL: test_D0D1_from_D1D2: +; CHECK: orr.8b v0, v1 +; CHECK: orr.8b v1, v2 +entry: + %addr_v8i8 = bitcast i8* %addr to <8 x i8>* + %vec = tail call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) + %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0 + %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1 + tail call void asm sideeffect "", "~{v0},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) + + tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) + ret void +} + +define void @test_D0D1_from_D31D0(i8* %addr) #0 { +; CHECK-LABEL: test_D0D1_from_D31D0: +; CHECK: orr.8b v1, v0 +; CHECK: orr.8b v0, v31 +entry: + %addr_v8i8 = bitcast i8* %addr to <8 x i8>* + %vec = tail call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) + %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0 + %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1 + tail call void asm sideeffect "", "~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30}"() + tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) + + tail call void asm sideeffect "", 
"~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) + ret void +} + +define void @test_D31D0_from_D0D1(i8* %addr) #0 { +; CHECK-LABEL: test_D31D0_from_D0D1: +; CHECK: orr.8b v31, v0 +; CHECK: orr.8b v0, v1 +entry: + %addr_v8i8 = bitcast i8* %addr to <8 x i8>* + %vec = tail call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) + %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0 + %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1 + tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) + + tail call void asm sideeffect "", "~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30}"() + tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) + ret void +} + +define void @test_D2D3D4_from_D0D1D2(i8* %addr) #0 { +; CHECK-LABEL: test_D2D3D4_from_D0D1D2: +; CHECK: orr.8b v4, v2 +; CHECK: orr.8b v3, v1 +; CHECK: orr.8b v2, v0 +entry: + %addr_v8i8 = bitcast i8* %addr to <8 x i8>* + %vec = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld3.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) + %vec0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vec, 0 + %vec1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vec, 1 + %vec2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vec, 2 + + tail call void asm sideeffect "", "~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + tail call void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, <8 x i8> %vec2, i8* %addr) + + tail call void asm sideeffect "", "~{v0},~{v1},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + tail call void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, <8 x i8> %vec2, i8* %addr) + ret void +} + +define void @test_Q0Q1Q2_from_Q1Q2Q3(i8* %addr) #0 { +; CHECK-LABEL: test_Q0Q1Q2_from_Q1Q2Q3: +; CHECK: orr.16b v0, v1 +; CHECK: orr.16b v1, v2 +; CHECK: orr.16b v2, v3 +entry: + %addr_v16i8 = bitcast i8* %addr to <16 x i8>* + %vec = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld3.v16i8.p0v16i8(<16 x i8>* %addr_v16i8) + %vec0 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vec, 0 + %vec1 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vec, 1 + %vec2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vec, 2 + tail call void asm sideeffect "", "~{v0},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + tail call void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, i8* %addr) + + tail call void asm sideeffect "", 
"~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + tail call void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, i8* %addr) + ret void +} + +define void @test_Q1Q2Q3Q4_from_Q30Q31Q0Q1(i8* %addr) #0 { +; CHECK-LABEL: test_Q1Q2Q3Q4_from_Q30Q31Q0Q1: +; CHECK: orr.16b v4, v1 +; CHECK: orr.16b v3, v0 +; CHECK: orr.16b v2, v31 +; CHECK: orr.16b v1, v30 + %addr_v16i8 = bitcast i8* %addr to <16 x i8>* + %vec = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4.v16i8.p0v16i8(<16 x i8>* %addr_v16i8) + %vec0 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vec, 0 + %vec1 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vec, 1 + %vec2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vec, 2 + %vec3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vec, 3 + + tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}"() + tail call void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, <16 x i8> %vec3, i8* %addr) + + tail call void asm sideeffect "", "~{v0},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + tail call void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, <16 x i8> %vec3, i8* %addr) + ret void +} + +declare { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2.v8i8.p0v8i8(<8 x i8>*) +declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld3.v8i8.p0v8i8(<8 x i8>*) +declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld3.v16i8.p0v16i8(<16 x i8>*) +declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4.v16i8.p0v16i8(<16 x i8>*) + +declare void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) +declare void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) +declare void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) +declare void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) diff --git a/test/CodeGen/ARM64/crc32.ll b/test/CodeGen/ARM64/crc32.ll new file mode 100644 index 0000000..609eb44 --- /dev/null +++ b/test/CodeGen/ARM64/crc32.ll @@ -0,0 +1,71 @@ +; RUN: llc -march=arm64 -o - %s | FileCheck %s + +define i32 @test_crc32b(i32 %cur, i8 %next) { +; CHECK-LABEL: test_crc32b: +; CHECK: crc32b w0, w0, w1 + %bits = zext i8 %next to i32 + %val = call i32 @llvm.arm64.crc32b(i32 %cur, i32 %bits) + ret i32 %val +} + +define i32 @test_crc32h(i32 %cur, i16 %next) { +; CHECK-LABEL: test_crc32h: +; CHECK: crc32h w0, w0, w1 + %bits = zext i16 %next to i32 + %val = call i32 @llvm.arm64.crc32h(i32 %cur, i32 %bits) + ret i32 %val +} + +define i32 @test_crc32w(i32 %cur, i32 %next) { +; CHECK-LABEL: test_crc32w: +; CHECK: crc32w w0, w0, w1 + %val = call i32 @llvm.arm64.crc32w(i32 %cur, i32 %next) + ret i32 %val +} + +define i32 @test_crc32x(i32 %cur, i64 %next) { +; CHECK-LABEL: test_crc32x: +; CHECK: crc32x w0, w0, x1 + %val = call i32 @llvm.arm64.crc32x(i32 %cur, i64 %next) + ret i32 %val +} + +define i32 @test_crc32cb(i32 %cur, i8 %next) { +; CHECK-LABEL: test_crc32cb: +; CHECK: 
crc32cb w0, w0, w1 + %bits = zext i8 %next to i32 + %val = call i32 @llvm.arm64.crc32cb(i32 %cur, i32 %bits) + ret i32 %val +} + +define i32 @test_crc32ch(i32 %cur, i16 %next) { +; CHECK-LABEL: test_crc32ch: +; CHECK: crc32ch w0, w0, w1 + %bits = zext i16 %next to i32 + %val = call i32 @llvm.arm64.crc32ch(i32 %cur, i32 %bits) + ret i32 %val +} + +define i32 @test_crc32cw(i32 %cur, i32 %next) { +; CHECK-LABEL: test_crc32cw: +; CHECK: crc32cw w0, w0, w1 + %val = call i32 @llvm.arm64.crc32cw(i32 %cur, i32 %next) + ret i32 %val +} + +define i32 @test_crc32cx(i32 %cur, i64 %next) { +; CHECK-LABEL: test_crc32cx: +; CHECK: crc32cx w0, w0, x1 + %val = call i32 @llvm.arm64.crc32cx(i32 %cur, i64 %next) + ret i32 %val +} + +declare i32 @llvm.arm64.crc32b(i32, i32) +declare i32 @llvm.arm64.crc32h(i32, i32) +declare i32 @llvm.arm64.crc32w(i32, i32) +declare i32 @llvm.arm64.crc32x(i32, i64) + +declare i32 @llvm.arm64.crc32cb(i32, i32) +declare i32 @llvm.arm64.crc32ch(i32, i32) +declare i32 @llvm.arm64.crc32cw(i32, i32) +declare i32 @llvm.arm64.crc32cx(i32, i64) diff --git a/test/CodeGen/ARM64/crypto.ll b/test/CodeGen/ARM64/crypto.ll new file mode 100644 index 0000000..3804310 --- /dev/null +++ b/test/CodeGen/ARM64/crypto.ll @@ -0,0 +1,135 @@ +; RUN: llc -march=arm64 -arm64-neon-syntax=apple -o - %s | FileCheck %s + +declare <16 x i8> @llvm.arm64.crypto.aese(<16 x i8> %data, <16 x i8> %key) +declare <16 x i8> @llvm.arm64.crypto.aesd(<16 x i8> %data, <16 x i8> %key) +declare <16 x i8> @llvm.arm64.crypto.aesmc(<16 x i8> %data) +declare <16 x i8> @llvm.arm64.crypto.aesimc(<16 x i8> %data) + +define <16 x i8> @test_aese(<16 x i8> %data, <16 x i8> %key) { +; CHECK-LABEL: test_aese: +; CHECK: aese.16b v0, v1 + %res = call <16 x i8> @llvm.arm64.crypto.aese(<16 x i8> %data, <16 x i8> %key) + ret <16 x i8> %res +} + +define <16 x i8> @test_aesd(<16 x i8> %data, <16 x i8> %key) { +; CHECK-LABEL: test_aesd: +; CHECK: aesd.16b v0, v1 + %res = call <16 x i8> @llvm.arm64.crypto.aesd(<16 x i8> %data, <16 x i8> %key) + ret <16 x i8> %res +} + +define <16 x i8> @test_aesmc(<16 x i8> %data) { +; CHECK-LABEL: test_aesmc: +; CHECK: aesmc.16b v0, v0 + %res = call <16 x i8> @llvm.arm64.crypto.aesmc(<16 x i8> %data) + ret <16 x i8> %res +} + +define <16 x i8> @test_aesimc(<16 x i8> %data) { +; CHECK-LABEL: test_aesimc: +; CHECK: aesimc.16b v0, v0 + %res = call <16 x i8> @llvm.arm64.crypto.aesimc(<16 x i8> %data) + ret <16 x i8> %res +} + +declare <4 x i32> @llvm.arm64.crypto.sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) +declare <4 x i32> @llvm.arm64.crypto.sha1p(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) +declare <4 x i32> @llvm.arm64.crypto.sha1m(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) +declare i32 @llvm.arm64.crypto.sha1h(i32 %hash_e) +declare <4 x i32> @llvm.arm64.crypto.sha1su0(<4 x i32> %wk0_3, <4 x i32> %wk4_7, <4 x i32> %wk8_11) +declare <4 x i32> @llvm.arm64.crypto.sha1su1(<4 x i32> %wk0_3, <4 x i32> %wk12_15) + +define <4 x i32> @test_sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) { +; CHECK-LABEL: test_sha1c: +; CHECK: fmov [[HASH_E:s[0-9]+]], w0 +; CHECK: sha1c.4s q0, [[HASH_E]], v1 + %res = call <4 x i32> @llvm.arm64.crypto.sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) + ret <4 x i32> %res +} + +; Incomplete removal of unnecessary FMOV instructions in intrinsic SHA1 +define <4 x i32> @test_sha1c_in_a_row(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) { +; CHECK-LABEL: test_sha1c_in_a_row: +; CHECK: fmov [[HASH_E:s[0-9]+]], w0 +; CHECK: sha1c.4s q[[SHA1RES:[0-9]+]], 
[[HASH_E]], v1 +; CHECK-NOT: fmov +; CHECK: sha1c.4s q0, s[[SHA1RES]], v1 + %res = call <4 x i32> @llvm.arm64.crypto.sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) + %extract = extractelement <4 x i32> %res, i32 0 + %res2 = call <4 x i32> @llvm.arm64.crypto.sha1c(<4 x i32> %hash_abcd, i32 %extract, <4 x i32> %wk) + ret <4 x i32> %res2 +} + +define <4 x i32> @test_sha1p(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) { +; CHECK-LABEL: test_sha1p: +; CHECK: fmov [[HASH_E:s[0-9]+]], w0 +; CHECK: sha1p.4s q0, [[HASH_E]], v1 + %res = call <4 x i32> @llvm.arm64.crypto.sha1p(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) + ret <4 x i32> %res +} + +define <4 x i32> @test_sha1m(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) { +; CHECK-LABEL: test_sha1m: +; CHECK: fmov [[HASH_E:s[0-9]+]], w0 +; CHECK: sha1m.4s q0, [[HASH_E]], v1 + %res = call <4 x i32> @llvm.arm64.crypto.sha1m(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) + ret <4 x i32> %res +} + +define i32 @test_sha1h(i32 %hash_e) { +; CHECK-LABEL: test_sha1h: +; CHECK: fmov [[HASH_E:s[0-9]+]], w0 +; CHECK: sha1h [[RES:s[0-9]+]], [[HASH_E]] +; CHECK: fmov w0, [[RES]] + %res = call i32 @llvm.arm64.crypto.sha1h(i32 %hash_e) + ret i32 %res +} + +define <4 x i32> @test_sha1su0(<4 x i32> %wk0_3, <4 x i32> %wk4_7, <4 x i32> %wk8_11) { +; CHECK-LABEL: test_sha1su0: +; CHECK: sha1su0.4s v0, v1, v2 + %res = call <4 x i32> @llvm.arm64.crypto.sha1su0(<4 x i32> %wk0_3, <4 x i32> %wk4_7, <4 x i32> %wk8_11) + ret <4 x i32> %res +} + +define <4 x i32> @test_sha1su1(<4 x i32> %wk0_3, <4 x i32> %wk12_15) { +; CHECK-LABEL: test_sha1su1: +; CHECK: sha1su1.4s v0, v1 + %res = call <4 x i32> @llvm.arm64.crypto.sha1su1(<4 x i32> %wk0_3, <4 x i32> %wk12_15) + ret <4 x i32> %res +} + +declare <4 x i32> @llvm.arm64.crypto.sha256h(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk) +declare <4 x i32> @llvm.arm64.crypto.sha256h2(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk) +declare <4 x i32> @llvm.arm64.crypto.sha256su0(<4 x i32> %w0_3, <4 x i32> %w4_7) +declare <4 x i32> @llvm.arm64.crypto.sha256su1(<4 x i32> %w0_3, <4 x i32> %w8_11, <4 x i32> %w12_15) + +define <4 x i32> @test_sha256h(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk) { +; CHECK-LABEL: test_sha256h: +; CHECK: sha256h.4s q0, q1, v2 + %res = call <4 x i32> @llvm.arm64.crypto.sha256h(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk) + ret <4 x i32> %res +} + +define <4 x i32> @test_sha256h2(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk) { +; CHECK-LABEL: test_sha256h2: +; CHECK: sha256h2.4s q0, q1, v2 + + %res = call <4 x i32> @llvm.arm64.crypto.sha256h2(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk) + ret <4 x i32> %res +} + +define <4 x i32> @test_sha256su0(<4 x i32> %w0_3, <4 x i32> %w4_7) { +; CHECK-LABEL: test_sha256su0: +; CHECK: sha256su0.4s v0, v1 + %res = call <4 x i32> @llvm.arm64.crypto.sha256su0(<4 x i32> %w0_3, <4 x i32> %w4_7) + ret <4 x i32> %res +} + +define <4 x i32> @test_sha256su1(<4 x i32> %w0_3, <4 x i32> %w8_11, <4 x i32> %w12_15) { +; CHECK-LABEL: test_sha256su1: +; CHECK: sha256su1.4s v0, v1, v2 + %res = call <4 x i32> @llvm.arm64.crypto.sha256su1(<4 x i32> %w0_3, <4 x i32> %w8_11, <4 x i32> %w12_15) + ret <4 x i32> %res +} diff --git a/test/CodeGen/ARM64/cse.ll b/test/CodeGen/ARM64/cse.ll new file mode 100644 index 0000000..d98bfd6 --- /dev/null +++ b/test/CodeGen/ARM64/cse.ll @@ -0,0 +1,59 @@ +; RUN: llc -O3 < %s | FileCheck %s +target triple = "arm64-apple-ios" + +; rdar://12462006 +; CSE between 
"icmp reg reg" and "sub reg reg". +; Both can be in the same basic block or in different basic blocks. +define i8* @t1(i8* %base, i32* nocapture %offset, i32 %size) nounwind { +entry: +; CHECK-LABEL: t1: +; CHECK: subs +; CHECK-NOT: cmp +; CHECK-NOT: sub +; CHECK: b.ge +; CHECK: sub +; CHECK: sub +; CHECK_NOT: sub +; CHECK: ret + %0 = load i32* %offset, align 4 + %cmp = icmp slt i32 %0, %size + %s = sub nsw i32 %0, %size + br i1 %cmp, label %return, label %if.end + +if.end: + %sub = sub nsw i32 %0, %size + %s2 = sub nsw i32 %s, %size + %s3 = sub nsw i32 %sub, %s2 + store i32 %s3, i32* %offset, align 4 + %add.ptr = getelementptr inbounds i8* %base, i32 %sub + br label %return + +return: + %retval.0 = phi i8* [ %add.ptr, %if.end ], [ null, %entry ] + ret i8* %retval.0 +} + +; CSE between "icmp reg imm" and "sub reg imm". +define i8* @t2(i8* %base, i32* nocapture %offset) nounwind { +entry: +; CHECK-LABEL: t2: +; CHECK: subs +; CHECK-NOT: cmp +; CHECK-NOT: sub +; CHECK: b.lt +; CHECK-NOT: sub +; CHECK: ret + %0 = load i32* %offset, align 4 + %cmp = icmp slt i32 %0, 1 + br i1 %cmp, label %return, label %if.end + +if.end: + %sub = sub nsw i32 %0, 1 + store i32 %sub, i32* %offset, align 4 + %add.ptr = getelementptr inbounds i8* %base, i32 %sub + br label %return + +return: + %retval.0 = phi i8* [ %add.ptr, %if.end ], [ null, %entry ] + ret i8* %retval.0 +} diff --git a/test/CodeGen/ARM64/csel.ll b/test/CodeGen/ARM64/csel.ll new file mode 100644 index 0000000..cbf1769 --- /dev/null +++ b/test/CodeGen/ARM64/csel.ll @@ -0,0 +1,222 @@ +; RUN: llc -O3 < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64" +target triple = "arm64-unknown-unknown" + +; CHECK: foo1 +; CHECK: csinc w{{[0-9]+}}, w[[REG:[0-9]+]], +; CHECK: w[[REG]], eq +define i32 @foo1(i32 %b, i32 %c) nounwind readnone ssp { +entry: + %not.tobool = icmp ne i32 %c, 0 + %add = zext i1 %not.tobool to i32 + %b.add = add i32 %c, %b + %add1 = add i32 %b.add, %add + ret i32 %add1 +} + +; CHECK: foo2 +; CHECK: csneg w{{[0-9]+}}, w[[REG:[0-9]+]], +; CHECK: w[[REG]], eq +define i32 @foo2(i32 %b, i32 %c) nounwind readnone ssp { +entry: + %mul = sub i32 0, %b + %tobool = icmp eq i32 %c, 0 + %b.mul = select i1 %tobool, i32 %b, i32 %mul + %add = add nsw i32 %b.mul, %c + ret i32 %add +} + +; CHECK: foo3 +; CHECK: csinv w{{[0-9]+}}, w[[REG:[0-9]+]], +; CHECK: w[[REG]], eq +define i32 @foo3(i32 %b, i32 %c) nounwind readnone ssp { +entry: + %not.tobool = icmp ne i32 %c, 0 + %xor = sext i1 %not.tobool to i32 + %b.xor = xor i32 %xor, %b + %add = add nsw i32 %b.xor, %c + ret i32 %add +} + +; rdar://11632325 +define i32@foo4(i32 %a) nounwind ssp { +; CHECK: foo4 +; CHECK: csneg +; CHECK-NEXT: ret + %cmp = icmp sgt i32 %a, -1 + %neg = sub nsw i32 0, %a + %cond = select i1 %cmp, i32 %a, i32 %neg + ret i32 %cond +} + +define i32@foo5(i32 %a, i32 %b) nounwind ssp { +entry: +; CHECK: foo5 +; CHECK: subs +; CHECK-NEXT: csneg +; CHECK-NEXT: ret + %sub = sub nsw i32 %a, %b + %cmp = icmp sgt i32 %sub, -1 + %sub3 = sub nsw i32 0, %sub + %cond = select i1 %cmp, i32 %sub, i32 %sub3 + ret i32 %cond +} + +; make sure we can handle branch instruction in optimizeCompare. +define i32@foo6(i32 %a, i32 %b) nounwind ssp { +; CHECK: foo6 +; CHECK: b + %sub = sub nsw i32 %a, %b + %cmp = icmp sgt i32 %sub, 0 + br i1 %cmp, label %l.if, label %l.else + +l.if: + ret i32 1 + +l.else: + ret i32 %sub +} + +; If CPSR is used multiple times and V flag is used, we don't remove cmp. 
+define i32 @foo7(i32 %a, i32 %b) nounwind { +entry: +; CHECK-LABEL: foo7: +; CHECK: sub +; CHECK-NEXT: adds +; CHECK-NEXT: csneg +; CHECK-NEXT: b + %sub = sub nsw i32 %a, %b + %cmp = icmp sgt i32 %sub, -1 + %sub3 = sub nsw i32 0, %sub + %cond = select i1 %cmp, i32 %sub, i32 %sub3 + br i1 %cmp, label %if.then, label %if.else + +if.then: + %cmp2 = icmp slt i32 %sub, -1 + %sel = select i1 %cmp2, i32 %cond, i32 %a + ret i32 %sel + +if.else: + ret i32 %cond +} + +define i32 @foo8(i32 %v, i32 %a, i32 %b) nounwind readnone ssp { +entry: +; CHECK-LABEL: foo8: +; CHECK: cmp w0, #0 +; CHECK: csinv w0, w1, w2, ne + %tobool = icmp eq i32 %v, 0 + %neg = xor i32 -1, %b + %cond = select i1 %tobool, i32 %neg, i32 %a + ret i32 %cond +} + +define i32 @foo9(i32 %v) nounwind readnone optsize ssp { +entry: +; CHECK-LABEL: foo9: +; CHECK: cmp w0, #0 +; CHECK: orr w[[REG:[0-9]+]], wzr, #0x4 +; CHECK: csinv w0, w[[REG]], w[[REG]], ne + %tobool = icmp ne i32 %v, 0 + %cond = select i1 %tobool, i32 4, i32 -5 + ret i32 %cond +} + +define i64 @foo10(i64 %v) nounwind readnone optsize ssp { +entry: +; CHECK-LABEL: foo10: +; CHECK: cmp x0, #0 +; CHECK: orr x[[REG:[0-9]+]], xzr, #0x4 +; CHECK: csinv x0, x[[REG]], x[[REG]], ne + %tobool = icmp ne i64 %v, 0 + %cond = select i1 %tobool, i64 4, i64 -5 + ret i64 %cond +} + +define i32 @foo11(i32 %v) nounwind readnone optsize ssp { +entry: +; CHECK-LABEL: foo11: +; CHECK: cmp w0, #0 +; CHECK: orr w[[REG:[0-9]+]], wzr, #0x4 +; CHECK: csneg w0, w[[REG]], w[[REG]], ne + %tobool = icmp ne i32 %v, 0 + %cond = select i1 %tobool, i32 4, i32 -4 + ret i32 %cond +} + +define i64 @foo12(i64 %v) nounwind readnone optsize ssp { +entry: +; CHECK-LABEL: foo12: +; CHECK: cmp x0, #0 +; CHECK: orr x[[REG:[0-9]+]], xzr, #0x4 +; CHECK: csneg x0, x[[REG]], x[[REG]], ne + %tobool = icmp ne i64 %v, 0 + %cond = select i1 %tobool, i64 4, i64 -4 + ret i64 %cond +} + +define i32 @foo13(i32 %v, i32 %a, i32 %b) nounwind readnone optsize ssp { +entry: +; CHECK-LABEL: foo13: +; CHECK: cmp w0, #0 +; CHECK: csneg w0, w1, w2, ne + %tobool = icmp eq i32 %v, 0 + %sub = sub i32 0, %b + %cond = select i1 %tobool, i32 %sub, i32 %a + ret i32 %cond +} + +define i64 @foo14(i64 %v, i64 %a, i64 %b) nounwind readnone optsize ssp { +entry: +; CHECK-LABEL: foo14: +; CHECK: cmp x0, #0 +; CHECK: csneg x0, x1, x2, ne + %tobool = icmp eq i64 %v, 0 + %sub = sub i64 0, %b + %cond = select i1 %tobool, i64 %sub, i64 %a + ret i64 %cond +} + +define i32 @foo15(i32 %a, i32 %b) nounwind readnone optsize ssp { +entry: +; CHECK-LABEL: foo15: +; CHECK: cmp w0, w1 +; CHECK: orr w[[REG:[0-9]+]], wzr, #0x1 +; CHECK: csinc w0, w[[REG]], w[[REG]], le + %cmp = icmp sgt i32 %a, %b + %. = select i1 %cmp, i32 2, i32 1 + ret i32 %. +} + +define i32 @foo16(i32 %a, i32 %b) nounwind readnone optsize ssp { +entry: +; CHECK-LABEL: foo16: +; CHECK: cmp w0, w1 +; CHECK: orr w[[REG:[0-9]+]], wzr, #0x1 +; CHECK: csinc w0, w[[REG]], w[[REG]], gt + %cmp = icmp sgt i32 %a, %b + %. = select i1 %cmp, i32 1, i32 2 + ret i32 %. +} + +define i64 @foo17(i64 %a, i64 %b) nounwind readnone optsize ssp { +entry: +; CHECK-LABEL: foo17: +; CHECK: cmp x0, x1 +; CHECK: orr x[[REG:[0-9]+]], xzr, #0x1 +; CHECK: csinc x0, x[[REG]], x[[REG]], le + %cmp = icmp sgt i64 %a, %b + %. = select i1 %cmp, i64 2, i64 1 + ret i64 %. +} + +define i64 @foo18(i64 %a, i64 %b) nounwind readnone optsize ssp { +entry: +; CHECK-LABEL: foo18: +; CHECK: cmp x0, x1 +; CHECK: orr x[[REG:[0-9]+]], xzr, #0x1 +; CHECK: csinc x0, x[[REG]], x[[REG]], gt + %cmp = icmp sgt i64 %a, %b + %. 
= select i1 %cmp, i64 1, i64 2 + ret i64 %. +} diff --git a/test/CodeGen/ARM64/cvt.ll b/test/CodeGen/ARM64/cvt.ll new file mode 100644 index 0000000..b55a42f --- /dev/null +++ b/test/CodeGen/ARM64/cvt.ll @@ -0,0 +1,401 @@ +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s + +; +; Floating-point scalar convert to signed integer (to nearest with ties to away) +; +define i32 @fcvtas_1w1s(float %A) nounwind { +;CHECK-LABEL: fcvtas_1w1s: +;CHECK: fcvtas w0, s0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtas.i32.f32(float %A) + ret i32 %tmp3 +} + +define i64 @fcvtas_1x1s(float %A) nounwind { +;CHECK-LABEL: fcvtas_1x1s: +;CHECK: fcvtas x0, s0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtas.i64.f32(float %A) + ret i64 %tmp3 +} + +define i32 @fcvtas_1w1d(double %A) nounwind { +;CHECK-LABEL: fcvtas_1w1d: +;CHECK: fcvtas w0, d0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtas.i32.f64(double %A) + ret i32 %tmp3 +} + +define i64 @fcvtas_1x1d(double %A) nounwind { +;CHECK-LABEL: fcvtas_1x1d: +;CHECK: fcvtas x0, d0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtas.i64.f64(double %A) + ret i64 %tmp3 +} + +declare i32 @llvm.arm64.neon.fcvtas.i32.f32(float) nounwind readnone +declare i64 @llvm.arm64.neon.fcvtas.i64.f32(float) nounwind readnone +declare i32 @llvm.arm64.neon.fcvtas.i32.f64(double) nounwind readnone +declare i64 @llvm.arm64.neon.fcvtas.i64.f64(double) nounwind readnone + +; +; Floating-point scalar convert to unsigned integer +; +define i32 @fcvtau_1w1s(float %A) nounwind { +;CHECK-LABEL: fcvtau_1w1s: +;CHECK: fcvtau w0, s0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtau.i32.f32(float %A) + ret i32 %tmp3 +} + +define i64 @fcvtau_1x1s(float %A) nounwind { +;CHECK-LABEL: fcvtau_1x1s: +;CHECK: fcvtau x0, s0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtau.i64.f32(float %A) + ret i64 %tmp3 +} + +define i32 @fcvtau_1w1d(double %A) nounwind { +;CHECK-LABEL: fcvtau_1w1d: +;CHECK: fcvtau w0, d0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtau.i32.f64(double %A) + ret i32 %tmp3 +} + +define i64 @fcvtau_1x1d(double %A) nounwind { +;CHECK-LABEL: fcvtau_1x1d: +;CHECK: fcvtau x0, d0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtau.i64.f64(double %A) + ret i64 %tmp3 +} + +declare i32 @llvm.arm64.neon.fcvtau.i32.f32(float) nounwind readnone +declare i64 @llvm.arm64.neon.fcvtau.i64.f32(float) nounwind readnone +declare i32 @llvm.arm64.neon.fcvtau.i32.f64(double) nounwind readnone +declare i64 @llvm.arm64.neon.fcvtau.i64.f64(double) nounwind readnone + +; +; Floating-point scalar convert to signed integer (toward -Inf) +; +define i32 @fcvtms_1w1s(float %A) nounwind { +;CHECK-LABEL: fcvtms_1w1s: +;CHECK: fcvtms w0, s0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtms.i32.f32(float %A) + ret i32 %tmp3 +} + +define i64 @fcvtms_1x1s(float %A) nounwind { +;CHECK-LABEL: fcvtms_1x1s: +;CHECK: fcvtms x0, s0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtms.i64.f32(float %A) + ret i64 %tmp3 +} + +define i32 @fcvtms_1w1d(double %A) nounwind { +;CHECK-LABEL: fcvtms_1w1d: +;CHECK: fcvtms w0, d0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtms.i32.f64(double %A) + ret i32 %tmp3 +} + +define i64 @fcvtms_1x1d(double %A) nounwind { +;CHECK-LABEL: fcvtms_1x1d: +;CHECK: fcvtms x0, d0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtms.i64.f64(double %A) + ret i64 %tmp3 +} + +declare i32 @llvm.arm64.neon.fcvtms.i32.f32(float) nounwind readnone +declare i64 
@llvm.arm64.neon.fcvtms.i64.f32(float) nounwind readnone +declare i32 @llvm.arm64.neon.fcvtms.i32.f64(double) nounwind readnone +declare i64 @llvm.arm64.neon.fcvtms.i64.f64(double) nounwind readnone + +; +; Floating-point scalar convert to unsigned integer (toward -Inf) +; +define i32 @fcvtmu_1w1s(float %A) nounwind { +;CHECK-LABEL: fcvtmu_1w1s: +;CHECK: fcvtmu w0, s0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtmu.i32.f32(float %A) + ret i32 %tmp3 +} + +define i64 @fcvtmu_1x1s(float %A) nounwind { +;CHECK-LABEL: fcvtmu_1x1s: +;CHECK: fcvtmu x0, s0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtmu.i64.f32(float %A) + ret i64 %tmp3 +} + +define i32 @fcvtmu_1w1d(double %A) nounwind { +;CHECK-LABEL: fcvtmu_1w1d: +;CHECK: fcvtmu w0, d0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtmu.i32.f64(double %A) + ret i32 %tmp3 +} + +define i64 @fcvtmu_1x1d(double %A) nounwind { +;CHECK-LABEL: fcvtmu_1x1d: +;CHECK: fcvtmu x0, d0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtmu.i64.f64(double %A) + ret i64 %tmp3 +} + +declare i32 @llvm.arm64.neon.fcvtmu.i32.f32(float) nounwind readnone +declare i64 @llvm.arm64.neon.fcvtmu.i64.f32(float) nounwind readnone +declare i32 @llvm.arm64.neon.fcvtmu.i32.f64(double) nounwind readnone +declare i64 @llvm.arm64.neon.fcvtmu.i64.f64(double) nounwind readnone + +; +; Floating-point scalar convert to signed integer (to nearest with ties to even) +; +define i32 @fcvtns_1w1s(float %A) nounwind { +;CHECK-LABEL: fcvtns_1w1s: +;CHECK: fcvtns w0, s0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtns.i32.f32(float %A) + ret i32 %tmp3 +} + +define i64 @fcvtns_1x1s(float %A) nounwind { +;CHECK-LABEL: fcvtns_1x1s: +;CHECK: fcvtns x0, s0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtns.i64.f32(float %A) + ret i64 %tmp3 +} + +define i32 @fcvtns_1w1d(double %A) nounwind { +;CHECK-LABEL: fcvtns_1w1d: +;CHECK: fcvtns w0, d0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtns.i32.f64(double %A) + ret i32 %tmp3 +} + +define i64 @fcvtns_1x1d(double %A) nounwind { +;CHECK-LABEL: fcvtns_1x1d: +;CHECK: fcvtns x0, d0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtns.i64.f64(double %A) + ret i64 %tmp3 +} + +declare i32 @llvm.arm64.neon.fcvtns.i32.f32(float) nounwind readnone +declare i64 @llvm.arm64.neon.fcvtns.i64.f32(float) nounwind readnone +declare i32 @llvm.arm64.neon.fcvtns.i32.f64(double) nounwind readnone +declare i64 @llvm.arm64.neon.fcvtns.i64.f64(double) nounwind readnone + +; +; Floating-point scalar convert to unsigned integer (to nearest with ties to even) +; +define i32 @fcvtnu_1w1s(float %A) nounwind { +;CHECK-LABEL: fcvtnu_1w1s: +;CHECK: fcvtnu w0, s0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtnu.i32.f32(float %A) + ret i32 %tmp3 +} + +define i64 @fcvtnu_1x1s(float %A) nounwind { +;CHECK-LABEL: fcvtnu_1x1s: +;CHECK: fcvtnu x0, s0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtnu.i64.f32(float %A) + ret i64 %tmp3 +} + +define i32 @fcvtnu_1w1d(double %A) nounwind { +;CHECK-LABEL: fcvtnu_1w1d: +;CHECK: fcvtnu w0, d0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtnu.i32.f64(double %A) + ret i32 %tmp3 +} + +define i64 @fcvtnu_1x1d(double %A) nounwind { +;CHECK-LABEL: fcvtnu_1x1d: +;CHECK: fcvtnu x0, d0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtnu.i64.f64(double %A) + ret i64 %tmp3 +} + +declare i32 @llvm.arm64.neon.fcvtnu.i32.f32(float) nounwind readnone +declare i64 @llvm.arm64.neon.fcvtnu.i64.f32(float) nounwind readnone +declare i32 
@llvm.arm64.neon.fcvtnu.i32.f64(double) nounwind readnone +declare i64 @llvm.arm64.neon.fcvtnu.i64.f64(double) nounwind readnone + +; +; Floating-point scalar convert to signed integer (toward +Inf) +; +define i32 @fcvtps_1w1s(float %A) nounwind { +;CHECK-LABEL: fcvtps_1w1s: +;CHECK: fcvtps w0, s0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtps.i32.f32(float %A) + ret i32 %tmp3 +} + +define i64 @fcvtps_1x1s(float %A) nounwind { +;CHECK-LABEL: fcvtps_1x1s: +;CHECK: fcvtps x0, s0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtps.i64.f32(float %A) + ret i64 %tmp3 +} + +define i32 @fcvtps_1w1d(double %A) nounwind { +;CHECK-LABEL: fcvtps_1w1d: +;CHECK: fcvtps w0, d0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtps.i32.f64(double %A) + ret i32 %tmp3 +} + +define i64 @fcvtps_1x1d(double %A) nounwind { +;CHECK-LABEL: fcvtps_1x1d: +;CHECK: fcvtps x0, d0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtps.i64.f64(double %A) + ret i64 %tmp3 +} + +declare i32 @llvm.arm64.neon.fcvtps.i32.f32(float) nounwind readnone +declare i64 @llvm.arm64.neon.fcvtps.i64.f32(float) nounwind readnone +declare i32 @llvm.arm64.neon.fcvtps.i32.f64(double) nounwind readnone +declare i64 @llvm.arm64.neon.fcvtps.i64.f64(double) nounwind readnone + +; +; Floating-point scalar convert to unsigned integer (toward +Inf) +; +define i32 @fcvtpu_1w1s(float %A) nounwind { +;CHECK-LABEL: fcvtpu_1w1s: +;CHECK: fcvtpu w0, s0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtpu.i32.f32(float %A) + ret i32 %tmp3 +} + +define i64 @fcvtpu_1x1s(float %A) nounwind { +;CHECK-LABEL: fcvtpu_1x1s: +;CHECK: fcvtpu x0, s0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtpu.i64.f32(float %A) + ret i64 %tmp3 +} + +define i32 @fcvtpu_1w1d(double %A) nounwind { +;CHECK-LABEL: fcvtpu_1w1d: +;CHECK: fcvtpu w0, d0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtpu.i32.f64(double %A) + ret i32 %tmp3 +} + +define i64 @fcvtpu_1x1d(double %A) nounwind { +;CHECK-LABEL: fcvtpu_1x1d: +;CHECK: fcvtpu x0, d0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtpu.i64.f64(double %A) + ret i64 %tmp3 +} + +declare i32 @llvm.arm64.neon.fcvtpu.i32.f32(float) nounwind readnone +declare i64 @llvm.arm64.neon.fcvtpu.i64.f32(float) nounwind readnone +declare i32 @llvm.arm64.neon.fcvtpu.i32.f64(double) nounwind readnone +declare i64 @llvm.arm64.neon.fcvtpu.i64.f64(double) nounwind readnone + +; +; Floating-point scalar convert to signed integer (toward zero) +; +define i32 @fcvtzs_1w1s(float %A) nounwind { +;CHECK-LABEL: fcvtzs_1w1s: +;CHECK: fcvtzs w0, s0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtzs.i32.f32(float %A) + ret i32 %tmp3 +} + +define i64 @fcvtzs_1x1s(float %A) nounwind { +;CHECK-LABEL: fcvtzs_1x1s: +;CHECK: fcvtzs x0, s0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtzs.i64.f32(float %A) + ret i64 %tmp3 +} + +define i32 @fcvtzs_1w1d(double %A) nounwind { +;CHECK-LABEL: fcvtzs_1w1d: +;CHECK: fcvtzs w0, d0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtzs.i32.f64(double %A) + ret i32 %tmp3 +} + +define i64 @fcvtzs_1x1d(double %A) nounwind { +;CHECK-LABEL: fcvtzs_1x1d: +;CHECK: fcvtzs x0, d0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtzs.i64.f64(double %A) + ret i64 %tmp3 +} + +declare i32 @llvm.arm64.neon.fcvtzs.i32.f32(float) nounwind readnone +declare i64 @llvm.arm64.neon.fcvtzs.i64.f32(float) nounwind readnone +declare i32 @llvm.arm64.neon.fcvtzs.i32.f64(double) nounwind readnone +declare i64 @llvm.arm64.neon.fcvtzs.i64.f64(double) 
nounwind readnone + +; +; Floating-point scalar convert to unsigned integer (toward zero) +; +define i32 @fcvtzu_1w1s(float %A) nounwind { +;CHECK-LABEL: fcvtzu_1w1s: +;CHECK: fcvtzu w0, s0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtzu.i32.f32(float %A) + ret i32 %tmp3 +} + +define i64 @fcvtzu_1x1s(float %A) nounwind { +;CHECK-LABEL: fcvtzu_1x1s: +;CHECK: fcvtzu x0, s0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtzu.i64.f32(float %A) + ret i64 %tmp3 +} + +define i32 @fcvtzu_1w1d(double %A) nounwind { +;CHECK-LABEL: fcvtzu_1w1d: +;CHECK: fcvtzu w0, d0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtzu.i32.f64(double %A) + ret i32 %tmp3 +} + +define i64 @fcvtzu_1x1d(double %A) nounwind { +;CHECK-LABEL: fcvtzu_1x1d: +;CHECK: fcvtzu x0, d0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtzu.i64.f64(double %A) + ret i64 %tmp3 +} + +declare i32 @llvm.arm64.neon.fcvtzu.i32.f32(float) nounwind readnone +declare i64 @llvm.arm64.neon.fcvtzu.i64.f32(float) nounwind readnone +declare i32 @llvm.arm64.neon.fcvtzu.i32.f64(double) nounwind readnone +declare i64 @llvm.arm64.neon.fcvtzu.i64.f64(double) nounwind readnone diff --git a/test/CodeGen/ARM64/dagcombiner-convergence.ll b/test/CodeGen/ARM64/dagcombiner-convergence.ll new file mode 100644 index 0000000..a45e313 --- /dev/null +++ b/test/CodeGen/ARM64/dagcombiner-convergence.ll @@ -0,0 +1,19 @@ +; RUN: llc < %s -o /dev/null +; rdar://10795250 +; DAGCombiner should converge. + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64" +target triple = "arm64-apple-macosx10.8.0" + +define i64 @foo(i128 %Params.coerce, i128 %SelLocs.coerce) { +entry: + %tmp = lshr i128 %Params.coerce, 61 + %.tr38.i = trunc i128 %tmp to i64 + %mul.i = and i64 %.tr38.i, 4294967288 + %tmp1 = lshr i128 %SelLocs.coerce, 62 + %.tr.i = trunc i128 %tmp1 to i64 + %mul7.i = and i64 %.tr.i, 4294967292 + %add.i = add i64 %mul7.i, %mul.i + %conv.i.i = and i64 %add.i, 4294967292 + ret i64 %conv.i.i +} diff --git a/test/CodeGen/ARM64/dagcombiner-load-slicing.ll b/test/CodeGen/ARM64/dagcombiner-load-slicing.ll new file mode 100644 index 0000000..0679014 --- /dev/null +++ b/test/CodeGen/ARM64/dagcombiner-load-slicing.ll @@ -0,0 +1,102 @@ +; RUN: llc -mtriple arm64-apple-ios -O3 -o - < %s | FileCheck %s +; + +%class.Complex = type { float, float } +%class.Complex_int = type { i32, i32 } +%class.Complex_long = type { i64, i64 } + +; CHECK-LABEL: @test +; CHECK: add [[BASE:x[0-9]+]], x0, x1, lsl #3 +; CHECK: ldp [[CPLX1_I:s[0-9]+]], [[CPLX1_R:s[0-9]+]], {{\[}}[[BASE]]] +; CHECK: ldp [[CPLX2_I:s[0-9]+]], [[CPLX2_R:s[0-9]+]], {{\[}}[[BASE]], #64] +; CHECK: fadd {{s[0-9]+}}, [[CPLX2_I]], [[CPLX1_I]] +; CHECK: fadd {{s[0-9]+}}, [[CPLX2_R]], [[CPLX1_R]] +; CHECK: ret +define void @test(%class.Complex* nocapture %out, i64 %out_start) { +entry: + %arrayidx = getelementptr inbounds %class.Complex* %out, i64 %out_start + %0 = bitcast %class.Complex* %arrayidx to i64* + %1 = load i64* %0, align 4 + %t0.sroa.0.0.extract.trunc = trunc i64 %1 to i32 + %2 = bitcast i32 %t0.sroa.0.0.extract.trunc to float + %t0.sroa.2.0.extract.shift = lshr i64 %1, 32 + %t0.sroa.2.0.extract.trunc = trunc i64 %t0.sroa.2.0.extract.shift to i32 + %3 = bitcast i32 %t0.sroa.2.0.extract.trunc to float + %add = add i64 %out_start, 8 + %arrayidx2 = getelementptr inbounds %class.Complex* %out, i64 %add + %i.i = getelementptr inbounds %class.Complex* %arrayidx2, i64 0, i32 0 + %4 = load float* %i.i, align 4 + 
%add.i = fadd float %4, %2 + %retval.sroa.0.0.vec.insert.i = insertelement <2 x float> undef, float %add.i, i32 0 + %r.i = getelementptr inbounds %class.Complex* %arrayidx2, i64 0, i32 1 + %5 = load float* %r.i, align 4 + %add5.i = fadd float %5, %3 + %retval.sroa.0.4.vec.insert.i = insertelement <2 x float> %retval.sroa.0.0.vec.insert.i, float %add5.i, i32 1 + %ref.tmp.sroa.0.0.cast = bitcast %class.Complex* %arrayidx to <2 x float>* + store <2 x float> %retval.sroa.0.4.vec.insert.i, <2 x float>* %ref.tmp.sroa.0.0.cast, align 4 + ret void +} + +; CHECK-LABEL: @test_int +; CHECK: add [[BASE:x[0-9]+]], x0, x1, lsl #3 +; CHECK: ldp [[CPLX1_I:w[0-9]+]], [[CPLX1_R:w[0-9]+]], {{\[}}[[BASE]]] +; CHECK: ldp [[CPLX2_I:w[0-9]+]], [[CPLX2_R:w[0-9]+]], {{\[}}[[BASE]], #64] +; CHECK: add {{w[0-9]+}}, [[CPLX2_I]], [[CPLX1_I]] +; CHECK: add {{w[0-9]+}}, [[CPLX2_R]], [[CPLX1_R]] +; CHECK: ret +define void @test_int(%class.Complex_int* nocapture %out, i64 %out_start) { +entry: + %arrayidx = getelementptr inbounds %class.Complex_int* %out, i64 %out_start + %0 = bitcast %class.Complex_int* %arrayidx to i64* + %1 = load i64* %0, align 4 + %t0.sroa.0.0.extract.trunc = trunc i64 %1 to i32 + %2 = bitcast i32 %t0.sroa.0.0.extract.trunc to i32 + %t0.sroa.2.0.extract.shift = lshr i64 %1, 32 + %t0.sroa.2.0.extract.trunc = trunc i64 %t0.sroa.2.0.extract.shift to i32 + %3 = bitcast i32 %t0.sroa.2.0.extract.trunc to i32 + %add = add i64 %out_start, 8 + %arrayidx2 = getelementptr inbounds %class.Complex_int* %out, i64 %add + %i.i = getelementptr inbounds %class.Complex_int* %arrayidx2, i64 0, i32 0 + %4 = load i32* %i.i, align 4 + %add.i = add i32 %4, %2 + %retval.sroa.0.0.vec.insert.i = insertelement <2 x i32> undef, i32 %add.i, i32 0 + %r.i = getelementptr inbounds %class.Complex_int* %arrayidx2, i64 0, i32 1 + %5 = load i32* %r.i, align 4 + %add5.i = add i32 %5, %3 + %retval.sroa.0.4.vec.insert.i = insertelement <2 x i32> %retval.sroa.0.0.vec.insert.i, i32 %add5.i, i32 1 + %ref.tmp.sroa.0.0.cast = bitcast %class.Complex_int* %arrayidx to <2 x i32>* + store <2 x i32> %retval.sroa.0.4.vec.insert.i, <2 x i32>* %ref.tmp.sroa.0.0.cast, align 4 + ret void +} + +; CHECK-LABEL: @test_long +; CHECK: add [[BASE:x[0-9]+]], x0, x1, lsl #4 +; CHECK: ldp [[CPLX1_I:x[0-9]+]], [[CPLX1_R:x[0-9]+]], {{\[}}[[BASE]]] +; CHECK: ldp [[CPLX2_I:x[0-9]+]], [[CPLX2_R:x[0-9]+]], {{\[}}[[BASE]], #128] +; CHECK: add {{x[0-9]+}}, [[CPLX2_I]], [[CPLX1_I]] +; CHECK: add {{x[0-9]+}}, [[CPLX2_R]], [[CPLX1_R]] +; CHECK: ret +define void @test_long(%class.Complex_long* nocapture %out, i64 %out_start) { +entry: + %arrayidx = getelementptr inbounds %class.Complex_long* %out, i64 %out_start + %0 = bitcast %class.Complex_long* %arrayidx to i128* + %1 = load i128* %0, align 4 + %t0.sroa.0.0.extract.trunc = trunc i128 %1 to i64 + %2 = bitcast i64 %t0.sroa.0.0.extract.trunc to i64 + %t0.sroa.2.0.extract.shift = lshr i128 %1, 64 + %t0.sroa.2.0.extract.trunc = trunc i128 %t0.sroa.2.0.extract.shift to i64 + %3 = bitcast i64 %t0.sroa.2.0.extract.trunc to i64 + %add = add i64 %out_start, 8 + %arrayidx2 = getelementptr inbounds %class.Complex_long* %out, i64 %add + %i.i = getelementptr inbounds %class.Complex_long* %arrayidx2, i32 0, i32 0 + %4 = load i64* %i.i, align 4 + %add.i = add i64 %4, %2 + %retval.sroa.0.0.vec.insert.i = insertelement <2 x i64> undef, i64 %add.i, i32 0 + %r.i = getelementptr inbounds %class.Complex_long* %arrayidx2, i32 0, i32 1 + %5 = load i64* %r.i, align 4 + %add5.i = add i64 %5, %3 + %retval.sroa.0.4.vec.insert.i = insertelement <2 x 
i64> %retval.sroa.0.0.vec.insert.i, i64 %add5.i, i32 1 + %ref.tmp.sroa.0.0.cast = bitcast %class.Complex_long* %arrayidx to <2 x i64>* + store <2 x i64> %retval.sroa.0.4.vec.insert.i, <2 x i64>* %ref.tmp.sroa.0.0.cast, align 4 + ret void +} diff --git a/test/CodeGen/ARM64/dup.ll b/test/CodeGen/ARM64/dup.ll new file mode 100644 index 0000000..e659575 --- /dev/null +++ b/test/CodeGen/ARM64/dup.ll @@ -0,0 +1,322 @@ +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -asm-verbose=false | FileCheck %s + +define <8 x i8> @v_dup8(i8 %A) nounwind { +;CHECK-LABEL: v_dup8: +;CHECK: dup.8b + %tmp1 = insertelement <8 x i8> zeroinitializer, i8 %A, i32 0 + %tmp2 = insertelement <8 x i8> %tmp1, i8 %A, i32 1 + %tmp3 = insertelement <8 x i8> %tmp2, i8 %A, i32 2 + %tmp4 = insertelement <8 x i8> %tmp3, i8 %A, i32 3 + %tmp5 = insertelement <8 x i8> %tmp4, i8 %A, i32 4 + %tmp6 = insertelement <8 x i8> %tmp5, i8 %A, i32 5 + %tmp7 = insertelement <8 x i8> %tmp6, i8 %A, i32 6 + %tmp8 = insertelement <8 x i8> %tmp7, i8 %A, i32 7 + ret <8 x i8> %tmp8 +} + +define <4 x i16> @v_dup16(i16 %A) nounwind { +;CHECK-LABEL: v_dup16: +;CHECK: dup.4h + %tmp1 = insertelement <4 x i16> zeroinitializer, i16 %A, i32 0 + %tmp2 = insertelement <4 x i16> %tmp1, i16 %A, i32 1 + %tmp3 = insertelement <4 x i16> %tmp2, i16 %A, i32 2 + %tmp4 = insertelement <4 x i16> %tmp3, i16 %A, i32 3 + ret <4 x i16> %tmp4 +} + +define <2 x i32> @v_dup32(i32 %A) nounwind { +;CHECK-LABEL: v_dup32: +;CHECK: dup.2s + %tmp1 = insertelement <2 x i32> zeroinitializer, i32 %A, i32 0 + %tmp2 = insertelement <2 x i32> %tmp1, i32 %A, i32 1 + ret <2 x i32> %tmp2 +} + +define <2 x float> @v_dupfloat(float %A) nounwind { +;CHECK-LABEL: v_dupfloat: +;CHECK: dup.2s + %tmp1 = insertelement <2 x float> zeroinitializer, float %A, i32 0 + %tmp2 = insertelement <2 x float> %tmp1, float %A, i32 1 + ret <2 x float> %tmp2 +} + +define <16 x i8> @v_dupQ8(i8 %A) nounwind { +;CHECK-LABEL: v_dupQ8: +;CHECK: dup.16b + %tmp1 = insertelement <16 x i8> zeroinitializer, i8 %A, i32 0 + %tmp2 = insertelement <16 x i8> %tmp1, i8 %A, i32 1 + %tmp3 = insertelement <16 x i8> %tmp2, i8 %A, i32 2 + %tmp4 = insertelement <16 x i8> %tmp3, i8 %A, i32 3 + %tmp5 = insertelement <16 x i8> %tmp4, i8 %A, i32 4 + %tmp6 = insertelement <16 x i8> %tmp5, i8 %A, i32 5 + %tmp7 = insertelement <16 x i8> %tmp6, i8 %A, i32 6 + %tmp8 = insertelement <16 x i8> %tmp7, i8 %A, i32 7 + %tmp9 = insertelement <16 x i8> %tmp8, i8 %A, i32 8 + %tmp10 = insertelement <16 x i8> %tmp9, i8 %A, i32 9 + %tmp11 = insertelement <16 x i8> %tmp10, i8 %A, i32 10 + %tmp12 = insertelement <16 x i8> %tmp11, i8 %A, i32 11 + %tmp13 = insertelement <16 x i8> %tmp12, i8 %A, i32 12 + %tmp14 = insertelement <16 x i8> %tmp13, i8 %A, i32 13 + %tmp15 = insertelement <16 x i8> %tmp14, i8 %A, i32 14 + %tmp16 = insertelement <16 x i8> %tmp15, i8 %A, i32 15 + ret <16 x i8> %tmp16 +} + +define <8 x i16> @v_dupQ16(i16 %A) nounwind { +;CHECK-LABEL: v_dupQ16: +;CHECK: dup.8h + %tmp1 = insertelement <8 x i16> zeroinitializer, i16 %A, i32 0 + %tmp2 = insertelement <8 x i16> %tmp1, i16 %A, i32 1 + %tmp3 = insertelement <8 x i16> %tmp2, i16 %A, i32 2 + %tmp4 = insertelement <8 x i16> %tmp3, i16 %A, i32 3 + %tmp5 = insertelement <8 x i16> %tmp4, i16 %A, i32 4 + %tmp6 = insertelement <8 x i16> %tmp5, i16 %A, i32 5 + %tmp7 = insertelement <8 x i16> %tmp6, i16 %A, i32 6 + %tmp8 = insertelement <8 x i16> %tmp7, i16 %A, i32 7 + ret <8 x i16> %tmp8 +} + +define <4 x i32> @v_dupQ32(i32 %A) nounwind { +;CHECK-LABEL: v_dupQ32: +;CHECK: dup.4s + %tmp1 = 
insertelement <4 x i32> zeroinitializer, i32 %A, i32 0 + %tmp2 = insertelement <4 x i32> %tmp1, i32 %A, i32 1 + %tmp3 = insertelement <4 x i32> %tmp2, i32 %A, i32 2 + %tmp4 = insertelement <4 x i32> %tmp3, i32 %A, i32 3 + ret <4 x i32> %tmp4 +} + +define <4 x float> @v_dupQfloat(float %A) nounwind { +;CHECK-LABEL: v_dupQfloat: +;CHECK: dup.4s + %tmp1 = insertelement <4 x float> zeroinitializer, float %A, i32 0 + %tmp2 = insertelement <4 x float> %tmp1, float %A, i32 1 + %tmp3 = insertelement <4 x float> %tmp2, float %A, i32 2 + %tmp4 = insertelement <4 x float> %tmp3, float %A, i32 3 + ret <4 x float> %tmp4 +} + +; Check to make sure it works with shuffles, too. + +define <8 x i8> @v_shuffledup8(i8 %A) nounwind { +;CHECK-LABEL: v_shuffledup8: +;CHECK: dup.8b + %tmp1 = insertelement <8 x i8> undef, i8 %A, i32 0 + %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer + ret <8 x i8> %tmp2 +} + +define <4 x i16> @v_shuffledup16(i16 %A) nounwind { +;CHECK-LABEL: v_shuffledup16: +;CHECK: dup.4h + %tmp1 = insertelement <4 x i16> undef, i16 %A, i32 0 + %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer + ret <4 x i16> %tmp2 +} + +define <2 x i32> @v_shuffledup32(i32 %A) nounwind { +;CHECK-LABEL: v_shuffledup32: +;CHECK: dup.2s + %tmp1 = insertelement <2 x i32> undef, i32 %A, i32 0 + %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer + ret <2 x i32> %tmp2 +} + +define <2 x float> @v_shuffledupfloat(float %A) nounwind { +;CHECK-LABEL: v_shuffledupfloat: +;CHECK: dup.2s + %tmp1 = insertelement <2 x float> undef, float %A, i32 0 + %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer + ret <2 x float> %tmp2 +} + +define <16 x i8> @v_shuffledupQ8(i8 %A) nounwind { +;CHECK-LABEL: v_shuffledupQ8: +;CHECK: dup.16b + %tmp1 = insertelement <16 x i8> undef, i8 %A, i32 0 + %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> zeroinitializer + ret <16 x i8> %tmp2 +} + +define <8 x i16> @v_shuffledupQ16(i16 %A) nounwind { +;CHECK-LABEL: v_shuffledupQ16: +;CHECK: dup.8h + %tmp1 = insertelement <8 x i16> undef, i16 %A, i32 0 + %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> zeroinitializer + ret <8 x i16> %tmp2 +} + +define <4 x i32> @v_shuffledupQ32(i32 %A) nounwind { +;CHECK-LABEL: v_shuffledupQ32: +;CHECK: dup.4s + %tmp1 = insertelement <4 x i32> undef, i32 %A, i32 0 + %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> zeroinitializer + ret <4 x i32> %tmp2 +} + +define <4 x float> @v_shuffledupQfloat(float %A) nounwind { +;CHECK-LABEL: v_shuffledupQfloat: +;CHECK: dup.4s + %tmp1 = insertelement <4 x float> undef, float %A, i32 0 + %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer + ret <4 x float> %tmp2 +} + +define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind { +;CHECK-LABEL: vduplane8: +;CHECK: dup.8b + %tmp1 = load <8 x i8>* %A + %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 > + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vduplane16(<4 x i16>* %A) nounwind { +;CHECK-LABEL: vduplane16: +;CHECK: dup.4h + %tmp1 = load <4 x i16>* %A + %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vduplane32(<2 x i32>* %A) nounwind { +;CHECK-LABEL: vduplane32: +;CHECK: dup.2s + %tmp1 = load <2 x i32>* %A + %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 
x i32> < i32 1, i32 1 > + ret <2 x i32> %tmp2 +} + +define <2 x float> @vduplanefloat(<2 x float>* %A) nounwind { +;CHECK-LABEL: vduplanefloat: +;CHECK: dup.2s + %tmp1 = load <2 x float>* %A + %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> < i32 1, i32 1 > + ret <2 x float> %tmp2 +} + +define <16 x i8> @vduplaneQ8(<8 x i8>* %A) nounwind { +;CHECK-LABEL: vduplaneQ8: +;CHECK: dup.16b + %tmp1 = load <8 x i8>* %A + %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 > + ret <16 x i8> %tmp2 +} + +define <8 x i16> @vduplaneQ16(<4 x i16>* %A) nounwind { +;CHECK-LABEL: vduplaneQ16: +;CHECK: dup.8h + %tmp1 = load <4 x i16>* %A + %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 > + ret <8 x i16> %tmp2 +} + +define <4 x i32> @vduplaneQ32(<2 x i32>* %A) nounwind { +;CHECK-LABEL: vduplaneQ32: +;CHECK: dup.4s + %tmp1 = load <2 x i32>* %A + %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > + ret <4 x i32> %tmp2 +} + +define <4 x float> @vduplaneQfloat(<2 x float>* %A) nounwind { +;CHECK-LABEL: vduplaneQfloat: +;CHECK: dup.4s + %tmp1 = load <2 x float>* %A + %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > + ret <4 x float> %tmp2 +} + +define <2 x i64> @foo(<2 x i64> %arg0_int64x1_t) nounwind readnone { +;CHECK-LABEL: foo: +;CHECK: dup.2d +entry: + %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> + ret <2 x i64> %0 +} + +define <2 x i64> @bar(<2 x i64> %arg0_int64x1_t) nounwind readnone { +;CHECK-LABEL: bar: +;CHECK: dup.2d +entry: + %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> + ret <2 x i64> %0 +} + +define <2 x double> @baz(<2 x double> %arg0_int64x1_t) nounwind readnone { +;CHECK-LABEL: baz: +;CHECK: dup.2d +entry: + %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> + ret <2 x double> %0 +} + +define <2 x double> @qux(<2 x double> %arg0_int64x1_t) nounwind readnone { +;CHECK-LABEL: qux: +;CHECK: dup.2d +entry: + %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> + ret <2 x double> %0 +} + +define <2 x i32> @f(i32 %a, i32 %b) nounwind readnone { +; CHECK-LABEL: f: +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: ins.s v0[1], w1 +; CHECK-NEXT: ret + %vecinit = insertelement <2 x i32> undef, i32 %a, i32 0 + %vecinit1 = insertelement <2 x i32> %vecinit, i32 %b, i32 1 + ret <2 x i32> %vecinit1 +} + +define <4 x i32> @g(i32 %a, i32 %b) nounwind readnone { +; CHECK-LABEL: g: +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: ins.s v0[1], w1 +; CHECK-NEXT: ins.s v0[2], w1 +; CHECK-NEXT: ins.s v0[3], w0 +; CHECK-NEXT: ret + %vecinit = insertelement <4 x i32> undef, i32 %a, i32 0 + %vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1 + %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %b, i32 2 + %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %a, i32 3 + ret <4 x i32> %vecinit3 +} + +define <2 x i64> @h(i64 %a, i64 %b) nounwind readnone { +; CHECK-LABEL: h: +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ins.d v0[1], x1 +; CHECK-NEXT: ret + %vecinit = insertelement <2 x i64> undef, i64 %a, i32 0 + %vecinit1 = insertelement <2 x i64> %vecinit, i64 %b, i32 1 + ret <2 x i64> %vecinit1 +} + +; We used to spot this as a BUILD_VECTOR implementable by dup, but assume that +; the single value needed was of 
the same type as the vector. This is false if +; the scalar corresponding to the vector type is illegal (e.g. a <4 x i16> +; BUILD_VECTOR will have an i32 as its source). In that case, the operation is +; not a simple "dup vD.4h, vN.h[idx]" after all, and we crashed. +define <4 x i16> @test_build_illegal(<4 x i32> %in) { +; CHECK-LABEL: test_build_illegal: +; CHECK: umov.s [[WTMP:w[0-9]+]], v0[3] +; CHECK: dup.4h v0, [[WTMP]] + %val = extractelement <4 x i32> %in, i32 3 + %smallval = trunc i32 %val to i16 + %vec = insertelement <4x i16> undef, i16 %smallval, i32 3 + + ret <4 x i16> %vec +} + +; We used to inherit an already extract_subvectored v4i16 from +; SelectionDAGBuilder here. We then added a DUPLANE on top of that, preventing +; the formation of an indexed-by-7 MLS. +define <4 x i16> @test_high_splat(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) #0 { +; CHECK-LABEL: test_high_splat: +; CHECK: mls.4h v0, v1, v2[7] +entry: + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> + %mul = mul <4 x i16> %shuffle, %b + %sub = sub <4 x i16> %a, %mul + ret <4 x i16> %sub +} diff --git a/test/CodeGen/ARM64/early-ifcvt.ll b/test/CodeGen/ARM64/early-ifcvt.ll new file mode 100644 index 0000000..a5c1e26 --- /dev/null +++ b/test/CodeGen/ARM64/early-ifcvt.ll @@ -0,0 +1,423 @@ +; RUN: llc < %s -stress-early-ifcvt | FileCheck %s +target triple = "arm64-apple-macosx" + +; CHECK: mm2 +define i32 @mm2(i32* nocapture %p, i32 %n) nounwind uwtable readonly ssp { +entry: + br label %do.body + +; CHECK: do.body +; Loop body has no branches before the backedge. +; CHECK-NOT: LBB +do.body: + %max.0 = phi i32 [ 0, %entry ], [ %max.1, %do.cond ] + %min.0 = phi i32 [ 0, %entry ], [ %min.1, %do.cond ] + %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.cond ] + %p.addr.0 = phi i32* [ %p, %entry ], [ %incdec.ptr, %do.cond ] + %incdec.ptr = getelementptr inbounds i32* %p.addr.0, i64 1 + %0 = load i32* %p.addr.0, align 4 + %cmp = icmp sgt i32 %0, %max.0 + br i1 %cmp, label %do.cond, label %if.else + +if.else: + %cmp1 = icmp slt i32 %0, %min.0 + %.min.0 = select i1 %cmp1, i32 %0, i32 %min.0 + br label %do.cond + +do.cond: + %max.1 = phi i32 [ %0, %do.body ], [ %max.0, %if.else ] + %min.1 = phi i32 [ %min.0, %do.body ], [ %.min.0, %if.else ] +; CHECK: cbnz + %dec = add i32 %n.addr.0, -1 + %tobool = icmp eq i32 %dec, 0 + br i1 %tobool, label %do.end, label %do.body + +do.end: + %sub = sub nsw i32 %max.1, %min.1 + ret i32 %sub +} + +; CHECK-LABEL: fold_inc_true_32: +; CHECK: {{subs.*wzr,|cmp}} w2, #1 +; CHECK-NEXT: csinc w0, w1, w0, eq +; CHECK-NEXT: ret +define i32 @fold_inc_true_32(i32 %x, i32 %y, i32 %c) nounwind ssp { +entry: + %tobool = icmp eq i32 %c, 1 + %inc = add nsw i32 %x, 1 + br i1 %tobool, label %eq_bb, label %done + +eq_bb: + br label %done + +done: + %cond = phi i32 [ %y, %eq_bb ], [ %inc, %entry ] + ret i32 %cond +} + +; CHECK-LABEL: fold_inc_true_64: +; CHECK: {{subs.*xzr,|cmp}} x2, #1 +; CHECK-NEXT: csinc x0, x1, x0, eq +; CHECK-NEXT: ret +define i64 @fold_inc_true_64(i64 %x, i64 %y, i64 %c) nounwind ssp { +entry: + %tobool = icmp eq i64 %c, 1 + %inc = add nsw i64 %x, 1 + br i1 %tobool, label %eq_bb, label %done + +eq_bb: + br label %done + +done: + %cond = phi i64 [ %y, %eq_bb ], [ %inc, %entry ] + ret i64 %cond +} + +; CHECK-LABEL: fold_inc_false_32: +; CHECK: {{subs.*wzr,|cmp}} w2, #1 +; CHECK-NEXT: csinc w0, w1, w0, ne +; CHECK-NEXT: ret +define i32 @fold_inc_false_32(i32 %x, i32 %y, i32 %c) nounwind ssp { +entry: + %tobool = icmp eq i32 %c, 1 + %inc = add nsw i32 %x, 1 + br i1 %tobool, label 
%eq_bb, label %done + +eq_bb: + br label %done + +done: + %cond = phi i32 [ %inc, %eq_bb ], [ %y, %entry ] + ret i32 %cond +} + +; CHECK-LABEL: fold_inc_false_64: +; CHECK: {{subs.*xzr,|cmp}} x2, #1 +; CHECK-NEXT: csinc x0, x1, x0, ne +; CHECK-NEXT: ret +define i64 @fold_inc_false_64(i64 %x, i64 %y, i64 %c) nounwind ssp { +entry: + %tobool = icmp eq i64 %c, 1 + %inc = add nsw i64 %x, 1 + br i1 %tobool, label %eq_bb, label %done + +eq_bb: + br label %done + +done: + %cond = phi i64 [ %inc, %eq_bb ], [ %y, %entry ] + ret i64 %cond +} + +; CHECK-LABEL: fold_inv_true_32: +; CHECK: {{subs.*wzr,|cmp}} w2, #1 +; CHECK-NEXT: csinv w0, w1, w0, eq +; CHECK-NEXT: ret +define i32 @fold_inv_true_32(i32 %x, i32 %y, i32 %c) nounwind ssp { +entry: + %tobool = icmp eq i32 %c, 1 + %inv = xor i32 %x, -1 + br i1 %tobool, label %eq_bb, label %done + +eq_bb: + br label %done + +done: + %cond = phi i32 [ %y, %eq_bb ], [ %inv, %entry ] + ret i32 %cond +} + +; CHECK-LABEL: fold_inv_true_64: +; CHECK: {{subs.*xzr,|cmp}} x2, #1 +; CHECK-NEXT: csinv x0, x1, x0, eq +; CHECK-NEXT: ret +define i64 @fold_inv_true_64(i64 %x, i64 %y, i64 %c) nounwind ssp { +entry: + %tobool = icmp eq i64 %c, 1 + %inv = xor i64 %x, -1 + br i1 %tobool, label %eq_bb, label %done + +eq_bb: + br label %done + +done: + %cond = phi i64 [ %y, %eq_bb ], [ %inv, %entry ] + ret i64 %cond +} + +; CHECK-LABEL: fold_inv_false_32: +; CHECK: {{subs.*wzr,|cmp}} w2, #1 +; CHECK-NEXT: csinv w0, w1, w0, ne +; CHECK-NEXT: ret +define i32 @fold_inv_false_32(i32 %x, i32 %y, i32 %c) nounwind ssp { +entry: + %tobool = icmp eq i32 %c, 1 + %inv = xor i32 %x, -1 + br i1 %tobool, label %eq_bb, label %done + +eq_bb: + br label %done + +done: + %cond = phi i32 [ %inv, %eq_bb ], [ %y, %entry ] + ret i32 %cond +} + +; CHECK-LABEL: fold_inv_false_64: +; CHECK: {{subs.*xzr,|cmp}} x2, #1 +; CHECK-NEXT: csinv x0, x1, x0, ne +; CHECK-NEXT: ret +define i64 @fold_inv_false_64(i64 %x, i64 %y, i64 %c) nounwind ssp { +entry: + %tobool = icmp eq i64 %c, 1 + %inv = xor i64 %x, -1 + br i1 %tobool, label %eq_bb, label %done + +eq_bb: + br label %done + +done: + %cond = phi i64 [ %inv, %eq_bb ], [ %y, %entry ] + ret i64 %cond +} + +; CHECK-LABEL: fold_neg_true_32: +; CHECK: {{subs.*wzr,|cmp}} w2, #1 +; CHECK-NEXT: csneg w0, w1, w0, eq +; CHECK-NEXT: ret +define i32 @fold_neg_true_32(i32 %x, i32 %y, i32 %c) nounwind ssp { +entry: + %tobool = icmp eq i32 %c, 1 + %neg = sub nsw i32 0, %x + br i1 %tobool, label %eq_bb, label %done + +eq_bb: + br label %done + +done: + %cond = phi i32 [ %y, %eq_bb ], [ %neg, %entry ] + ret i32 %cond +} + +; CHECK-LABEL: fold_neg_true_64: +; CHECK: {{subs.*xzr,|cmp}} x2, #1 +; CHECK-NEXT: csneg x0, x1, x0, eq +; CHECK-NEXT: ret +define i64 @fold_neg_true_64(i64 %x, i64 %y, i64 %c) nounwind ssp { +entry: + %tobool = icmp eq i64 %c, 1 + %neg = sub nsw i64 0, %x + br i1 %tobool, label %eq_bb, label %done + +eq_bb: + br label %done + +done: + %cond = phi i64 [ %y, %eq_bb ], [ %neg, %entry ] + ret i64 %cond +} + +; CHECK-LABEL: fold_neg_false_32: +; CHECK: {{subs.*wzr,|cmp}} w2, #1 +; CHECK-NEXT: csneg w0, w1, w0, ne +; CHECK-NEXT: ret +define i32 @fold_neg_false_32(i32 %x, i32 %y, i32 %c) nounwind ssp { +entry: + %tobool = icmp eq i32 %c, 1 + %neg = sub nsw i32 0, %x + br i1 %tobool, label %eq_bb, label %done + +eq_bb: + br label %done + +done: + %cond = phi i32 [ %neg, %eq_bb ], [ %y, %entry ] + ret i32 %cond +} + +; CHECK-LABEL: fold_neg_false_64: +; CHECK: {{subs.*xzr,|cmp}} x2, #1 +; CHECK-NEXT: csneg x0, x1, x0, ne +; CHECK-NEXT: ret +define i64 
@fold_neg_false_64(i64 %x, i64 %y, i64 %c) nounwind ssp { +entry: + %tobool = icmp eq i64 %c, 1 + %neg = sub nsw i64 0, %x + br i1 %tobool, label %eq_bb, label %done + +eq_bb: + br label %done + +done: + %cond = phi i64 [ %neg, %eq_bb ], [ %y, %entry ] + ret i64 %cond +} + +; CHECK: cbnz_32 +; CHECK: {{subs.*wzr,|cmp}} w2, #0 +; CHECK-NEXT: csel w0, w1, w0, ne +; CHECK-NEXT: ret +define i32 @cbnz_32(i32 %x, i32 %y, i32 %c) nounwind ssp { +entry: + %tobool = icmp eq i32 %c, 0 + br i1 %tobool, label %eq_bb, label %done + +eq_bb: + br label %done + +done: + %cond = phi i32 [ %x, %eq_bb ], [ %y, %entry ] + ret i32 %cond +} + +; CHECK: cbnz_64 +; CHECK: {{subs.*xzr,|cmp}} x2, #0 +; CHECK-NEXT: csel x0, x1, x0, ne +; CHECK-NEXT: ret +define i64 @cbnz_64(i64 %x, i64 %y, i64 %c) nounwind ssp { +entry: + %tobool = icmp eq i64 %c, 0 + br i1 %tobool, label %eq_bb, label %done + +eq_bb: + br label %done + +done: + %cond = phi i64 [ %x, %eq_bb ], [ %y, %entry ] + ret i64 %cond +} + +; CHECK: cbz_32 +; CHECK: {{subs.*wzr,|cmp}} w2, #0 +; CHECK-NEXT: csel w0, w1, w0, eq +; CHECK-NEXT: ret +define i32 @cbz_32(i32 %x, i32 %y, i32 %c) nounwind ssp { +entry: + %tobool = icmp ne i32 %c, 0 + br i1 %tobool, label %ne_bb, label %done + +ne_bb: + br label %done + +done: + %cond = phi i32 [ %x, %ne_bb ], [ %y, %entry ] + ret i32 %cond +} + +; CHECK: cbz_64 +; CHECK: {{subs.*xzr,|cmp}} x2, #0 +; CHECK-NEXT: csel x0, x1, x0, eq +; CHECK-NEXT: ret +define i64 @cbz_64(i64 %x, i64 %y, i64 %c) nounwind ssp { +entry: + %tobool = icmp ne i64 %c, 0 + br i1 %tobool, label %ne_bb, label %done + +ne_bb: + br label %done + +done: + %cond = phi i64 [ %x, %ne_bb ], [ %y, %entry ] + ret i64 %cond +} + +; CHECK: tbnz_32 +; CHECK: {{ands.*xzr,|tst}} x2, #0x80 +; CHECK-NEXT: csel w0, w1, w0, ne +; CHECK-NEXT: ret +define i32 @tbnz_32(i32 %x, i32 %y, i32 %c) nounwind ssp { +entry: + %mask = and i32 %c, 128 + %tobool = icmp eq i32 %mask, 0 + br i1 %tobool, label %eq_bb, label %done + +eq_bb: + br label %done + +done: + %cond = phi i32 [ %x, %eq_bb ], [ %y, %entry ] + ret i32 %cond +} + +; CHECK: tbnz_64 +; CHECK: {{ands.*xzr,|tst}} x2, #0x8000000000000000 +; CHECK-NEXT: csel x0, x1, x0, ne +; CHECK-NEXT: ret +define i64 @tbnz_64(i64 %x, i64 %y, i64 %c) nounwind ssp { +entry: + %mask = and i64 %c, 9223372036854775808 + %tobool = icmp eq i64 %mask, 0 + br i1 %tobool, label %eq_bb, label %done + +eq_bb: + br label %done + +done: + %cond = phi i64 [ %x, %eq_bb ], [ %y, %entry ] + ret i64 %cond +} + +; CHECK: tbz_32 +; CHECK: {{ands.*xzr,|tst}} x2, #0x80 +; CHECK-NEXT: csel w0, w1, w0, eq +; CHECK-NEXT: ret +define i32 @tbz_32(i32 %x, i32 %y, i32 %c) nounwind ssp { +entry: + %mask = and i32 %c, 128 + %tobool = icmp ne i32 %mask, 0 + br i1 %tobool, label %ne_bb, label %done + +ne_bb: + br label %done + +done: + %cond = phi i32 [ %x, %ne_bb ], [ %y, %entry ] + ret i32 %cond +} + +; CHECK: tbz_64 +; CHECK: {{ands.*xzr,|tst}} x2, #0x8000000000000000 +; CHECK-NEXT: csel x0, x1, x0, eq +; CHECK-NEXT: ret +define i64 @tbz_64(i64 %x, i64 %y, i64 %c) nounwind ssp { +entry: + %mask = and i64 %c, 9223372036854775808 + %tobool = icmp ne i64 %mask, 0 + br i1 %tobool, label %ne_bb, label %done + +ne_bb: + br label %done + +done: + %cond = phi i64 [ %x, %ne_bb ], [ %y, %entry ] + ret i64 %cond +} + +; This function from 175.vpr folds an ADDWri into a CSINC. +; Remember to clear the kill flag on the ADDWri. 
+define i32 @get_ytrack_to_xtracks() nounwind ssp { +entry: + br label %for.body + +for.body: + %x0 = load i32* undef, align 4 + br i1 undef, label %if.then.i146, label %is_sbox.exit155 + +if.then.i146: + %add8.i143 = add nsw i32 0, %x0 + %rem.i144 = srem i32 %add8.i143, %x0 + %add9.i145 = add i32 %rem.i144, 1 + br label %is_sbox.exit155 + +is_sbox.exit155: ; preds = %if.then.i146, %for.body + %seg_offset.0.i151 = phi i32 [ %add9.i145, %if.then.i146 ], [ undef, %for.body ] + %idxprom15.i152 = sext i32 %seg_offset.0.i151 to i64 + %arrayidx18.i154 = getelementptr inbounds i32* null, i64 %idxprom15.i152 + %x1 = load i32* %arrayidx18.i154, align 4 + br i1 undef, label %for.body51, label %for.body + +for.body51: ; preds = %is_sbox.exit155 + call fastcc void @get_switch_type(i32 %x1, i32 undef, i16 signext undef, i16 signext undef, i16* undef) + unreachable +} +declare fastcc void @get_switch_type(i32, i32, i16 signext, i16 signext, i16* nocapture) nounwind ssp diff --git a/test/CodeGen/ARM64/elf-calls.ll b/test/CodeGen/ARM64/elf-calls.ll new file mode 100644 index 0000000..8c40203 --- /dev/null +++ b/test/CodeGen/ARM64/elf-calls.ll @@ -0,0 +1,20 @@ +; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s +; RUN: llc -mtriple=arm64-linux-gnu -filetype=obj -o - %s | llvm-objdump -triple=arm64-linux-gnu - -r | FileCheck %s --check-prefix=CHECK-OBJ + +declare void @callee() + +define void @caller() { + call void @callee() + ret void +; CHECK-LABEL: caller: +; CHECK: bl callee +; CHECK-OBJ: R_AARCH64_CALL26 callee +} + +define void @tail_caller() { + tail call void @callee() + ret void +; CHECK-LABEL: tail_caller: +; CHECK: b callee +; CHECK-OBJ: R_AARCH64_JUMP26 callee +} diff --git a/test/CodeGen/ARM64/elf-constpool.ll b/test/CodeGen/ARM64/elf-constpool.ll new file mode 100644 index 0000000..95d3343 --- /dev/null +++ b/test/CodeGen/ARM64/elf-constpool.ll @@ -0,0 +1,13 @@ +; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s +; RUN: llc -mtriple=arm64-linux-gnu -O0 -o - %s | FileCheck %s + +; O0 checked for fastisel purposes. It has a separate path which +; creates a constpool entry for floating values. 
+ +define double @needs_const() { + ret double 3.14159 +; CHECK: .LCPI0_0: + +; CHECK: adrp {{x[0-9]+}}, .LCPI0_0 +; CHECK: ldr d0, [{{x[0-9]+}}, :lo12:.LCPI0_0] +} diff --git a/test/CodeGen/ARM64/elf-globals.ll b/test/CodeGen/ARM64/elf-globals.ll new file mode 100644 index 0000000..598c96a --- /dev/null +++ b/test/CodeGen/ARM64/elf-globals.ll @@ -0,0 +1,115 @@ +; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s +; RUN: llc -mtriple=arm64-linux-gnu -o - %s -O0 | FileCheck %s --check-prefix=CHECK-FAST +; RUN: llc -mtriple=arm64-linux-gnu -relocation-model=pic -o - %s | FileCheck %s --check-prefix=CHECK-PIC +; RUN: llc -mtriple=arm64-linux-gnu -O0 -relocation-model=pic -o - %s | FileCheck %s --check-prefix=CHECK-FAST-PIC + +@var8 = external global i8, align 1 +@var16 = external global i16, align 2 +@var32 = external global i32, align 4 +@var64 = external global i64, align 8 + +define i8 @test_i8(i8 %new) { + %val = load i8* @var8, align 1 + store i8 %new, i8* @var8 + ret i8 %val +; CHECK-LABEL: test_i8: +; CHECK: adrp x[[HIREG:[0-9]+]], var8 +; CHECK: ldrb {{w[0-9]+}}, [x[[HIREG]], :lo12:var8] +; CHECK: strb {{w[0-9]+}}, [x[[HIREG]], :lo12:var8] + +; CHECK-PIC-LABEL: test_i8: +; CHECK-PIC: adrp x[[HIREG:[0-9]+]], :got:var8 +; CHECK-PIC: ldr x[[VAR_ADDR:[0-9]+]], [x[[HIREG]], :got_lo12:var8] +; CHECK-PIC: ldrb {{w[0-9]+}}, [x[[VAR_ADDR]]] + +; CHECK-FAST: adrp x[[HIREG:[0-9]+]], var8 +; CHECK-FAST: ldrb {{w[0-9]+}}, [x[[HIREG]], :lo12:var8] + +; CHECK-FAST-PIC: adrp x[[HIREG:[0-9]+]], :got:var8 +; CHECK-FAST-PIC: ldr x[[VARADDR:[0-9]+]], [x[[HIREG]], :got_lo12:var8] +; CHECK-FAST-PIC: ldr {{w[0-9]+}}, [x[[VARADDR]]] +} + +define i16 @test_i16(i16 %new) { + %val = load i16* @var16, align 2 + store i16 %new, i16* @var16 + ret i16 %val +; CHECK-LABEL: test_i16: +; CHECK: adrp x[[HIREG:[0-9]+]], var16 +; CHECK: ldrh {{w[0-9]+}}, [x[[HIREG]], :lo12:var16] +; CHECK: strh {{w[0-9]+}}, [x[[HIREG]], :lo12:var16] + +; CHECK-FAST: adrp x[[HIREG:[0-9]+]], var16 +; CHECK-FAST: ldrh {{w[0-9]+}}, [x[[HIREG]], :lo12:var16] +} + +define i32 @test_i32(i32 %new) { + %val = load i32* @var32, align 4 + store i32 %new, i32* @var32 + ret i32 %val +; CHECK-LABEL: test_i32: +; CHECK: adrp x[[HIREG:[0-9]+]], var32 +; CHECK: ldr {{w[0-9]+}}, [x[[HIREG]], :lo12:var32] +; CHECK: str {{w[0-9]+}}, [x[[HIREG]], :lo12:var32] + +; CHECK-FAST: adrp x[[HIREG:[0-9]+]], var32 +; CHECK-FAST: add {{x[0-9]+}}, x[[HIREG]], :lo12:var32 +} + +define i64 @test_i64(i64 %new) { + %val = load i64* @var64, align 8 + store i64 %new, i64* @var64 + ret i64 %val +; CHECK-LABEL: test_i64: +; CHECK: adrp x[[HIREG:[0-9]+]], var64 +; CHECK: ldr {{x[0-9]+}}, [x[[HIREG]], :lo12:var64] +; CHECK: str {{x[0-9]+}}, [x[[HIREG]], :lo12:var64] + +; CHECK-FAST: adrp x[[HIREG:[0-9]+]], var64 +; CHECK-FAST: add {{x[0-9]+}}, x[[HIREG]], :lo12:var64 +} + +define i64* @test_addr() { + ret i64* @var64 +; CHECK-LABEL: test_addr: +; CHECK: adrp [[HIREG:x[0-9]+]], var64 +; CHECK: add x0, [[HIREG]], :lo12:var64 + +; CHECK-FAST: adrp [[HIREG:x[0-9]+]], var64 +; CHECK-FAST: add x0, [[HIREG]], :lo12:var64 +} + +@hiddenvar = hidden global i32 0, align 4 +@protectedvar = protected global i32 0, align 4 + +define i32 @test_vis() { + %lhs = load i32* @hiddenvar, align 4 + %rhs = load i32* @protectedvar, align 4 + %ret = add i32 %lhs, %rhs + ret i32 %ret +; CHECK-PIC: adrp {{x[0-9]+}}, hiddenvar +; CHECK-PIC: ldr {{w[0-9]+}}, [{{x[0-9]+}}, :lo12:hiddenvar] +; CHECK-PIC: adrp {{x[0-9]+}}, protectedvar +; CHECK-PIC: ldr {{w[0-9]+}}, [{{x[0-9]+}}, :lo12:protectedvar] +} 
+
+@var_default = external global [2 x i32]
+
+define i32 @test_default_align() {
+  %addr = getelementptr [2 x i32]* @var_default, i32 0, i32 0
+  %val = load i32* %addr
+  ret i32 %val
+; CHECK-LABEL: test_default_align:
+; CHECK: adrp x[[HIREG:[0-9]+]], var_default
+; CHECK: ldr w0, [x[[HIREG]], :lo12:var_default]
+}
+
+define i64 @test_default_unaligned() {
+  %addr = bitcast [2 x i32]* @var_default to i64*
+  %val = load i64* %addr
+  ret i64 %val
+; CHECK-LABEL: test_default_unaligned:
+; CHECK: adrp [[HIREG:x[0-9]+]], var_default
+; CHECK: add x[[ADDR:[0-9]+]], [[HIREG]], :lo12:var_default
+; CHECK: ldr x0, [x[[ADDR]]]
+}
diff --git a/test/CodeGen/ARM64/ext.ll b/test/CodeGen/ARM64/ext.ll
new file mode 100644
index 0000000..57d6e0c
--- /dev/null
+++ b/test/CodeGen/ARM64/ext.ll
@@ -0,0 +1,101 @@
+; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
+
+define <8 x i8> @test_vextd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: test_vextd:
+;CHECK: {{ext.8b.*#3}}
+  %tmp1 = load <8 x i8>* %A
+  %tmp2 = load <8 x i8>* %B
+  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
+  ret <8 x i8> %tmp3
+}
+
+define <8 x i8> @test_vextRd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: test_vextRd:
+;CHECK: {{ext.8b.*#5}}
+  %tmp1 = load <8 x i8>* %A
+  %tmp2 = load <8 x i8>* %B
+  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4>
+  ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @test_vextq(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: test_vextq:
+;CHECK: {{ext.16b.*3}}
+  %tmp1 = load <16 x i8>* %A
+  %tmp2 = load <16 x i8>* %B
+  %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+  ret <16 x i8> %tmp3
+}
+
+define <16 x i8> @test_vextRq(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: test_vextRq:
+;CHECK: {{ext.16b.*7}}
+  %tmp1 = load <16 x i8>* %A
+  %tmp2 = load <16 x i8>* %B
+  %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
+  ret <16 x i8> %tmp3
+}
+
+define <4 x i16> @test_vextd16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: test_vextd16:
+;CHECK: {{ext.8b.*#6}}
+  %tmp1 = load <4 x i16>* %A
+  %tmp2 = load <4 x i16>* %B
+  %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+  ret <4 x i16> %tmp3
+}
+
+define <4 x i32> @test_vextq32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: test_vextq32:
+;CHECK: {{ext.16b.*12}}
+  %tmp1 = load <4 x i32>* %A
+  %tmp2 = load <4 x i32>* %B
+  %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+  ret <4 x i32> %tmp3
+}
+
+; Undef shuffle indices should not prevent matching to VEXT:
+
+define <8 x i8> @test_vextd_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: test_vextd_undef:
+;CHECK: {{ext.8b.*}}
+  %tmp1 = load <8 x i8>* %A
+  %tmp2 = load <8 x i8>* %B
+  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10>
+  ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @test_vextRq_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: test_vextRq_undef:
+;CHECK: {{ext.16b.*#7}}
+  %tmp1 = load <16 x i8>* %A
+  %tmp2 = load <16 x i8>* %B
+  %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 23, i32 24, i32 25, i32 26, i32 undef, i32 undef, i32 29, i32 30, i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
+  ret <16 x i8> %tmp3
+}
+
+; Tests for ReconstructShuffle function. Indices have to be carefully
+; chosen to reach lowering phase as a BUILD_VECTOR.
+
+; One vector needs vext, the other can be handled by extract_subvector
+; Also checks interleaving of sources is handled correctly.
+; Essence: a vext is used on %A and something saner than stack load/store for final result.
+define <4 x i16> @test_interleaved(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK-LABEL: test_interleaved: +;CHECK: ext.8b +;CHECK: zip1.4h + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <4 x i32> + ret <4 x i16> %tmp3 +} + +; An undef in the shuffle list should still be optimizable +define <4 x i16> @test_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK-LABEL: test_undef: +;CHECK: zip1.4h + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <4 x i32> + ret <4 x i16> %tmp3 +} diff --git a/test/CodeGen/ARM64/extend-int-to-fp.ll b/test/CodeGen/ARM64/extend-int-to-fp.ll new file mode 100644 index 0000000..599a697 --- /dev/null +++ b/test/CodeGen/ARM64/extend-int-to-fp.ll @@ -0,0 +1,19 @@ +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s + +define <4 x float> @foo(<4 x i16> %a) nounwind { +; CHECK-LABEL: foo: +; CHECK: ushll.4s v0, v0, #0 +; CHECK-NEXT: ucvtf.4s v0, v0 +; CHECK-NEXT: ret + %vcvt.i = uitofp <4 x i16> %a to <4 x float> + ret <4 x float> %vcvt.i +} + +define <4 x float> @bar(<4 x i16> %a) nounwind { +; CHECK-LABEL: bar: +; CHECK: sshll.4s v0, v0, #0 +; CHECK-NEXT: scvtf.4s v0, v0 +; CHECK-NEXT: ret + %vcvt.i = sitofp <4 x i16> %a to <4 x float> + ret <4 x float> %vcvt.i +} diff --git a/test/CodeGen/ARM64/extend.ll b/test/CodeGen/ARM64/extend.ll new file mode 100644 index 0000000..4d20543 --- /dev/null +++ b/test/CodeGen/ARM64/extend.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=arm64-apple-ios | FileCheck %s +@array = external global [0 x i32] + +define i64 @foo(i32 %i) { +; CHECK: foo +; CHECK: adrp x[[REG:[0-9]+]], _array@GOTPAGE +; CHECK: ldr x[[REG1:[0-9]+]], [x[[REG]], _array@GOTPAGEOFF] +; CHECK: ldrsw x0, [x[[REG1]], x0, sxtw #2] +; CHECK: ret + %idxprom = sext i32 %i to i64 + %arrayidx = getelementptr inbounds [0 x i32]* @array, i64 0, i64 %idxprom + %tmp1 = load i32* %arrayidx, align 4 + %conv = sext i32 %tmp1 to i64 + ret i64 %conv +} diff --git a/test/CodeGen/ARM64/extern-weak.ll b/test/CodeGen/ARM64/extern-weak.ll new file mode 100644 index 0000000..a239403 --- /dev/null +++ b/test/CodeGen/ARM64/extern-weak.ll @@ -0,0 +1,51 @@ +; RUN: llc -mtriple=arm64-none-linux-gnu -o - < %s | FileCheck %s +; RUN: llc -mtriple=arm64-none-linux-gnu -code-model=large -o - < %s | FileCheck --check-prefix=CHECK-LARGE %s + +declare extern_weak i32 @var() + +define i32()* @foo() { +; The usual ADRP/ADD pair can't be used for a weak reference because it must +; evaluate to 0 if the symbol is undefined. We use a litpool entry. + ret i32()* @var + +; CHECK: adrp x[[VAR:[0-9]+]], :got:var +; CHECK: ldr x0, [x[[VAR]], :got_lo12:var] + + ; In the large model, the usual relocations are absolute and can + ; materialise 0. +; CHECK-LARGE: movz x0, #:abs_g3:var +; CHECK-LARGE: movk x0, #:abs_g2_nc:var +; CHECK-LARGE: movk x0, #:abs_g1_nc:var +; CHECK-LARGE: movk x0, #:abs_g0_nc:var +} + + +@arr_var = extern_weak global [10 x i32] + +define i32* @bar() { + %addr = getelementptr [10 x i32]* @arr_var, i32 0, i32 5 +; CHECK: adrp x[[ARR_VAR_HI:[0-9]+]], :got:arr_var +; CHECK: ldr [[ARR_VAR:x[0-9]+]], [x[[ARR_VAR_HI]], :got_lo12:arr_var] +; CHECK: add x0, [[ARR_VAR]], #20 + ret i32* %addr + + ; In the large model, the usual relocations are absolute and can + ; materialise 0. 
+; CHECK-LARGE: movz [[ARR_VAR:x[0-9]+]], #:abs_g3:arr_var +; CHECK-LARGE: movk [[ARR_VAR]], #:abs_g2_nc:arr_var +; CHECK-LARGE: movk [[ARR_VAR]], #:abs_g1_nc:arr_var +; CHECK-LARGE: movk [[ARR_VAR]], #:abs_g0_nc:arr_var +} + +@defined_weak_var = internal unnamed_addr global i32 0 + +define i32* @wibble() { + ret i32* @defined_weak_var +; CHECK: adrp [[BASE:x[0-9]+]], defined_weak_var +; CHECK: add x0, [[BASE]], :lo12:defined_weak_var + +; CHECK-LARGE: movz x0, #:abs_g3:defined_weak_var +; CHECK-LARGE: movk x0, #:abs_g2_nc:defined_weak_var +; CHECK-LARGE: movk x0, #:abs_g1_nc:defined_weak_var +; CHECK-LARGE: movk x0, #:abs_g0_nc:defined_weak_var +} diff --git a/test/CodeGen/ARM64/extload-knownzero.ll b/test/CodeGen/ARM64/extload-knownzero.ll new file mode 100644 index 0000000..14e5fd3 --- /dev/null +++ b/test/CodeGen/ARM64/extload-knownzero.ll @@ -0,0 +1,28 @@ +; RUN: llc < %s -march=arm64 | FileCheck %s +; rdar://12771555 + +define void @foo(i16* %ptr, i32 %a) nounwind { +entry: +; CHECK-LABEL: foo: + %tmp1 = icmp ult i32 %a, 100 + br i1 %tmp1, label %bb1, label %bb2 +bb1: +; CHECK: %bb1 +; CHECK: ldrh [[REG:w[0-9]+]] + %tmp2 = load i16* %ptr, align 2 + br label %bb2 +bb2: +; CHECK: %bb2 +; CHECK-NOT: and {{w[0-9]+}}, [[REG]], #0xffff +; CHECK: cmp [[REG]], #23 + %tmp3 = phi i16 [ 0, %entry ], [ %tmp2, %bb1 ] + %cmp = icmp ult i16 %tmp3, 24 + br i1 %cmp, label %bb3, label %exit +bb3: + call void @bar() nounwind + br label %exit +exit: + ret void +} + +declare void @bar () diff --git a/test/CodeGen/ARM64/extract.ll b/test/CodeGen/ARM64/extract.ll new file mode 100644 index 0000000..119751c --- /dev/null +++ b/test/CodeGen/ARM64/extract.ll @@ -0,0 +1,58 @@ +; RUN: llc -arm64-extr-generation=true -verify-machineinstrs < %s \ +; RUN: -march=arm64 | FileCheck %s + +define i64 @ror_i64(i64 %in) { +; CHECK-LABEL: ror_i64: + %left = shl i64 %in, 19 + %right = lshr i64 %in, 45 + %val5 = or i64 %left, %right +; CHECK: extr {{x[0-9]+}}, x0, x0, #45 + ret i64 %val5 +} + +define i32 @ror_i32(i32 %in) { +; CHECK-LABEL: ror_i32: + %left = shl i32 %in, 9 + %right = lshr i32 %in, 23 + %val5 = or i32 %left, %right +; CHECK: extr {{w[0-9]+}}, w0, w0, #23 + ret i32 %val5 +} + +define i32 @extr_i32(i32 %lhs, i32 %rhs) { +; CHECK-LABEL: extr_i32: + %left = shl i32 %lhs, 6 + %right = lshr i32 %rhs, 26 + %val = or i32 %left, %right + ; Order of lhs and rhs matters here. Regalloc would have to be very odd to use + ; something other than w0 and w1. +; CHECK: extr {{w[0-9]+}}, w0, w1, #26 + + ret i32 %val +} + +define i64 @extr_i64(i64 %lhs, i64 %rhs) { +; CHECK-LABEL: extr_i64: + %right = lshr i64 %rhs, 40 + %left = shl i64 %lhs, 24 + %val = or i64 %right, %left + ; Order of lhs and rhs matters here. Regalloc would have to be very odd to use + ; something other than w0 and w1. +; CHECK: extr {{x[0-9]+}}, x0, x1, #40 + + ret i64 %val +} + +; Regression test: a bad experimental pattern crept into git which optimised +; this pattern to a single EXTR. 
+define i32 @extr_regress(i32 %a, i32 %b) {
+; CHECK-LABEL: extr_regress:
+
+  %sh1 = shl i32 %a, 14
+  %sh2 = lshr i32 %b, 14
+  %val = or i32 %sh2, %sh1
+; CHECK-NOT: extr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, #{{[0-9]+}}
+
+  ret i32 %val
+; CHECK: ret
+}
diff --git a/test/CodeGen/ARM64/extract_subvector.ll b/test/CodeGen/ARM64/extract_subvector.ll
new file mode 100644
index 0000000..20c05fb
--- /dev/null
+++ b/test/CodeGen/ARM64/extract_subvector.ll
@@ -0,0 +1,51 @@
+; RUN: llc -march=arm64 -arm64-neon-syntax=apple < %s | FileCheck %s
+
+; Extract of an upper half of a vector is an "ext.16b v0, v0, v0, #8" insn.
+
+define <8 x i8> @v8i8(<16 x i8> %a) nounwind {
+; CHECK: v8i8
+; CHECK: ext.16b v0, v0, v0, #8
+; CHECK: ret
+  %ret = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <8 x i8> %ret
+}
+
+define <4 x i16> @v4i16(<8 x i16> %a) nounwind {
+; CHECK-LABEL: v4i16:
+; CHECK: ext.16b v0, v0, v0, #8
+; CHECK: ret
+  %ret = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  ret <4 x i16> %ret
+}
+
+define <2 x i32> @v2i32(<4 x i32> %a) nounwind {
+; CHECK-LABEL: v2i32:
+; CHECK: ext.16b v0, v0, v0, #8
+; CHECK: ret
+  %ret = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
+  ret <2 x i32> %ret
+}
+
+define <1 x i64> @v1i64(<2 x i64> %a) nounwind {
+; CHECK-LABEL: v1i64:
+; CHECK: ext.16b v0, v0, v0, #8
+; CHECK: ret
+  %ret = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> <i32 1>
+  ret <1 x i64> %ret
+}
+
+define <2 x float> @v2f32(<4 x float> %a) nounwind {
+; CHECK-LABEL: v2f32:
+; CHECK: ext.16b v0, v0, v0, #8
+; CHECK: ret
+  %ret = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 2, i32 3>
+  ret <2 x float> %ret
+}
+
+define <1 x double> @v1f64(<2 x double> %a) nounwind {
+; CHECK-LABEL: v1f64:
+; CHECK: ext.16b v0, v0, v0, #8
+; CHECK: ret
+  %ret = shufflevector <2 x double> %a, <2 x double> %a, <1 x i32> <i32 1>
+  ret <1 x double> %ret
+}
diff --git a/test/CodeGen/ARM64/fast-isel-addr-offset.ll b/test/CodeGen/ARM64/fast-isel-addr-offset.ll
new file mode 100644
index 0000000..a4326dc
--- /dev/null
+++ b/test/CodeGen/ARM64/fast-isel-addr-offset.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin | FileCheck %s
+
+@sortlist = common global [5001 x i32] zeroinitializer, align 16
+@sortlist2 = common global [5001 x i64] zeroinitializer, align 16
+
+; Load an address with an offset larger than LDR imm can handle
+define i32 @foo() nounwind {
+entry:
+; CHECK: @foo
+; CHECK: adrp x[[REG:[0-9]+]], _sortlist@GOTPAGE
+; CHECK: ldr x[[REG1:[0-9]+]], [x[[REG]], _sortlist@GOTPAGEOFF]
+; CHECK: movz x[[REG2:[0-9]+]], #20000
+; CHECK: add x[[REG3:[0-9]+]], x[[REG1]], x[[REG2]]
+; CHECK: ldr w0, [x[[REG3]]]
+; CHECK: ret
+  %0 = load i32* getelementptr inbounds ([5001 x i32]* @sortlist, i32 0, i64 5000), align 4
+  ret i32 %0
+}
+
+define i64 @foo2() nounwind {
+entry:
+; CHECK: @foo2
+; CHECK: adrp x[[REG:[0-9]+]], _sortlist2@GOTPAGE
+; CHECK: ldr x[[REG1:[0-9]+]], [x[[REG]], _sortlist2@GOTPAGEOFF]
+; CHECK: movz x[[REG2:[0-9]+]], #40000
+; CHECK: add x[[REG3:[0-9]+]], x[[REG1]], x[[REG2]]
+; CHECK: ldr x0, [x[[REG3]]]
+; CHECK: ret
+  %0 = load i64* getelementptr inbounds ([5001 x i64]* @sortlist2, i32 0, i64 5000), align 4
+  ret i64 %0
+}
+
+; Load an address with a ridiculously large offset.
+; rdar://12505553 +@pd2 = common global i8* null, align 8 + +define signext i8 @foo3() nounwind ssp { +entry: +; CHECK: @foo3 +; CHECK: movz x[[REG:[0-9]+]], #2874, lsl #32 +; CHECK: movk x[[REG]], #29646, lsl #16 +; CHECK: movk x[[REG]], #12274 + %0 = load i8** @pd2, align 8 + %arrayidx = getelementptr inbounds i8* %0, i64 12345678901234 + %1 = load i8* %arrayidx, align 1 + ret i8 %1 +} diff --git a/test/CodeGen/ARM64/fast-isel-alloca.ll b/test/CodeGen/ARM64/fast-isel-alloca.ll new file mode 100644 index 0000000..8bbee16 --- /dev/null +++ b/test/CodeGen/ARM64/fast-isel-alloca.ll @@ -0,0 +1,24 @@ +; This test should cause the TargetMaterializeAlloca to be invoked +; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin | FileCheck %s + +%struct.S1Ty = type { i64 } +%struct.S2Ty = type { %struct.S1Ty, %struct.S1Ty } + +define void @takeS1(%struct.S1Ty* %V) nounwind { +entry: + %V.addr = alloca %struct.S1Ty*, align 8 + store %struct.S1Ty* %V, %struct.S1Ty** %V.addr, align 8 + ret void +} + +define void @main() nounwind { +entry: +; CHECK: main +; CHECK: mov x[[REG:[0-9]+]], sp +; CHECK-NEXT: orr x[[REG1:[0-9]+]], xzr, #0x8 +; CHECK-NEXT: add x0, x[[REG]], x[[REG1]] + %E = alloca %struct.S2Ty, align 4 + %B = getelementptr inbounds %struct.S2Ty* %E, i32 0, i32 1 + call void @takeS1(%struct.S1Ty* %B) + ret void +} diff --git a/test/CodeGen/ARM64/fast-isel-br.ll b/test/CodeGen/ARM64/fast-isel-br.ll new file mode 100644 index 0000000..8fd32fd --- /dev/null +++ b/test/CodeGen/ARM64/fast-isel-br.ll @@ -0,0 +1,155 @@ +; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin | FileCheck %s + +define void @branch1() nounwind uwtable ssp { + %x = alloca i32, align 4 + store i32 0, i32* %x, align 4 + %1 = load i32* %x, align 4 + %2 = icmp ne i32 %1, 0 + br i1 %2, label %3, label %4 + +;