From 36b56886974eae4f9c5ebc96befd3e7bfe5de338 Mon Sep 17 00:00:00 2001 From: Stephen Hines Date: Wed, 23 Apr 2014 16:57:46 -0700 Subject: Update to LLVM 3.5a. Change-Id: Ifadecab779f128e62e430c2b4f6ddd84953ed617 --- test/Transforms/AddDiscriminators/basic.ll | 59 +++ test/Transforms/AddDiscriminators/first-only.ll | 82 ++++ test/Transforms/AddDiscriminators/multiple.ll | 71 ++++ test/Transforms/ArgumentPromotion/inalloca.ll | 49 +++ test/Transforms/ArgumentPromotion/tail.ll | 20 + test/Transforms/BBVectorize/simple-int.ll | 5 +- .../2007-10-19-InlineAsmDirectives.ll | 2 +- .../CodeGenPrepare/X86/extend-sink-hoist.ll | 64 +++ test/Transforms/CodeGenPrepare/X86/lit.local.cfg | 4 + .../CodeGenPrepare/X86/x86-shuffle-sink.ll | 105 +++++ .../ConstantHoisting/X86/const-base-addr.ll | 24 ++ .../ConstantHoisting/X86/delete-dead-cast-inst.ll | 16 + test/Transforms/ConstantHoisting/X86/lit.local.cfg | 4 + test/Transforms/ConstantHoisting/X86/phi.ll | 116 ++++++ test/Transforms/ConstantHoisting/X86/stackmap.ll | 17 + test/Transforms/ConstantMerge/linker-private.ll | 23 -- test/Transforms/DeadArgElim/deadexternal.ll | 2 +- test/Transforms/DeadArgElim/keepalive.ll | 16 + test/Transforms/DeadStoreElimination/simple.ll | 9 + test/Transforms/FunctionAttrs/nocapture.ll | 15 + test/Transforms/FunctionAttrs/readattrs.ll | 6 + test/Transforms/GCOVProfiling/version.ll | 4 +- test/Transforms/GVN/2009-03-10-PREOnVoid.ll | 56 ++- .../GVN/unreachable_block_infinite_loop.ll | 2 +- test/Transforms/GlobalMerge/ARM/arm.ll | 85 ++++ test/Transforms/GlobalMerge/ARM/lit.local.cfg | 4 + test/Transforms/GlobalMerge/ARM64/arm64.ll | 88 ++++ test/Transforms/GlobalMerge/ARM64/lit.local.cfg | 4 + .../GlobalOpt/2009-02-15-BitcastAlias.ll | 2 +- test/Transforms/GlobalOpt/alias-resolve.ll | 26 +- .../GlobalOpt/alias-used-address-space.ll | 26 ++ test/Transforms/GlobalOpt/alias-used-section.ll | 8 + test/Transforms/GlobalOpt/fastcc.ll | 46 +++ test/Transforms/GlobalOpt/memset.ll | 19 +- 
test/Transforms/IndVarSimplify/iv-widen.ll | 40 ++ .../IndVarSimplify/lcssa-preservation.ll | 51 +++ .../Transforms/IndVarSimplify/lftr-extend-const.ll | 4 +- test/Transforms/IndVarSimplify/lftr-reuse.ll | 10 +- test/Transforms/IndVarSimplify/overflowcheck.ll | 56 +++ .../Transforms/IndVarSimplify/tripcount_compute.ll | 31 ++ test/Transforms/Inline/ignore-debug-info.ll | 55 +++ test/Transforms/Inline/inline-cold.ll | 88 ++++ test/Transforms/Inline/inline_invoke.ll | 5 - test/Transforms/Inline/inline_returns_twice.ll | 2 +- test/Transforms/Inline/invoke-cleanup.ll | 39 ++ test/Transforms/Inline/invoke-combine-clauses.ll | 117 ++++++ test/Transforms/Inline/ptr-diff.ll | 2 +- .../InstCombine/2007-09-10-AliasConstFold.ll | 4 +- .../InstCombine/2007-09-17-AliasConstFold2.ll | 4 +- .../InstCombine/2012-04-23-Neon-Intrinsics.ll | 69 +++- test/Transforms/InstCombine/add2.ll | 35 ++ test/Transforms/InstCombine/add4.ll | 23 ++ test/Transforms/InstCombine/bitcast-store.ll | 18 +- .../InstCombine/call-cast-target-inalloca.ll | 22 + test/Transforms/InstCombine/call-cast-target.ll | 12 + test/Transforms/InstCombine/cast-call-combine.ll | 23 ++ test/Transforms/InstCombine/cast-set.ll | 7 + test/Transforms/InstCombine/cast.ll | 57 ++- test/Transforms/InstCombine/ceil.ll | 56 +++ test/Transforms/InstCombine/constant-fold-math.ll | 47 +++ test/Transforms/InstCombine/copysign.ll | 49 +++ test/Transforms/InstCombine/div.ll | 24 ++ .../InstCombine/double-float-shrink-1.ll | 5 +- test/Transforms/InstCombine/exp2-1.ll | 24 ++ test/Transforms/InstCombine/fast-math.ll | 53 +++ test/Transforms/InstCombine/fdiv.ll | 26 ++ .../Transforms/InstCombine/float-shrink-compare.ll | 54 +++ test/Transforms/InstCombine/fmul.ll | 34 +- test/Transforms/InstCombine/fpcast.ll | 9 + test/Transforms/InstCombine/fpextend.ll | 14 +- test/Transforms/InstCombine/fpextend_x86.ll | 57 +++ test/Transforms/InstCombine/fprintf-1.ll | 8 +- test/Transforms/InstCombine/getelementptr.ll | 24 +- 
.../InstCombine/insert-extract-shuffle.ll | 37 ++ test/Transforms/InstCombine/load-addrspace-cast.ll | 12 + test/Transforms/InstCombine/mul.ll | 16 + test/Transforms/InstCombine/onehot_merge.ll | 4 +- test/Transforms/InstCombine/pow-1.ll | 29 +- test/Transforms/InstCombine/printf-1.ll | 8 +- test/Transforms/InstCombine/rem.ll | 9 + test/Transforms/InstCombine/round.ll | 90 ++++ test/Transforms/InstCombine/select-2.ll | 10 + test/Transforms/InstCombine/select-select.ll | 24 ++ test/Transforms/InstCombine/sign-test-and-or.ll | 38 ++ test/Transforms/InstCombine/sincospi.ll | 9 +- test/Transforms/InstCombine/sprintf-1.ll | 8 +- test/Transforms/InstCombine/strchr-1.ll | 13 + test/Transforms/InstCombine/sub.ll | 54 ++- test/Transforms/InstCombine/vec_extract_var_elt.ll | 8 + test/Transforms/InstCombine/vec_phi_extract.ll | 4 +- test/Transforms/InstCombine/vec_sext.ll | 23 ++ test/Transforms/InstCombine/vec_shuffle.ll | 17 + test/Transforms/InstCombine/zext.ll | 34 ++ test/Transforms/InstSimplify/compare.ll | 18 + test/Transforms/InstSimplify/undef.ll | 7 + test/Transforms/InstSimplify/vector_gep.ll | 49 ++- test/Transforms/Internalize/lists.ll | 11 +- test/Transforms/LICM/lcssa-ssa-promoter.ll | 76 ++++ test/Transforms/LICM/scalar_promote.ll | 12 +- test/Transforms/LICM/sinking.ll | 104 ++++- test/Transforms/LICM/volatile-alias.ll | 2 +- test/Transforms/LoopReroll/basic.ll | 10 +- test/Transforms/LoopReroll/nonconst_lb.ll | 152 +++++++ test/Transforms/LoopReroll/reduction.ll | 4 +- test/Transforms/LoopRotate/PhiSelfReference-1.ll | 39 ++ test/Transforms/LoopRotate/PhiSelfRefernce-1.ll | 39 -- test/Transforms/LoopRotate/dbgvalue.ll | 6 +- .../LoopRotate/preserve-loop-simplify.ll | 65 +++ test/Transforms/LoopSimplify/ashr-crash.ll | 80 ++++ test/Transforms/LoopSimplify/notify-scev.ll | 110 +++++ .../LoopStrengthReduce/ARM64/lit.local.cfg | 5 + .../LoopStrengthReduce/ARM64/lsr-memcpy.ll | 33 ++ .../LoopStrengthReduce/ARM64/lsr-memset.ll | 101 +++++ 
.../X86/no_superflous_induction_vars.ll | 50 +++ test/Transforms/LoopStrengthReduce/X86/pr17473.ll | 67 +++ .../LoopStrengthReduce/lsr-expand-quadratic.ll | 2 +- test/Transforms/LoopStrengthReduce/pr18165.ll | 88 ++++ test/Transforms/LoopUnroll/X86/lit.local.cfg | 4 + test/Transforms/LoopUnroll/X86/partial.ll | 80 ++++ test/Transforms/LoopVectorize/ARM/arm-unroll.ll | 39 ++ test/Transforms/LoopVectorize/ARM64/gather-cost.ll | 85 ++++ test/Transforms/LoopVectorize/ARM64/lit.local.cfg | 6 + .../Transforms/LoopVectorize/PowerPC/lit.local.cfg | 4 + .../LoopVectorize/PowerPC/vsx-tsvc-s173.ll | 51 +++ .../LoopVectorize/X86/already-vectorized.ll | 2 +- .../LoopVectorize/X86/fp32_to_uint32-cost-model.ll | 39 ++ .../LoopVectorize/X86/fp64_to_uint32-cost-model.ll | 40 ++ .../LoopVectorize/X86/fp_to_sint8-cost-model.ll | 25 ++ .../LoopVectorize/X86/metadata-enable.ll | 175 ++++++++ test/Transforms/LoopVectorize/X86/small-size.ll | 27 +- .../LoopVectorize/X86/uint64_to_fp64-cost-model.ll | 26 ++ .../LoopVectorize/X86/unroll-small-loops.ll | 72 +++- test/Transforms/LoopVectorize/flags.ll | 26 ++ test/Transforms/LoopVectorize/float-reduction.ll | 2 +- test/Transforms/LoopVectorize/global_alias.ll | 8 +- test/Transforms/LoopVectorize/if-pred-stores.ll | 126 ++++++ test/Transforms/LoopVectorize/increment.ll | 2 +- test/Transforms/LoopVectorize/induction.ll | 4 +- .../LoopVectorize/multi-use-reduction-bug.ll | 42 ++ .../LoopVectorize/runtime-check-readonly.ll | 12 +- test/Transforms/LoopVectorize/unroll_novec.ll | 12 +- test/Transforms/LoopVectorize/value-ptr-bug.ll | 2 +- .../Transforms/LoopVectorize/version-mem-access.ll | 87 ++++ test/Transforms/LowerAtomic/atomic-swap.ll | 2 +- test/Transforms/LowerExpectIntrinsic/basic.ll | 29 ++ test/Transforms/LowerInvoke/2004-02-29-PHICrash.ll | 15 - .../LowerInvoke/2005-08-03-InvokeWithPHI.ll | 17 - .../LowerInvoke/2005-08-03-InvokeWithPHIUse.ll | 15 - .../LowerInvoke/2008-02-14-CritEdgePhiCrash.ll | 14 - 
test/Transforms/LowerInvoke/basictest.ll | 30 -- test/Transforms/LowerInvoke/lowerinvoke.ll | 25 ++ test/Transforms/MemCpyOpt/form-memset.ll | 12 + test/Transforms/MemCpyOpt/memcpy-undef.ll | 46 +++ test/Transforms/MemCpyOpt/memcpy.ll | 17 + test/Transforms/MetaRenamer/metarenamer.ll | 4 +- test/Transforms/ObjCARC/allocas.ll | 4 +- .../Transforms/ObjCARC/contract-end-of-use-list.ll | 30 ++ ...ensure-that-exception-unwind-path-is-visited.ll | 4 +- test/Transforms/SLPVectorizer/ARM64/lit.local.cfg | 3 + .../SLPVectorizer/ARM64/mismatched-intrinsics.ll | 18 + .../SLPVectorizer/X86/crash_vectorizeTree.ll | 65 +++ test/Transforms/SLPVectorizer/X86/extractcost.ll | 30 ++ .../X86/insert-element-build-vector.ll | 25 ++ test/Transforms/SLPVectorizer/X86/intrinsic.ll | 75 ++++ test/Transforms/SLPVectorizer/X86/metadata.ll | 61 +++ test/Transforms/SLPVectorizer/X86/phi.ll | 2 +- test/Transforms/SLPVectorizer/X86/tiny-tree.ll | 15 + test/Transforms/SROA/address-spaces.ll | 68 ++++ test/Transforms/SROA/basictest.ll | 86 +++- test/Transforms/SROA/vector-promotion.ll | 47 +++ .../Inputs/bad_discriminator_value.prof | 2 + .../SampleProfile/Inputs/bad_fn_header.prof | 3 + .../SampleProfile/Inputs/bad_line_values.prof | 2 + .../SampleProfile/Inputs/bad_mangle.prof | 3 + .../SampleProfile/Inputs/bad_sample_line.prof | 3 + .../SampleProfile/Inputs/bad_samples.prof | 2 + test/Transforms/SampleProfile/Inputs/branch.prof | 5 +- test/Transforms/SampleProfile/Inputs/calls.prof | 10 + .../SampleProfile/Inputs/discriminator.prof | 8 + .../Transforms/SampleProfile/Inputs/propagate.prof | 17 + test/Transforms/SampleProfile/Inputs/syntax.prof | 3 + test/Transforms/SampleProfile/branch.ll | 18 +- test/Transforms/SampleProfile/calls.ll | 116 ++++++ test/Transforms/SampleProfile/discriminator.ll | 90 ++++ test/Transforms/SampleProfile/propagate.ll | 243 +++++++++++ test/Transforms/SampleProfile/syntax.ll | 20 + .../ScalarRepl/memset-aggregate-byte-leader.ll | 6 +- 
test/Transforms/ScalarRepl/vector_memcpy.ll | 10 +- test/Transforms/Scalarizer/basic.ll | 451 +++++++++++++++++++++ test/Transforms/Scalarizer/dbginfo.ll | 86 ++++ test/Transforms/Scalarizer/no-data-layout.ll | 25 ++ .../SimplifyCFG/X86/switch_to_lookup_table.ll | 112 +++++ test/Transforms/SimplifyCFG/basictest.ll | 30 ++ .../SimplifyCFG/no_speculative_loads_with_tsan.ll | 40 ++ .../SimplifyCFG/preserve-branchweights.ll | 84 +++- test/Transforms/SimplifyCFG/speculate-math.ll | 58 +++ .../SimplifyCFG/trapping-load-unreachable.ll | 2 +- test/Transforms/Sink/basic.ll | 79 ++++ test/Transforms/StripSymbols/2010-08-25-crash.ll | 2 +- .../StripSymbols/strip-dead-debug-info.ll | 2 +- 200 files changed, 6864 insertions(+), 339 deletions(-) create mode 100644 test/Transforms/AddDiscriminators/basic.ll create mode 100644 test/Transforms/AddDiscriminators/first-only.ll create mode 100644 test/Transforms/AddDiscriminators/multiple.ll create mode 100644 test/Transforms/ArgumentPromotion/inalloca.ll create mode 100644 test/Transforms/ArgumentPromotion/tail.ll create mode 100644 test/Transforms/CodeGenPrepare/X86/extend-sink-hoist.ll create mode 100644 test/Transforms/CodeGenPrepare/X86/lit.local.cfg create mode 100644 test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll create mode 100644 test/Transforms/ConstantHoisting/X86/const-base-addr.ll create mode 100644 test/Transforms/ConstantHoisting/X86/delete-dead-cast-inst.ll create mode 100644 test/Transforms/ConstantHoisting/X86/lit.local.cfg create mode 100644 test/Transforms/ConstantHoisting/X86/phi.ll create mode 100644 test/Transforms/ConstantHoisting/X86/stackmap.ll delete mode 100644 test/Transforms/ConstantMerge/linker-private.ll create mode 100644 test/Transforms/GlobalMerge/ARM/arm.ll create mode 100644 test/Transforms/GlobalMerge/ARM/lit.local.cfg create mode 100644 test/Transforms/GlobalMerge/ARM64/arm64.ll create mode 100644 test/Transforms/GlobalMerge/ARM64/lit.local.cfg create mode 100644 
test/Transforms/GlobalOpt/alias-used-address-space.ll create mode 100644 test/Transforms/GlobalOpt/alias-used-section.ll create mode 100644 test/Transforms/GlobalOpt/fastcc.ll create mode 100644 test/Transforms/IndVarSimplify/iv-widen.ll create mode 100644 test/Transforms/IndVarSimplify/lcssa-preservation.ll create mode 100644 test/Transforms/IndVarSimplify/overflowcheck.ll create mode 100644 test/Transforms/Inline/ignore-debug-info.ll create mode 100644 test/Transforms/Inline/inline-cold.ll create mode 100644 test/Transforms/Inline/invoke-cleanup.ll create mode 100644 test/Transforms/Inline/invoke-combine-clauses.ll create mode 100644 test/Transforms/InstCombine/call-cast-target-inalloca.ll create mode 100644 test/Transforms/InstCombine/cast-call-combine.ll create mode 100644 test/Transforms/InstCombine/ceil.ll create mode 100644 test/Transforms/InstCombine/constant-fold-math.ll create mode 100644 test/Transforms/InstCombine/copysign.ll create mode 100644 test/Transforms/InstCombine/fpextend_x86.ll create mode 100644 test/Transforms/InstCombine/insert-extract-shuffle.ll create mode 100644 test/Transforms/InstCombine/load-addrspace-cast.ll create mode 100644 test/Transforms/InstCombine/round.ll create mode 100644 test/Transforms/InstCombine/select-select.ll create mode 100644 test/Transforms/LICM/lcssa-ssa-promoter.ll create mode 100644 test/Transforms/LoopReroll/nonconst_lb.ll create mode 100644 test/Transforms/LoopRotate/PhiSelfReference-1.ll delete mode 100644 test/Transforms/LoopRotate/PhiSelfRefernce-1.ll create mode 100644 test/Transforms/LoopRotate/preserve-loop-simplify.ll create mode 100644 test/Transforms/LoopSimplify/ashr-crash.ll create mode 100644 test/Transforms/LoopSimplify/notify-scev.ll create mode 100644 test/Transforms/LoopStrengthReduce/ARM64/lit.local.cfg create mode 100644 test/Transforms/LoopStrengthReduce/ARM64/lsr-memcpy.ll create mode 100644 test/Transforms/LoopStrengthReduce/ARM64/lsr-memset.ll create mode 100644 
test/Transforms/LoopStrengthReduce/X86/no_superflous_induction_vars.ll create mode 100644 test/Transforms/LoopStrengthReduce/X86/pr17473.ll create mode 100644 test/Transforms/LoopStrengthReduce/pr18165.ll create mode 100644 test/Transforms/LoopUnroll/X86/lit.local.cfg create mode 100644 test/Transforms/LoopUnroll/X86/partial.ll create mode 100644 test/Transforms/LoopVectorize/ARM64/gather-cost.ll create mode 100644 test/Transforms/LoopVectorize/ARM64/lit.local.cfg create mode 100644 test/Transforms/LoopVectorize/PowerPC/lit.local.cfg create mode 100644 test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll create mode 100644 test/Transforms/LoopVectorize/X86/fp32_to_uint32-cost-model.ll create mode 100644 test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll create mode 100644 test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll create mode 100644 test/Transforms/LoopVectorize/X86/metadata-enable.ll create mode 100644 test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll create mode 100644 test/Transforms/LoopVectorize/if-pred-stores.ll create mode 100644 test/Transforms/LoopVectorize/multi-use-reduction-bug.ll create mode 100644 test/Transforms/LoopVectorize/version-mem-access.ll delete mode 100644 test/Transforms/LowerInvoke/2004-02-29-PHICrash.ll delete mode 100644 test/Transforms/LowerInvoke/2005-08-03-InvokeWithPHI.ll delete mode 100644 test/Transforms/LowerInvoke/2005-08-03-InvokeWithPHIUse.ll delete mode 100644 test/Transforms/LowerInvoke/2008-02-14-CritEdgePhiCrash.ll delete mode 100644 test/Transforms/LowerInvoke/basictest.ll create mode 100644 test/Transforms/LowerInvoke/lowerinvoke.ll create mode 100644 test/Transforms/MemCpyOpt/memcpy-undef.ll create mode 100644 test/Transforms/ObjCARC/contract-end-of-use-list.ll create mode 100644 test/Transforms/SLPVectorizer/ARM64/lit.local.cfg create mode 100644 test/Transforms/SLPVectorizer/ARM64/mismatched-intrinsics.ll create mode 100644 
test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll create mode 100644 test/Transforms/SLPVectorizer/X86/extractcost.ll create mode 100644 test/Transforms/SLPVectorizer/X86/intrinsic.ll create mode 100644 test/Transforms/SLPVectorizer/X86/metadata.ll create mode 100644 test/Transforms/SROA/address-spaces.ll create mode 100644 test/Transforms/SampleProfile/Inputs/bad_discriminator_value.prof create mode 100644 test/Transforms/SampleProfile/Inputs/bad_fn_header.prof create mode 100644 test/Transforms/SampleProfile/Inputs/bad_line_values.prof create mode 100644 test/Transforms/SampleProfile/Inputs/bad_mangle.prof create mode 100644 test/Transforms/SampleProfile/Inputs/bad_sample_line.prof create mode 100644 test/Transforms/SampleProfile/Inputs/bad_samples.prof create mode 100644 test/Transforms/SampleProfile/Inputs/calls.prof create mode 100644 test/Transforms/SampleProfile/Inputs/discriminator.prof create mode 100644 test/Transforms/SampleProfile/Inputs/propagate.prof create mode 100644 test/Transforms/SampleProfile/Inputs/syntax.prof create mode 100644 test/Transforms/SampleProfile/calls.ll create mode 100644 test/Transforms/SampleProfile/discriminator.ll create mode 100644 test/Transforms/SampleProfile/propagate.ll create mode 100644 test/Transforms/SampleProfile/syntax.ll create mode 100644 test/Transforms/Scalarizer/basic.ll create mode 100644 test/Transforms/Scalarizer/dbginfo.ll create mode 100644 test/Transforms/Scalarizer/no-data-layout.ll create mode 100644 test/Transforms/SimplifyCFG/no_speculative_loads_with_tsan.ll create mode 100644 test/Transforms/SimplifyCFG/speculate-math.ll (limited to 'test/Transforms') diff --git a/test/Transforms/AddDiscriminators/basic.ll b/test/Transforms/AddDiscriminators/basic.ll new file mode 100644 index 0000000..b12cbee --- /dev/null +++ b/test/Transforms/AddDiscriminators/basic.ll @@ -0,0 +1,59 @@ +; RUN: opt < %s -add-discriminators -S | FileCheck %s + +; Basic DWARF discriminator test. 
All the instructions in block +; 'if.then' should have a different discriminator value than +; the conditional branch at the end of block 'entry'. +; +; Original code: +; +; void foo(int i) { +; int x; +; if (i < 10) x = i; +; } + +define void @foo(i32 %i) #0 { +entry: + %i.addr = alloca i32, align 4 + %x = alloca i32, align 4 + store i32 %i, i32* %i.addr, align 4 + %0 = load i32* %i.addr, align 4, !dbg !10 + %cmp = icmp slt i32 %0, 10, !dbg !10 + br i1 %cmp, label %if.then, label %if.end, !dbg !10 + +if.then: ; preds = %entry + %1 = load i32* %i.addr, align 4, !dbg !10 +; CHECK: %1 = load i32* %i.addr, align 4, !dbg !12 + + store i32 %1, i32* %x, align 4, !dbg !10 +; CHECK: store i32 %1, i32* %x, align 4, !dbg !12 + + br label %if.end, !dbg !10 +; CHECK: br label %if.end, !dbg !12 + +if.end: ; preds = %if.then, %entry + ret void, !dbg !12 +} + +attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!7, !8} +!llvm.ident = !{!9} + +!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [basic.c] [DW_LANG_C99] +!1 = metadata !{metadata !"basic.c", metadata !"."} +!2 = metadata !{} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @foo, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo] +!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [basic.c] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, 
metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} +!9 = metadata !{metadata !"clang version 3.5 "} +!10 = metadata !{i32 3, i32 0, metadata !11, null} +!11 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [basic.c] +!12 = metadata !{i32 4, i32 0, metadata !4, null} + +; CHECK: !12 = metadata !{i32 3, i32 0, metadata !13, null} +; CHECK: !13 = metadata !{i32 786443, metadata !1, metadata !11, i32 3, i32 0, i32 1, i32 0} ; [ DW_TAG_lexical_block ] [./basic.c] +; CHECK: !14 = metadata !{i32 4, i32 0, metadata !4, null} diff --git a/test/Transforms/AddDiscriminators/first-only.ll b/test/Transforms/AddDiscriminators/first-only.ll new file mode 100644 index 0000000..f3b0357 --- /dev/null +++ b/test/Transforms/AddDiscriminators/first-only.ll @@ -0,0 +1,82 @@ +; RUN: opt < %s -add-discriminators -S | FileCheck %s + +; Test that the only instructions that receive a new discriminator in +; the block 'if.then' are those that share the same line number as +; the branch in 'entry'. 
+; +; Original code: +; +; void foo(int i) { +; int x, y; +; if (i < 10) { x = i; +; y = -i; +; } +; } + +define void @foo(i32 %i) #0 { +entry: + %i.addr = alloca i32, align 4 + %x = alloca i32, align 4 + %y = alloca i32, align 4 + store i32 %i, i32* %i.addr, align 4 + %0 = load i32* %i.addr, align 4, !dbg !10 + %cmp = icmp slt i32 %0, 10, !dbg !10 + br i1 %cmp, label %if.then, label %if.end, !dbg !10 + +if.then: ; preds = %entry + %1 = load i32* %i.addr, align 4, !dbg !12 + store i32 %1, i32* %x, align 4, !dbg !12 + + %2 = load i32* %i.addr, align 4, !dbg !14 +; CHECK: %2 = load i32* %i.addr, align 4, !dbg !15 + + %sub = sub nsw i32 0, %2, !dbg !14 +; CHECK: %sub = sub nsw i32 0, %2, !dbg !15 + + store i32 %sub, i32* %y, align 4, !dbg !14 +; CHECK: store i32 %sub, i32* %y, align 4, !dbg !15 + + br label %if.end, !dbg !15 +; CHECK: br label %if.end, !dbg !16 + +if.end: ; preds = %if.then, %entry + ret void, !dbg !16 +; CHECK: ret void, !dbg !17 +} + +attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!7, !8} +!llvm.ident = !{!9} + +!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5 (trunk 199750) (llvm/trunk 199751)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [first-only.c] [DW_LANG_C99] +!1 = metadata !{metadata !"first-only.c", metadata !"."} +!2 = metadata !{i32 0} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @foo, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo] +!5 = metadata !{i32 
786473, metadata !1} ; [ DW_TAG_file_type ] [first-only.c] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} +!9 = metadata !{metadata !"clang version 3.5 (trunk 199750) (llvm/trunk 199751)"} +!10 = metadata !{i32 3, i32 0, metadata !11, null} + +!11 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [first-only.c] +; CHECK: !11 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 0, i32 0} + +!12 = metadata !{i32 3, i32 0, metadata !13, null} + +!13 = metadata !{i32 786443, metadata !1, metadata !11, i32 3, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [first-only.c] +; CHECK: !13 = metadata !{i32 786443, metadata !1, metadata !14, i32 3, i32 0, i32 1, i32 0} ; [ DW_TAG_lexical_block ] [./first-only.c] + +!14 = metadata !{i32 4, i32 0, metadata !13, null} +; CHECK: !14 = metadata !{i32 786443, metadata !1, metadata !11, i32 3, i32 0, i32 1} + +!15 = metadata !{i32 5, i32 0, metadata !13, null} +; CHECK: !15 = metadata !{i32 4, i32 0, metadata !14, null} + +!16 = metadata !{i32 6, i32 0, metadata !4, null} +; CHECK: !16 = metadata !{i32 5, i32 0, metadata !14, null} +; CHECK: !17 = metadata !{i32 6, i32 0, metadata !4, null} + diff --git a/test/Transforms/AddDiscriminators/multiple.ll b/test/Transforms/AddDiscriminators/multiple.ll new file mode 100644 index 0000000..0241a0c --- /dev/null +++ b/test/Transforms/AddDiscriminators/multiple.ll @@ -0,0 +1,71 @@ +; RUN: opt < %s -add-discriminators -S | FileCheck %s + +; Discriminator support for multiple CFG paths on the same line. 
+; +; void foo(int i) { +; int x; +; if (i < 10) x = i; else x = -i; +; } +; +; The two stores inside the if-then-else line must have different discriminator +; values. + +define void @foo(i32 %i) #0 { +entry: + %i.addr = alloca i32, align 4 + %x = alloca i32, align 4 + store i32 %i, i32* %i.addr, align 4 + %0 = load i32* %i.addr, align 4, !dbg !10 + %cmp = icmp slt i32 %0, 10, !dbg !10 + br i1 %cmp, label %if.then, label %if.else, !dbg !10 + +if.then: ; preds = %entry + %1 = load i32* %i.addr, align 4, !dbg !10 +; CHECK: %1 = load i32* %i.addr, align 4, !dbg !12 + + store i32 %1, i32* %x, align 4, !dbg !10 +; CHECK: store i32 %1, i32* %x, align 4, !dbg !12 + + br label %if.end, !dbg !10 +; CHECK: br label %if.end, !dbg !12 + +if.else: ; preds = %entry + %2 = load i32* %i.addr, align 4, !dbg !10 +; CHECK: %2 = load i32* %i.addr, align 4, !dbg !14 + + %sub = sub nsw i32 0, %2, !dbg !10 +; CHECK: %sub = sub nsw i32 0, %2, !dbg !14 + + store i32 %sub, i32* %x, align 4, !dbg !10 +; CHECK: store i32 %sub, i32* %x, align 4, !dbg !14 + + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret void, !dbg !12 +} + +attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!7, !8} +!llvm.ident = !{!9} + +!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5 (trunk 199750) (llvm/trunk 199751)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [multiple.c] [DW_LANG_C99] +!1 = metadata !{metadata !"multiple.c", metadata !"."} +!2 = metadata !{i32 0} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !6, i1 false, 
i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @foo, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo] +!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [multiple.c] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} +!9 = metadata !{metadata !"clang version 3.5 (trunk 199750) (llvm/trunk 199751)"} +!10 = metadata !{i32 3, i32 0, metadata !11, null} +!11 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [multiple.c] +!12 = metadata !{i32 4, i32 0, metadata !4, null} + +; CHECK: !12 = metadata !{i32 3, i32 0, metadata !13, null} +; CHECK: !13 = metadata !{i32 786443, metadata !1, metadata !11, i32 3, i32 0, i32 1, i32 0} ; [ DW_TAG_lexical_block ] [./multiple.c] +; CHECK: !14 = metadata !{i32 3, i32 0, metadata !15, null} +; CHECK: !15 = metadata !{i32 786443, metadata !1, metadata !11, i32 3, i32 0, i32 2, i32 1} ; [ DW_TAG_lexical_block ] [./multiple.c] diff --git a/test/Transforms/ArgumentPromotion/inalloca.ll b/test/Transforms/ArgumentPromotion/inalloca.ll new file mode 100644 index 0000000..513a968 --- /dev/null +++ b/test/Transforms/ArgumentPromotion/inalloca.ll @@ -0,0 +1,49 @@ +; RUN: opt %s -argpromotion -scalarrepl -S | FileCheck %s + +target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" + +%struct.ss = type { i32, i32 } + +; Argpromote + scalarrepl should change this to passing the two integers by value. 
+define internal i32 @f(%struct.ss* inalloca %s) { +entry: + %f0 = getelementptr %struct.ss* %s, i32 0, i32 0 + %f1 = getelementptr %struct.ss* %s, i32 0, i32 1 + %a = load i32* %f0, align 4 + %b = load i32* %f1, align 4 + %r = add i32 %a, %b + ret i32 %r +} +; CHECK-LABEL: define internal i32 @f +; CHECK-NOT: load +; CHECK: ret + +define i32 @main() { +entry: + %S = alloca %struct.ss + %f0 = getelementptr %struct.ss* %S, i32 0, i32 0 + %f1 = getelementptr %struct.ss* %S, i32 0, i32 1 + store i32 1, i32* %f0, align 4 + store i32 2, i32* %f1, align 4 + %r = call i32 @f(%struct.ss* inalloca %S) + ret i32 %r +} +; CHECK-LABEL: define i32 @main +; CHECK-NOT: load +; CHECK: ret + +; Argpromote can't promote %a because of the icmp use. +define internal i1 @g(%struct.ss* %a, %struct.ss* inalloca %b) nounwind { +; CHECK: define internal i1 @g(%struct.ss* %a, %struct.ss* inalloca %b) +entry: + %c = icmp eq %struct.ss* %a, %b + ret i1 %c +} + +define i32 @test() { +entry: + %S = alloca %struct.ss + %c = call i1 @g(%struct.ss* %S, %struct.ss* inalloca %S) +; CHECK: call i1 @g(%struct.ss* %S, %struct.ss* inalloca %S) + ret i32 0 +} diff --git a/test/Transforms/ArgumentPromotion/tail.ll b/test/Transforms/ArgumentPromotion/tail.ll new file mode 100644 index 0000000..43b8996 --- /dev/null +++ b/test/Transforms/ArgumentPromotion/tail.ll @@ -0,0 +1,20 @@ +; RUN: opt %s -argpromotion -S -o - | FileCheck %s +; PR14710 + +%pair = type { i32, i32 } + +declare i8* @foo(%pair*) + +define internal void @bar(%pair* byval %Data) { +; CHECK: define internal void @bar(i32 %Data.0, i32 %Data.1) +; CHECK: %Data = alloca %pair +; CHECK-NOT: tail +; CHECK: call i8* @foo(%pair* %Data) + tail call i8* @foo(%pair* %Data) + ret void +} + +define void @zed(%pair* byval %Data) { + call void @bar(%pair* byval %Data) + ret void +} diff --git a/test/Transforms/BBVectorize/simple-int.ll b/test/Transforms/BBVectorize/simple-int.ll index e33ac61..e90900a 100644 --- a/test/Transforms/BBVectorize/simple-int.ll 
+++ b/test/Transforms/BBVectorize/simple-int.ll @@ -126,8 +126,7 @@ define double @test4(double %A1, double %A2, double %B1, double %B2, i32 %P) { ; CHECK: declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #0 ; CHECK: declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>) #0 -; CHECK: declare <2 x double> @llvm.cos.v2f64(<2 x double>) #1 -; CHECK: declare <2 x double> @llvm.powi.v2f64(<2 x double>, i32) #1 +; CHECK: declare <2 x double> @llvm.cos.v2f64(<2 x double>) #0 +; CHECK: declare <2 x double> @llvm.powi.v2f64(<2 x double>, i32) #0 ; CHECK: attributes #0 = { nounwind readnone } -; CHECK: attributes #1 = { nounwind readonly } diff --git a/test/Transforms/BranchFolding/2007-10-19-InlineAsmDirectives.ll b/test/Transforms/BranchFolding/2007-10-19-InlineAsmDirectives.ll index 9d82819..598ea0e 100644 --- a/test/Transforms/BranchFolding/2007-10-19-InlineAsmDirectives.ll +++ b/test/Transforms/BranchFolding/2007-10-19-InlineAsmDirectives.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -std-compile-opts -o - | llc -o - | grep bork_directive | wc -l | grep 2 +; RUN: opt < %s -std-compile-opts -o - | llc -no-integrated-as -o - | grep bork_directive | wc -l | grep 2 ;; We don't want branch folding to fold asm directives. diff --git a/test/Transforms/CodeGenPrepare/X86/extend-sink-hoist.ll b/test/Transforms/CodeGenPrepare/X86/extend-sink-hoist.ll new file mode 100644 index 0000000..430b992 --- /dev/null +++ b/test/Transforms/CodeGenPrepare/X86/extend-sink-hoist.ll @@ -0,0 +1,64 @@ +; RUN: opt -codegenprepare -disable-cgp-branch-opts -S < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; The first cast should be sunk into block2, in order that the +; instruction selector can form an efficient +; i64 * i64 -> i128 multiplication. 
+define i128 @sink(i64* %mem1, i64* %mem2) { +; CHECK-LABEL: block1: +; CHECK-NEXT: load +block1: + %l1 = load i64* %mem1 + %s1 = sext i64 %l1 to i128 + br label %block2 + +; CHECK-LABEL: block2: +; CHECK-NEXT: sext +; CHECK-NEXT: load +; CHECK-NEXT: sext +block2: + %l2 = load i64* %mem2 + %s2 = sext i64 %l2 to i128 + %res = mul i128 %s1, %s2 + ret i128 %res +} + +; The first cast should be hoisted into block1, in order that the +; instruction selector can form an extend-load. +define i64 @hoist(i32* %mem1, i32* %mem2) { +; CHECK-LABEL: block1: +; CHECK-NEXT: load +; CHECK-NEXT: sext +block1: + %l1 = load i32* %mem1 + br label %block2 + +; CHECK-LABEL: block2: +; CHECK-NEXT: load +; CHECK-NEXT: sext +block2: + %s1 = sext i32 %l1 to i64 + %l2 = load i32* %mem2 + %s2 = sext i32 %l2 to i64 + %res = mul i64 %s1, %s2 + ret i64 %res +} + +; Make sure the cast sink logic and OptimizeExtUses don't end up in an infinite +; loop. +define i128 @use_ext_source() { +block1: + %v1 = or i64 undef, undef + %v2 = zext i64 %v1 to i128 + br i1 undef, label %block2, label %block3 + +block2: + %v3 = add i64 %v1, 1 + %v4 = zext i64 %v3 to i128 + br label %block3 + +block3: + %res = phi i128 [ %v2, %block1 ], [ %v4, %block2 ] + ret i128 %res +} diff --git a/test/Transforms/CodeGenPrepare/X86/lit.local.cfg b/test/Transforms/CodeGenPrepare/X86/lit.local.cfg new file mode 100644 index 0000000..ba763cf --- /dev/null +++ b/test/Transforms/CodeGenPrepare/X86/lit.local.cfg @@ -0,0 +1,4 @@ +targets = set(config.root.targets_to_build.split()) +if not 'X86' in targets: + config.unsupported = True + diff --git a/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll b/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll new file mode 100644 index 0000000..e945b03 --- /dev/null +++ b/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll @@ -0,0 +1,105 @@ +; RUN: opt -S -codegenprepare -mcpu=core-avx2 %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX2 +; RUN: opt -S -codegenprepare 
-mcpu=corei7 %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-SSE2 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-darwin10.9.0" + +define <16 x i8> @test_8bit(<16 x i8> %lhs, <16 x i8> %tmp, i1 %tst) { +; CHECK-LABEL: @test_8bit +; CHECK: if_true: +; CHECK-NOT: shufflevector + +; CHECK: if_false: +; CHECK-NOT: shufflevector +; CHECK: shl <16 x i8> %lhs, %mask + %mask = shufflevector <16 x i8> %tmp, <16 x i8> undef, <16 x i32> zeroinitializer + br i1 %tst, label %if_true, label %if_false + +if_true: + ret <16 x i8> %mask + +if_false: + %res = shl <16 x i8> %lhs, %mask + ret <16 x i8> %res +} + +define <8 x i16> @test_16bit(<8 x i16> %lhs, <8 x i16> %tmp, i1 %tst) { +; CHECK-LABEL: @test_16bit +; CHECK: if_true: +; CHECK-NOT: shufflevector + +; CHECK: if_false: +; CHECK: [[SPLAT:%[0-9a-zA-Z_]+]] = shufflevector +; CHECK: shl <8 x i16> %lhs, [[SPLAT]] + %mask = shufflevector <8 x i16> %tmp, <8 x i16> undef, <8 x i32> zeroinitializer + br i1 %tst, label %if_true, label %if_false + +if_true: + ret <8 x i16> %mask + +if_false: + %res = shl <8 x i16> %lhs, %mask + ret <8 x i16> %res +} + +define <4 x i32> @test_notsplat(<4 x i32> %lhs, <4 x i32> %tmp, i1 %tst) { +; CHECK-LABEL: @test_notsplat +; CHECK: if_true: +; CHECK-NOT: shufflevector + +; CHECK: if_false: +; CHECK-NOT: shufflevector +; CHECK: shl <4 x i32> %lhs, %mask + %mask = shufflevector <4 x i32> %tmp, <4 x i32> undef, <4 x i32> + br i1 %tst, label %if_true, label %if_false + +if_true: + ret <4 x i32> %mask + +if_false: + %res = shl <4 x i32> %lhs, %mask + ret <4 x i32> %res +} + +define <4 x i32> @test_32bit(<4 x i32> %lhs, <4 x i32> %tmp, i1 %tst) { +; CHECK-AVX2-LABEL: @test_32bit +; CHECK-AVX2: if_false: +; CHECK-AVX2-NOT: shufflevector +; CHECK-AVX2: ashr <4 x i32> %lhs, %mask + +; CHECK-SSE2-LABEL: @test_32bit +; CHECK-SSE2: if_false: +; 
CHECK-SSE2: [[SPLAT:%[0-9a-zA-Z_]+]] = shufflevector +; CHECK-SSE2: ashr <4 x i32> %lhs, [[SPLAT]] + %mask = shufflevector <4 x i32> %tmp, <4 x i32> undef, <4 x i32> + br i1 %tst, label %if_true, label %if_false + +if_true: + ret <4 x i32> %mask + +if_false: + %res = ashr <4 x i32> %lhs, %mask + ret <4 x i32> %res +} + +define <2 x i64> @test_64bit(<2 x i64> %lhs, <2 x i64> %tmp, i1 %tst) { +; CHECK-AVX2-LABEL: @test_64bit +; CHECK-AVX2: if_false: +; CHECK-AVX2-NOT: shufflevector +; CHECK-AVX2: lshr <2 x i64> %lhs, %mask + +; CHECK-SSE2-LABEL: @test_64bit +; CHECK-SSE2: if_false: +; CHECK-SSE2: [[SPLAT:%[0-9a-zA-Z_]+]] = shufflevector +; CHECK-SSE2: lshr <2 x i64> %lhs, [[SPLAT]] + + %mask = shufflevector <2 x i64> %tmp, <2 x i64> undef, <2 x i32> zeroinitializer + br i1 %tst, label %if_true, label %if_false + +if_true: + ret <2 x i64> %mask + +if_false: + %res = lshr <2 x i64> %lhs, %mask + ret <2 x i64> %res +} diff --git a/test/Transforms/ConstantHoisting/X86/const-base-addr.ll b/test/Transforms/ConstantHoisting/X86/const-base-addr.ll new file mode 100644 index 0000000..01e6cdf --- /dev/null +++ b/test/Transforms/ConstantHoisting/X86/const-base-addr.ll @@ -0,0 +1,24 @@ +; RUN: opt -S -consthoist < %s | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.9.0" + +%T = type { i32, i32, i32, i32 } + +; Test if even cheap base addresses are hoisted. 
+define i32 @test1() nounwind { +; CHECK-LABEL: @test1 +; CHECK: %const = bitcast i32 12345678 to i32 +; CHECK: %1 = inttoptr i32 %const to %T* +; CHECK: %addr1 = getelementptr %T* %1, i32 0, i32 1 + %addr1 = getelementptr %T* inttoptr (i32 12345678 to %T*), i32 0, i32 1 + %tmp1 = load i32* %addr1 + %addr2 = getelementptr %T* inttoptr (i32 12345678 to %T*), i32 0, i32 2 + %tmp2 = load i32* %addr2 + %addr3 = getelementptr %T* inttoptr (i32 12345678 to %T*), i32 0, i32 3 + %tmp3 = load i32* %addr3 + %tmp4 = add i32 %tmp1, %tmp2 + %tmp5 = add i32 %tmp3, %tmp4 + ret i32 %tmp5 +} + diff --git a/test/Transforms/ConstantHoisting/X86/delete-dead-cast-inst.ll b/test/Transforms/ConstantHoisting/X86/delete-dead-cast-inst.ll new file mode 100644 index 0000000..f8e478e --- /dev/null +++ b/test/Transforms/ConstantHoisting/X86/delete-dead-cast-inst.ll @@ -0,0 +1,16 @@ +; Test if this compiles without assertions. +; RUN: opt -S -consthoist < %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.9.0" + +%T = type { i32, i32, i32, i32 } + +define i32 @test1() nounwind { + %base = inttoptr i32 12345678 to %T* + %addr1 = getelementptr %T* %base, i32 0, i32 1 + %addr2 = getelementptr %T* %base, i32 0, i32 2 + %addr3 = getelementptr %T* %base, i32 0, i32 3 + ret i32 12345678 +} + diff --git a/test/Transforms/ConstantHoisting/X86/lit.local.cfg b/test/Transforms/ConstantHoisting/X86/lit.local.cfg new file mode 100644 index 0000000..ba763cf --- /dev/null +++ b/test/Transforms/ConstantHoisting/X86/lit.local.cfg @@ -0,0 +1,4 @@ +targets = set(config.root.targets_to_build.split()) +if not 'X86' in targets: + config.unsupported = True + diff --git a/test/Transforms/ConstantHoisting/X86/phi.ll b/test/Transforms/ConstantHoisting/X86/phi.ll new file mode 100644 index 0000000..086df14 --- /dev/null +++ b/test/Transforms/ConstantHoisting/X86/phi.ll @@ -0,0 +1,116 @@ +; RUN: opt -S -consthoist < %s | FileCheck %s + +target datalayout = 
"e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.9.0" + +; PR18626 +define i8* @test1(i1 %cmp, i64* %tmp) { +entry: + call void @foo(i8* inttoptr (i64 68719476735 to i8*)) + br i1 %cmp, label %if.end, label %return + +if.end: ; preds = %bb1 + call void @foo(i8* inttoptr (i64 68719476736 to i8*)) + br label %return + +return: + %retval.0 = phi i8* [ null, %entry ], [ inttoptr (i64 68719476736 to i8*), %if.end ] + store i64 1172321806, i64* %tmp + ret i8* %retval.0 + +; CHECK-LABEL: @test1 +; CHECK: if.end: +; CHECK: %2 = inttoptr i64 %const to i8* +; CHECK-NEXT: br +; CHECK: return: +; CHECK-NEXT: %retval.0 = phi i8* [ null, %entry ], [ %2, %if.end ] +} + +define void @test2(i1 %cmp, i64** %tmp) { +entry: + call void @foo(i8* inttoptr (i64 68719476736 to i8*)) + br i1 %cmp, label %if.end, label %return + +if.end: ; preds = %bb1 + call void @foo(i8* inttoptr (i64 68719476736 to i8*)) + br label %return + +return: + store i64* inttoptr (i64 68719476735 to i64*), i64** %tmp + ret void + +; CHECK-LABEL: @test2 +; CHECK: return: +; CHECK-NEXT: %const_mat = add i64 %const, -1 +; CHECK-NEXT: inttoptr i64 %const_mat to i64* +} + +declare void @foo(i8*) + +; PR18768 +define i32 @test3(i1 %c) { +entry: + br i1 %c, label %if.then, label %if.end3 + +if.then: ; preds = %entry + br label %if.end3 + +if.end3: ; preds = %if.then, %entry + %d.0 = phi i32* [ inttoptr (i64 985162435264511 to i32*), %entry ], [ null, %if.then ] + %cmp4 = icmp eq i32* %d.0, inttoptr (i64 985162435264511 to i32*) + %cmp6 = icmp eq i32* %d.0, inttoptr (i64 985162418487296 to i32*) + %or = or i1 %cmp4, %cmp6 + br i1 %or, label %if.then8, label %if.end9 + +if.then8: ; preds = %if.end3 + ret i32 1 + +if.end9: ; preds = %if.then8, %if.end3 + ret i32 undef +} + +; +define i64 @switch_test1(i64 %a) { +; CHECK-LABEL: @switch_test1 +; CHECK: %0 = phi i64 [ %const, %case2 ], [ %const_mat, %Entry ], [ %const_mat, %Entry ] +Entry: + %sel = add i64 %a, 4519019440 + switch i64 %sel, 
label %fail [ + i64 462, label %continuation + i64 449, label %case2 + i64 443, label %continuation + ] + +case2: + br label %continuation + +continuation: + %0 = phi i64 [ 4519019440, %case2 ], [ 4519019460, %Entry ], [ 4519019460, %Entry ] + ret i64 0; + +fail: + ret i64 -1; +} + +define i64 @switch_test2(i64 %a) { +; CHECK-LABEL: @switch_test2 +; CHECK: %2 = phi i64* [ %1, %case2 ], [ %0, %Entry ], [ %0, %Entry ] +Entry: + %sel = add i64 %a, 4519019440 + switch i64 %sel, label %fail [ + i64 462, label %continuation + i64 449, label %case2 + i64 443, label %continuation + ] + +case2: + br label %continuation + +continuation: + %0 = phi i64* [ inttoptr(i64 4519019440 to i64*), %case2 ], [ inttoptr(i64 4519019460 to i64*), %Entry ], [ inttoptr(i64 4519019460 to i64*), %Entry ] + ret i64 0; + +fail: + ret i64 -1; +} + diff --git a/test/Transforms/ConstantHoisting/X86/stackmap.ll b/test/Transforms/ConstantHoisting/X86/stackmap.ll new file mode 100644 index 0000000..cef022e --- /dev/null +++ b/test/Transforms/ConstantHoisting/X86/stackmap.ll @@ -0,0 +1,17 @@ +; RUN: opt -S -consthoist < %s | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.9.0" + +; Test if the 3rd argument of a stackmap is hoisted. +define i128 @test1(i128 %a) { +; CHECK-LABEL: @test1 +; CHECK: %const = bitcast i128 13464618275673403322 to i128 +; CHECK: tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 1, i32 24, i128 %const) +entry: + %0 = add i128 %a, 13464618275673403322 + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 1, i32 24, i128 13464618275673403322) + ret i128 %0 +} + +declare void @llvm.experimental.stackmap(i64, i32, ...) 
diff --git a/test/Transforms/ConstantMerge/linker-private.ll b/test/Transforms/ConstantMerge/linker-private.ll deleted file mode 100644 index eba7880..0000000 --- a/test/Transforms/ConstantMerge/linker-private.ll +++ /dev/null @@ -1,23 +0,0 @@ -; RUN: opt < %s -constmerge -S | FileCheck %s -; - -%0 = type opaque -%struct.NSConstantString = type { i32*, i32, i8*, i32 } - -; CHECK: @.str3 = linker_private unnamed_addr constant [1 x i8] zeroinitializer, align 1 - -@isLogVisible = global i8 0, align 1 -@__CFConstantStringClassReference = external global [0 x i32] -@.str3 = linker_private unnamed_addr constant [1 x i8] zeroinitializer, align 1 -@_unnamed_cfstring_4 = private constant %struct.NSConstantString { i32* getelementptr inbounds ([0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 1992, i8* getelementptr inbounds ([1 x i8]* @.str3, i32 0, i32 0), i32 0 }, section "__DATA,__cfstring" -@null.array = weak_odr constant [1 x i8] zeroinitializer, align 1 - -define linkonce_odr void @bar() nounwind ssp align 2 { -entry: - %stack = alloca i8*, align 4 - %call = call %0* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %0* (i8*, i8*, %0*)*)(i8* null, i8* null, %0* bitcast (%struct.NSConstantString* @_unnamed_cfstring_4 to %0*)) - store i8* getelementptr inbounds ([1 x i8]* @null.array, i32 0, i32 0), i8** %stack, align 4 - ret void -} - -declare i8* @objc_msgSend(i8*, i8*, ...) 
nonlazybind diff --git a/test/Transforms/DeadArgElim/deadexternal.ll b/test/Transforms/DeadArgElim/deadexternal.ll index acbcf75..665d7db 100644 --- a/test/Transforms/DeadArgElim/deadexternal.ll +++ b/test/Transforms/DeadArgElim/deadexternal.ll @@ -32,7 +32,7 @@ entry: %i = alloca i32, align 4 store volatile i32 10, i32* %i, align 4 ; CHECK: %tmp = load volatile i32* %i, align 4 -; CHECK-next: call void @f(i32 undef) +; CHECK-NEXT: call void @f(i32 undef) %tmp = load volatile i32* %i, align 4 call void @f(i32 %tmp) ret void diff --git a/test/Transforms/DeadArgElim/keepalive.ll b/test/Transforms/DeadArgElim/keepalive.ll index 82e01f2..16569db 100644 --- a/test/Transforms/DeadArgElim/keepalive.ll +++ b/test/Transforms/DeadArgElim/keepalive.ll @@ -28,4 +28,20 @@ define void @caller() { ret void } +; We can't remove 'this' here, as that would put argmem in ecx instead of +; memory. +define internal x86_thiscallcc i32 @unused_this(i32* %this, i32* inalloca %argmem) { + %v = load i32* %argmem + ret i32 %v +} +; CHECK-LABEL: define internal x86_thiscallcc i32 @unused_this(i32* %this, i32* inalloca %argmem) + +define i32 @caller2() { + %t = alloca i32 + %m = alloca inalloca i32 + store i32 42, i32* %m + %v = call x86_thiscallcc i32 @unused_this(i32* %t, i32* inalloca %m) + ret i32 %v +} + ; CHECK: attributes #0 = { nounwind } diff --git a/test/Transforms/DeadStoreElimination/simple.ll b/test/Transforms/DeadStoreElimination/simple.ll index ec98466..cdfe226 100644 --- a/test/Transforms/DeadStoreElimination/simple.ll +++ b/test/Transforms/DeadStoreElimination/simple.ll @@ -105,6 +105,15 @@ define void @test9(%struct.x* byval %a) nounwind { ; CHECK-NEXT: ret void } +; Test for inalloca handling. +define void @test9_2(%struct.x* inalloca %a) nounwind { + %tmp2 = getelementptr %struct.x* %a, i32 0, i32 0 + store i32 1, i32* %tmp2, align 4 + ret void +; CHECK-LABEL: @test9_2( +; CHECK-NEXT: ret void +} + ; va_arg has fuzzy dependence, the store shouldn't be zapped. 
define double @test10(i8* %X) { %X_addr = alloca i8* diff --git a/test/Transforms/FunctionAttrs/nocapture.ll b/test/Transforms/FunctionAttrs/nocapture.ll index 110bd03..d2460c0 100644 --- a/test/Transforms/FunctionAttrs/nocapture.ll +++ b/test/Transforms/FunctionAttrs/nocapture.ll @@ -91,6 +91,21 @@ l: ret i32 %val } +; CHECK: define i32 @nc1_addrspace(i32* %q, i32 addrspace(1)* nocapture %p, i1 %b) +define i32 @nc1_addrspace(i32* %q, i32 addrspace(1)* %p, i1 %b) { +e: + br label %l +l: + %x = phi i32 addrspace(1)* [ %p, %e ] + %y = phi i32* [ %q, %e ] + %tmp = addrspacecast i32 addrspace(1)* %x to i32* ; [#uses=2] + %tmp2 = select i1 %b, i32* %tmp, i32* %y + %val = load i32* %tmp2 ; [#uses=1] + store i32 0, i32* %tmp + store i32* %y, i32** @g + ret i32 %val +} + ; CHECK: define void @nc2(i32* nocapture %p, i32* %q) define void @nc2(i32* %p, i32* %q) { %1 = call i32 @nc1(i32* %q, i32* %p, i1 0) ; [#uses=0] diff --git a/test/Transforms/FunctionAttrs/readattrs.ll b/test/Transforms/FunctionAttrs/readattrs.ll index 0842f56..7ae38bb 100644 --- a/test/Transforms/FunctionAttrs/readattrs.ll +++ b/test/Transforms/FunctionAttrs/readattrs.ll @@ -45,3 +45,9 @@ define void @test6_2(i8** %p, i8* %q) { call void @test6_1() ret void } + +; CHECK: define void @test7_1(i32* inalloca nocapture %a) +; inalloca parameters are always considered written +define void @test7_1(i32* inalloca %a) { + ret void +} diff --git a/test/Transforms/GCOVProfiling/version.ll b/test/Transforms/GCOVProfiling/version.ll index 2f1bd70..04f3f99 100644 --- a/test/Transforms/GCOVProfiling/version.ll +++ b/test/Transforms/GCOVProfiling/version.ll @@ -1,11 +1,11 @@ ; RUN: echo '!9 = metadata !{metadata !"%T/version.ll", metadata !0}' > %t1 ; RUN: cat %s %t1 > %t2 ; RUN: opt -insert-gcov-profiling -disable-output < %t2 -; RUN: head -c12 %T/version.gcno | grep '^oncg\*204MVLL$' +; RUN: head -c8 %T/version.gcno | grep '^oncg\*204' ; RUN: rm %T/version.gcno ; RUN: not opt -insert-gcov-profiling 
-default-gcov-version=asdfasdf -disable-output < %t2 ; RUN: opt -insert-gcov-profiling -default-gcov-version=407* -disable-output < %t2 -; RUN: head -c12 %T/version.gcno | grep '^oncg\*704MVLL$' +; RUN: head -c8 %T/version.gcno | grep '^oncg\*704' ; RUN: rm %T/version.gcno define void @test() { diff --git a/test/Transforms/GVN/2009-03-10-PREOnVoid.ll b/test/Transforms/GVN/2009-03-10-PREOnVoid.ll index 89d6a5f..fd31fce 100644 --- a/test/Transforms/GVN/2009-03-10-PREOnVoid.ll +++ b/test/Transforms/GVN/2009-03-10-PREOnVoid.ll @@ -53,30 +53,58 @@ bb11: ; preds = %bb7, %bb5 unreachable } -declare i32 @pthread_once(i32*, void ()*) +define i32 @pthread_once(i32*, void ()*) { + ret i32 0 +} -declare i8* @pthread_getspecific(i32) +define i8* @pthread_getspecific(i32) { + ret i8* null +} -declare i32 @pthread_setspecific(i32, i8*) +define i32 @pthread_setspecific(i32, i8*) { + ret i32 0 +} -declare i32 @pthread_create(i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*) +define i32 @pthread_create(i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*) { + ret i32 0 +} -declare i32 @pthread_cancel(i32) +define i32 @pthread_cancel(i32) { + ret i32 0 +} -declare i32 @pthread_mutex_lock(%struct.pthread_mutex_t*) +define i32 @pthread_mutex_lock(%struct.pthread_mutex_t*) { + ret i32 0 +} -declare i32 @pthread_mutex_trylock(%struct.pthread_mutex_t*) +define i32 @pthread_mutex_trylock(%struct.pthread_mutex_t*) { + ret i32 0 +} -declare i32 @pthread_mutex_unlock(%struct.pthread_mutex_t*) +define i32 @pthread_mutex_unlock(%struct.pthread_mutex_t*) { + ret i32 0 +} -declare i32 @pthread_mutex_init(%struct.pthread_mutex_t*, %struct.__sched_param*) +define i32 @pthread_mutex_init(%struct.pthread_mutex_t*, %struct.__sched_param*) { + ret i32 0 +} -declare i32 @pthread_key_create(i32*, void (i8*)*) +define i32 @pthread_key_create(i32*, void (i8*)*) { + ret i32 0 +} -declare i32 @pthread_key_delete(i32) +define i32 @pthread_key_delete(i32) { + ret i32 0 +} -declare i32 
@pthread_mutexattr_init(%struct.__sched_param*) +define i32 @pthread_mutexattr_init(%struct.__sched_param*) { + ret i32 0 +} -declare i32 @pthread_mutexattr_settype(%struct.__sched_param*, i32) +define i32 @pthread_mutexattr_settype(%struct.__sched_param*, i32) { + ret i32 0 +} -declare i32 @pthread_mutexattr_destroy(%struct.__sched_param*) +define i32 @pthread_mutexattr_destroy(%struct.__sched_param*) { + ret i32 0 +} diff --git a/test/Transforms/GVN/unreachable_block_infinite_loop.ll b/test/Transforms/GVN/unreachable_block_infinite_loop.ll index fe335ce..fca5a28 100644 --- a/test/Transforms/GVN/unreachable_block_infinite_loop.ll +++ b/test/Transforms/GVN/unreachable_block_infinite_loop.ll @@ -1,4 +1,4 @@ -; RUN: opt -memdep -gvn -disable-output +; RUN: opt -memdep -gvn -disable-output < %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin10.0" diff --git a/test/Transforms/GlobalMerge/ARM/arm.ll b/test/Transforms/GlobalMerge/ARM/arm.ll new file mode 100644 index 0000000..8c77de6 --- /dev/null +++ b/test/Transforms/GlobalMerge/ARM/arm.ll @@ -0,0 +1,85 @@ +; RUN: llc %s -O0 -o - | FileCheck -check-prefix=NO-MERGE %s +; RUN: llc %s -O0 -o - -global-merge=false | FileCheck -check-prefix=NO-MERGE %s +; RUN: llc %s -O0 -o - -global-merge=true | FileCheck -check-prefix=NO-MERGE %s +; RUN: llc %s -O1 -o - | FileCheck -check-prefix=MERGE %s +; RUN: llc %s -O1 -o - -global-merge=false | FileCheck -check-prefix=NO-MERGE %s +; RUN: llc %s -O1 -o - -global-merge=true | FileCheck -check-prefix=MERGE %s + +; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2 +; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2 +; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2 +; MERGE: .zerofill __DATA,__bss,__MergedGlobals,60,4 +; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2 +; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2 +; MERGE-NOT: .zerofill 
__DATA,__bss,_foo,20,2 + +; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4 +; NO-MERGE: .zerofill __DATA,__bss,_bar,20,2 +; NO-MERGE: .zerofill __DATA,__bss,_baz,20,2 +; NO-MERGE: .zerofill __DATA,__bss,_foo,20,2 +; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4 + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" +target triple = "thumbv7-apple-ios3.0.0" + +@bar = internal global [5 x i32] zeroinitializer, align 4 +@baz = internal global [5 x i32] zeroinitializer, align 4 +@foo = internal global [5 x i32] zeroinitializer, align 4 + +; Function Attrs: nounwind ssp +define internal void @initialize() #0 { + %1 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %1, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 0), align 4, !tbaa !1 + %2 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %2, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 0), align 4, !tbaa !1 + %3 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %3, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 1), align 4, !tbaa !1 + %4 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %4, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 1), align 4, !tbaa !1 + %5 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %5, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 2), align 4, !tbaa !1 + %6 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %6, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 2), align 4, !tbaa !1 + %7 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %7, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 3), align 4, !tbaa !1 + %8 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %8, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 3), align 4, !tbaa !1 + %9 
= tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %9, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 4), align 4, !tbaa !1 + %10 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %10, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 4), align 4, !tbaa !1 + ret void +} + +declare i32 @calc(...) #1 + +; Function Attrs: nounwind ssp +define internal void @calculate() #0 { + %1 = load <4 x i32>* bitcast ([5 x i32]* @bar to <4 x i32>*), align 4 + %2 = load <4 x i32>* bitcast ([5 x i32]* @baz to <4 x i32>*), align 4 + %3 = mul <4 x i32> %2, %1 + store <4 x i32> %3, <4 x i32>* bitcast ([5 x i32]* @foo to <4 x i32>*), align 4 + %4 = load i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 4), align 4, !tbaa !1 + %5 = load i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 4), align 4, !tbaa !1 + %6 = mul nsw i32 %5, %4 + store i32 %6, i32* getelementptr inbounds ([5 x i32]* @foo, i32 0, i32 4), align 4, !tbaa !1 + ret void +} + +; Function Attrs: nounwind readnone ssp +define internal i32* @returnFoo() #2 { + ret i32* getelementptr inbounds ([5 x i32]* @foo, i32 0, i32 0) +} + +attributes #0 = { nounwind ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind readnone ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind } + +!llvm.ident = !{!0} + 
+!0 = metadata !{metadata !"LLVM version 3.4 "} +!1 = metadata !{metadata !2, metadata !2, i64 0} +!2 = metadata !{metadata !"int", metadata !3, i64 0} +!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0} +!4 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/Transforms/GlobalMerge/ARM/lit.local.cfg b/test/Transforms/GlobalMerge/ARM/lit.local.cfg new file mode 100644 index 0000000..8a3ba96 --- /dev/null +++ b/test/Transforms/GlobalMerge/ARM/lit.local.cfg @@ -0,0 +1,4 @@ +targets = set(config.root.targets_to_build.split()) +if not 'ARM' in targets: + config.unsupported = True + diff --git a/test/Transforms/GlobalMerge/ARM64/arm64.ll b/test/Transforms/GlobalMerge/ARM64/arm64.ll new file mode 100644 index 0000000..eea474a --- /dev/null +++ b/test/Transforms/GlobalMerge/ARM64/arm64.ll @@ -0,0 +1,88 @@ +; RUN: llc %s -O0 -o - | FileCheck -check-prefix=NO-MERGE %s +; RUN: llc %s -O0 -o - -global-merge=false | FileCheck -check-prefix=NO-MERGE %s +; RUN: llc %s -O0 -o - -global-merge=true | FileCheck -check-prefix=NO-MERGE %s +; RUN: llc %s -O1 -o - | FileCheck -check-prefix=MERGE %s +; RUN: llc %s -O1 -o - -global-merge=false | FileCheck -check-prefix=NO-MERGE %s +; RUN: llc %s -O1 -o - -global-merge=true | FileCheck -check-prefix=MERGE %s + +; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2 +; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2 +; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2 +; MERGE: .zerofill __DATA,__bss,__MergedGlobals,60,4 +; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2 +; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2 +; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2 + +; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4 +; NO-MERGE: .zerofill __DATA,__bss,_bar,20,2 +; NO-MERGE: .zerofill __DATA,__bss,_baz,20,2 +; NO-MERGE: .zerofill __DATA,__bss,_foo,20,2 +; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4 + +target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128" +target triple = "arm64-apple-ios7.0.0" + +@bar = internal global [5 x i32] zeroinitializer, align 4 +@baz = internal global [5 x i32] zeroinitializer, align 4 +@foo = internal global [5 x i32] zeroinitializer, align 4 + +; Function Attrs: nounwind ssp +define internal void @initialize() #0 { + %1 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %1, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 0), align 4 + %2 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %2, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 0), align 4 + %3 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %3, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 1), align 4 + %4 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %4, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 1), align 4 + %5 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %5, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 2), align 4 + %6 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %6, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 2), align 4 + %7 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %7, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 3), align 4 + %8 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %8, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 3), align 4 + %9 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %9, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 4), align 4 + %10 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %10, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 4), align 4 + ret void +} + +declare i32 @calc(...) 
+ +; Function Attrs: nounwind ssp +define internal void @calculate() #0 { + %1 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 0), align 4 + %2 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 0), align 4 + %3 = mul nsw i32 %2, %1 + store i32 %3, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 0), align 4 + %4 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 1), align 4 + %5 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 1), align 4 + %6 = mul nsw i32 %5, %4 + store i32 %6, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 1), align 4 + %7 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 2), align 4 + %8 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 2), align 4 + %9 = mul nsw i32 %8, %7 + store i32 %9, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 2), align 4 + %10 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 3), align 4 + %11 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 3), align 4 + %12 = mul nsw i32 %11, %10 + store i32 %12, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 3), align 4 + %13 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 4), align 4 + %14 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 4), align 4 + %15 = mul nsw i32 %14, %13 + store i32 %15, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 4), align 4 + ret void +} + +; Function Attrs: nounwind readnone ssp +define internal i32* @returnFoo() #1 { + ret i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 0) +} + +attributes #0 = { nounwind ssp } +attributes #1 = { nounwind readnone ssp } +attributes #2 = { nounwind } diff --git a/test/Transforms/GlobalMerge/ARM64/lit.local.cfg b/test/Transforms/GlobalMerge/ARM64/lit.local.cfg new file mode 100644 index 0000000..a75a42b --- /dev/null +++ b/test/Transforms/GlobalMerge/ARM64/lit.local.cfg @@ -0,0 +1,4 @@ +targets = 
set(config.root.targets_to_build.split()) +if not 'ARM64' in targets: + config.unsupported = True + diff --git a/test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll b/test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll index a1b69ef..d6a565a 100644 --- a/test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll +++ b/test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -globalopt -@g = external global i32 +@g = global i32 0 @a = alias bitcast (i32* @g to i8*) diff --git a/test/Transforms/GlobalOpt/alias-resolve.ll b/test/Transforms/GlobalOpt/alias-resolve.ll index 32f4bf8..2d5a956 100644 --- a/test/Transforms/GlobalOpt/alias-resolve.ll +++ b/test/Transforms/GlobalOpt/alias-resolve.ll @@ -1,31 +1,35 @@ -; We use a temporary file so that the test fails when opt crashes. - -; RUN: opt < %s -globalopt -S > %t -; RUN: FileCheck %s < %t +; RUN: opt < %s -globalopt -S | FileCheck %s @foo1 = alias void ()* @foo2 -; CHECK: @foo1 = alias void ()* @foo2 +; CHECK: @foo1 = alias void ()* @bar2 -@foo2 = alias weak void()* @bar1 -; CHECK: @foo2 = alias weak void ()* @bar2 +@foo2 = alias void()* @bar1 +; CHECK: @foo2 = alias void ()* @bar2 @bar1 = alias void ()* @bar2 ; CHECK: @bar1 = alias void ()* @bar2 -declare void @bar2() -; CHECK: declare void @bar2() +@weak1 = alias weak void ()* @bar2 +; CHECK: @weak1 = alias weak void ()* @bar2 + +define void @bar2() { + ret void +} +; CHECK: define void @bar2() define void @baz() { entry: call void @foo1() -; CHECK: call void @foo2() +; CHECK: call void @bar2() call void @foo2() -; CHECK: call void @foo2() +; CHECK: call void @bar2() call void @bar1() ; CHECK: call void @bar2() + call void @weak1() +; CHECK: call void @weak1() ret void } diff --git a/test/Transforms/GlobalOpt/alias-used-address-space.ll b/test/Transforms/GlobalOpt/alias-used-address-space.ll new file mode 100644 index 0000000..633cd34 --- /dev/null +++ b/test/Transforms/GlobalOpt/alias-used-address-space.ll @@ -0,0 +1,26 @@ +; RUN: opt -S 
-globalopt < %s | FileCheck %s + +target datalayout = "p:32:32:32-p1:16:16:16" + +@c = addrspace(1) global i8 42 + +@i = internal addrspace(1) global i8 42 + +; CHECK: @ia = internal addrspace(1) global i8 42 +@ia = alias internal i8 addrspace(1)* @i + +@llvm.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(1)* @ca to i8*)], section "llvm.metadata" +; CHECK-DAG: @llvm.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(1)* @ca to i8*)], section "llvm.metadata" + +@llvm.compiler.used = appending global [2 x i8*] [i8* addrspacecast(i8 addrspace(1)* @ia to i8*), i8* addrspacecast (i8 addrspace(1)* @i to i8*)], section "llvm.metadata" +; CHECK-DAG: @llvm.compiler.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(1)* @ia to i8*)], section "llvm.metadata" + +@sameAsUsed = global [1 x i8*] [i8* addrspacecast(i8 addrspace(1)* @ca to i8*)] +; CHECK-DAG: @sameAsUsed = global [1 x i8*] [i8* addrspacecast (i8 addrspace(1)* @c to i8*)] + +@ca = alias internal i8 addrspace(1)* @c +; CHECK: @ca = alias internal i8 addrspace(1)* @c + +define i8 addrspace(1)* @h() { + ret i8 addrspace(1)* @ca +} diff --git a/test/Transforms/GlobalOpt/alias-used-section.ll b/test/Transforms/GlobalOpt/alias-used-section.ll new file mode 100644 index 0000000..987c4a4 --- /dev/null +++ b/test/Transforms/GlobalOpt/alias-used-section.ll @@ -0,0 +1,8 @@ +; RUN: opt -S -globalopt < %s | FileCheck %s + +@_Z17in_custom_section = internal global i8 42, section "CUSTOM" +@in_custom_section = protected dllexport alias internal i8* @_Z17in_custom_section + +; CHECK: @in_custom_section = internal protected dllexport global i8 42, section "CUSTOM" + +@llvm.used = appending global [1 x i8*] [i8* @in_custom_section], section "llvm.metadata" diff --git a/test/Transforms/GlobalOpt/fastcc.ll b/test/Transforms/GlobalOpt/fastcc.ll new file mode 100644 index 0000000..76122b2 --- /dev/null +++ b/test/Transforms/GlobalOpt/fastcc.ll @@ -0,0 +1,46 @@ +; RUN: opt < %s 
-globalopt -S | FileCheck %s + +define internal i32 @f(i32* %m) { +; CHECK-LABEL: define internal fastcc i32 @f + %v = load i32* %m + ret i32 %v +} + +define internal x86_thiscallcc i32 @g(i32* %m) { +; CHECK-LABEL: define internal fastcc i32 @g + %v = load i32* %m + ret i32 %v +} + +; Leave this one alone, because the user went out of their way to request this +; convention. +define internal coldcc i32 @h(i32* %m) { +; CHECK-LABEL: define internal coldcc i32 @h + %v = load i32* %m + ret i32 %v +} + +define internal i32 @j(i32* %m) { +; CHECK-LABEL: define internal i32 @j + %v = load i32* %m + ret i32 %v +} + +define void @call_things() { + %m = alloca i32 + call i32 @f(i32* %m) + call x86_thiscallcc i32 @g(i32* %m) + call coldcc i32 @h(i32* %m) + call i32 @j(i32* %m) + ret void +} + +@llvm.used = appending global [1 x i8*] [ + i8* bitcast (i32(i32*)* @j to i8*) +], section "llvm.metadata" + +; CHECK-LABEL: define void @call_things() +; CHECK: call fastcc i32 @f +; CHECK: call fastcc i32 @g +; CHECK: call coldcc i32 @h +; CHECK: call i32 @j diff --git a/test/Transforms/GlobalOpt/memset.ll b/test/Transforms/GlobalOpt/memset.ll index 3bb5ce9..85320b7 100644 --- a/test/Transforms/GlobalOpt/memset.ll +++ b/test/Transforms/GlobalOpt/memset.ll @@ -1,6 +1,8 @@ -; both globals are write only, delete them. +; RUN: opt -S -globalopt < %s | FileCheck %s -; RUN: opt < %s -globalopt -S | not grep internal +; CHECK-NOT: internal + +; Both globals are write only, delete them. 
@G0 = internal global [58 x i8] c"asdlfkajsdlfkajsd;lfkajds;lfkjasd;flkajsd;lkfja;sdlkfjasd\00" ; <[58 x i8]*> [#uses=1] @G1 = internal global [4 x i32] [ i32 1, i32 2, i32 3, i32 4 ] ; <[4 x i32]*> [#uses=1] @@ -13,6 +15,17 @@ define void @foo() { ret void } -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind +@G0_as1 = internal addrspace(1) global [58 x i8] c"asdlfkajsdlfkajsd;lfkajds;lfkjasd;flkajsd;lkfja;sdlkfjasd\00" ; <[58 x i8]*> [#uses=1] +@G1_as1 = internal addrspace(1) global [4 x i32] [ i32 1, i32 2, i32 3, i32 4 ] ; <[4 x i32]*> [#uses=1] + +define void @foo_as1() { + %Blah = alloca [58 x i8] + %tmp3 = bitcast [58 x i8]* %Blah to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* addrspacecast ([4 x i32] addrspace(1)* @G1_as1 to i8*), i8* %tmp3, i32 16, i32 1, i1 false) + call void @llvm.memset.p1i8.i32(i8 addrspace(1)* getelementptr inbounds ([58 x i8] addrspace(1)* @G0_as1, i32 0, i32 0), i8 17, i32 58, i32 1, i1 false) + ret void +} +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind +declare void @llvm.memset.p1i8.i32(i8 addrspace(1)* nocapture, i8, i32, i32, i1) nounwind \ No newline at end of file diff --git a/test/Transforms/IndVarSimplify/iv-widen.ll b/test/Transforms/IndVarSimplify/iv-widen.ll new file mode 100644 index 0000000..c899e2f --- /dev/null +++ b/test/Transforms/IndVarSimplify/iv-widen.ll @@ -0,0 +1,40 @@ +; RUN: opt < %s -indvars -S | FileCheck %s + +target triple = "x86_64-apple-darwin" + +; CHECK-LABEL: @sloop +; CHECK-LABEL: B18: +; Only one phi now. +; CHECK: phi +; CHECK-NOT: phi +; One trunc for the gep. +; CHECK: trunc i64 %indvars.iv to i32 +; One trunc for the dummy() call. 
+; CHECK-LABEL: exit24: +; CHECK: trunc i64 {{.*}}lcssa.wide to i32 +define void @sloop(i32* %a) { +Prologue: + br i1 undef, label %B18, label %B6 + +B18: ; preds = %B24, %Prologue + %.02 = phi i32 [ 0, %Prologue ], [ %tmp33, %B24 ] + %tmp23 = zext i32 %.02 to i64 + %tmp33 = add i32 %.02, 1 + %o = getelementptr i32* %a, i32 %.02 + %v = load i32* %o + %t = icmp eq i32 %v, 0 + br i1 %t, label %exit24, label %B24 + +B24: ; preds = %B18 + %t2 = icmp eq i32 %tmp33, 20 + br i1 %t2, label %B6, label %B18 + +B6: ; preds = %Prologue + ret void + +exit24: ; preds = %B18 + call void @dummy(i32 %.02) + unreachable +} + +declare void @dummy(i32) diff --git a/test/Transforms/IndVarSimplify/lcssa-preservation.ll b/test/Transforms/IndVarSimplify/lcssa-preservation.ll new file mode 100644 index 0000000..f69c96c --- /dev/null +++ b/test/Transforms/IndVarSimplify/lcssa-preservation.ll @@ -0,0 +1,51 @@ +; RUN: opt < %s -indvars -S | FileCheck %s +; +; Make sure IndVars preserves LCSSA form, especially across loop nests. 
+ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +define void @PR18642(i32 %x) { +; CHECK-LABEL: @PR18642( +entry: + br label %outer.header +; CHECK: br label %outer.header + +outer.header: +; CHECK: outer.header: + %outer.iv = phi i32 [ 0, %entry ], [ %x, %outer.latch ] + br label %inner.header +; CHECK: %[[SCEV_EXPANDED:.*]] = add i32 +; CHECK: br label %inner.header + +inner.header: +; CHECK: inner.header: + %inner.iv = phi i32 [ undef, %outer.header ], [ %inc, %inner.latch ] + %cmp1 = icmp slt i32 %inner.iv, %outer.iv + br i1 %cmp1, label %inner.latch, label %outer.latch +; CHECK: br i1 {{.*}}, label %inner.latch, label %outer.latch + +inner.latch: +; CHECK: inner.latch: + %inc = add nsw i32 %inner.iv, 1 + %cmp2 = icmp slt i32 %inner.iv, %outer.iv + br i1 %cmp2, label %inner.header, label %exit +; CHECK: br i1 {{.*}}, label %inner.header, label %[[EXIT_FROM_INNER:.*]] + +outer.latch: +; CHECK: outer.latch: + br i1 undef, label %outer.header, label %exit +; CHECK: br i1 {{.*}}, label %outer.header, label %[[EXIT_FROM_OUTER:.*]] + +; CHECK: [[EXIT_FROM_INNER]]: +; CHECK-NEXT: %[[LCSSA:.*]] = phi i32 [ %[[SCEV_EXPANDED]], %inner.latch ] +; CHECK-NEXT: br label %exit + +; CHECK: [[EXIT_FROM_OUTER]]: +; CHECK-NEXT: br label %exit + +exit: +; CHECK: exit: + %exit.phi = phi i32 [ %inc, %inner.latch ], [ undef, %outer.latch ] +; CHECK-NEXT: phi i32 [ %[[LCSSA]], %[[EXIT_FROM_INNER]] ], [ undef, %[[EXIT_FROM_OUTER]] ] + ret void +} diff --git a/test/Transforms/IndVarSimplify/lftr-extend-const.ll b/test/Transforms/IndVarSimplify/lftr-extend-const.ll index 2fac4a7..4736f85 100644 --- a/test/Transforms/IndVarSimplify/lftr-extend-const.ll +++ b/test/Transforms/IndVarSimplify/lftr-extend-const.ll @@ -1,6 +1,6 @@ ;RUN: opt -S %s -indvars | FileCheck %s -; CHECK-LABEL-LABEL: @foo( +; CHECK-LABEL: @foo( ; CHECK-NOT: %lftr.wideiv = trunc i32 %indvars.iv.next to 
i16 ; CHECK: %exitcond = icmp ne i32 %indvars.iv.next, 512 define void @foo() #0 { @@ -20,7 +20,7 @@ for.end: ; preds = %for.body } ; Check that post-incrementing the backedge taken count does not overflow. -; CHECK-LABEL-LABEL: @postinc( +; CHECK-LABEL: @postinc( ; CHECK: icmp eq i32 %indvars.iv.next, 256 define i32 @postinc() #0 { entry: diff --git a/test/Transforms/IndVarSimplify/lftr-reuse.ll b/test/Transforms/IndVarSimplify/lftr-reuse.ll index fe3df5c..1fdcdd1 100644 --- a/test/Transforms/IndVarSimplify/lftr-reuse.ll +++ b/test/Transforms/IndVarSimplify/lftr-reuse.ll @@ -38,17 +38,16 @@ for.end: ret void } -; It would be nice if SCEV and any loop analysis could assume that -; preheaders exist. Unfortunately it is not always the case. This test -; checks that SCEVExpander can handle an outer loop that has not yet -; been simplified. As a result, the inner loop's exit test will not be -; rewritten. +; This test checks that SCEVExpander can handle an outer loop that has been +; simplified, and as a result the inner loop's exit test will be rewritten. 
define void @expandOuterRecurrence(i32 %arg) nounwind { entry: %sub1 = sub nsw i32 %arg, 1 %cmp1 = icmp slt i32 0, %sub1 br i1 %cmp1, label %outer, label %exit +; CHECK: outer: +; CHECK: icmp slt outer: %i = phi i32 [ 0, %entry ], [ %i.inc, %outer.inc ] %sub2 = sub nsw i32 %arg, %i @@ -60,7 +59,6 @@ inner.ph: br label %inner ; CHECK: inner: -; CHECK: icmp slt ; CHECK: br i1 inner: %j = phi i32 [ 0, %inner.ph ], [ %j.inc, %inner ] diff --git a/test/Transforms/IndVarSimplify/overflowcheck.ll b/test/Transforms/IndVarSimplify/overflowcheck.ll new file mode 100644 index 0000000..2603f36 --- /dev/null +++ b/test/Transforms/IndVarSimplify/overflowcheck.ll @@ -0,0 +1,56 @@ +; RUN: opt < %s -indvars -liv-reduce -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx" + +; CHECK-LABEL: @addwithoverflow +; CHECK-LABEL: loop1: +; CHECK-NOT: zext +; CHECK: add nsw +; CHECK: @llvm.sadd.with.overflow +; CHECK-LABEL: loop2: +; CHECK-NOT: extractvalue +; CHECK: add nuw nsw +; CHECK: @llvm.sadd.with.overflow +; CHECK-LABEL: loop3: +; CHECK-NOT: extractvalue +; CHECK: ret +define i64 @addwithoverflow(i32 %n, i64* %a) { +entry: + br label %loop0 + +loop0: + %i = phi i32 [ 0, %entry ], [ %i1val, %loop3 ] + %s = phi i32 [ 0, %entry ], [ %addsval, %loop3 ] + %bc = icmp ult i32 %i, %n + br i1 %bc, label %loop1, label %exit + +loop1: + %zxt = zext i32 %i to i64 + %ofs = shl nuw nsw i64 %zxt, 3 + %gep = getelementptr i64* %a, i64 %zxt + %v = load i64* %gep, align 8 + %truncv = trunc i64 %v to i32 + %adds = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %s, i32 %truncv) + %ovflows = extractvalue { i32, i1 } %adds, 1 + br i1 %ovflows, label %exit, label %loop2 + +loop2: + %addsval = extractvalue { i32, i1 } %adds, 0 + %i1 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %i, i32 1) + %i1check = extractvalue { i32, i1 } 
%i1, 1 + br i1 %i1check, label %exit, label %loop3 + +loop3: + %i1val = extractvalue { i32, i1 } %i1, 0 + %test = icmp slt i32 %i1val, %n + br i1 %test, label %return, label %loop0 + +return: + %ret = zext i32 %addsval to i64 + ret i64 %ret + +exit: + unreachable +} + +declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) diff --git a/test/Transforms/IndVarSimplify/tripcount_compute.ll b/test/Transforms/IndVarSimplify/tripcount_compute.ll index 626a29b..966d152 100644 --- a/test/Transforms/IndVarSimplify/tripcount_compute.ll +++ b/test/Transforms/IndVarSimplify/tripcount_compute.ll @@ -160,3 +160,34 @@ loop9: ; preds = %loop2, %loopexit loopexit9: ; preds = %loop2 ret i32 %l.next } + +; PR18449. Check that the early exit is reduced to never taken. +; +; CHECK-LABEL: @twoexit +; CHECK-LABEL: loop: +; CHECK: phi +; CHECK: br i1 false +; CHECK: br +; CHECK: ret +define void @twoexit() { +"function top level": + br label %loop + +loop: ; preds = %body, %"function top level" + %0 = phi i64 [ 0, %"function top level" ], [ %2, %body ] + %1 = icmp ugt i64 %0, 2 + br i1 %1, label %fail, label %body + +fail: ; preds = %loop + tail call void @bounds_fail() + unreachable + +body: ; preds = %loop + %2 = add i64 %0, 1 + %3 = icmp slt i64 %2, 3 + br i1 %3, label %loop, label %out + +out: ; preds = %body + ret void +} +declare void @bounds_fail() diff --git a/test/Transforms/Inline/ignore-debug-info.ll b/test/Transforms/Inline/ignore-debug-info.ll new file mode 100644 index 0000000..543a89b --- /dev/null +++ b/test/Transforms/Inline/ignore-debug-info.ll @@ -0,0 +1,55 @@ +; RUN: opt < %s -S -inline -inline-threshold=2 | FileCheck %s +; RUN: opt < %s -S -strip-debug -inline -inline-threshold=2 | FileCheck %s +; +; The purpose of this test is to check that debug info doesn't influence +; inlining decisions. 
+ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare void @llvm.dbg.declare(metadata, metadata) #1 +declare void @llvm.dbg.value(metadata, i64, metadata) #1 + +define <4 x float> @inner_vectors(<4 x float> %a, <4 x float> %b) { +entry: + call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{}) + %mul = fmul <4 x float> %a, + call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{}) + %mul1 = fmul <4 x float> %b, + call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{}) + %add = fadd <4 x float> %mul, %mul1 + ret <4 x float> %add +} + +define float @outer_vectors(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: @outer_vectors( +; CHECK-NOT: call <4 x float> @inner_vectors( +; CHECK: ret float + +entry: + call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{}) + call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{}) + %call = call <4 x float> @inner_vectors(<4 x float> %a, <4 x float> %b) + call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{}) + %vecext = extractelement <4 x float> %call, i32 0 + %vecext1 = extractelement <4 x float> %call, i32 1 + %add = fadd float %vecext, %vecext1 + %vecext2 = extractelement <4 x float> %call, i32 2 + %add3 = fadd float %add, %vecext2 + %vecext4 = extractelement <4 x float> %call, i32 3 + %add5 = fadd float %add3, %vecext4 + ret float %add5 +} + +attributes #0 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} +!llvm.ident = !{!5} + +!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !{}, metadata !2, metadata !2, metadata !""} +!1 = metadata !{metadata !"", metadata !""} +!2 = metadata !{i32 0} +!3 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!4 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} +!5 = 
metadata !{metadata !""} diff --git a/test/Transforms/Inline/inline-cold.ll b/test/Transforms/Inline/inline-cold.ll new file mode 100644 index 0000000..bb8c008 --- /dev/null +++ b/test/Transforms/Inline/inline-cold.ll @@ -0,0 +1,88 @@ +; RUN: opt < %s -inline -S -inlinecold-threshold=75 | FileCheck %s + +; Test that functions with attribute Cold are not inlined while the +; same function without attribute Cold will be inlined. + +@a = global i32 4 + +; This function should be larger than the cold threshold (75), but smaller +; than the regular threshold. +; Function Attrs: nounwind readnone uwtable +define i32 @simpleFunction(i32 %a) #0 { +entry: + %a1 = load volatile i32* @a + %x1 = add i32 %a1, %a1 + %a2 = load volatile i32* @a + %x2 = add i32 %x1, %a2 + %a3 = load volatile i32* @a + %x3 = add i32 %x2, %a3 + %a4 = load volatile i32* @a + %x4 = add i32 %x3, %a4 + %a5 = load volatile i32* @a + %x5 = add i32 %x4, %a5 + %a6 = load volatile i32* @a + %x6 = add i32 %x5, %a6 + %a7 = load volatile i32* @a + %x7 = add i32 %x6, %a6 + %a8 = load volatile i32* @a + %x8 = add i32 %x7, %a8 + %a9 = load volatile i32* @a + %x9 = add i32 %x8, %a9 + %a10 = load volatile i32* @a + %x10 = add i32 %x9, %a10 + %a11 = load volatile i32* @a + %x11 = add i32 %x10, %a11 + %a12 = load volatile i32* @a + %x12 = add i32 %x11, %a12 + %add = add i32 %x12, %a + ret i32 %add +} + +; Function Attrs: nounwind cold readnone uwtable +define i32 @ColdFunction(i32 %a) #1 { +; CHECK-LABEL: @ColdFunction +; CHECK: ret +entry: + %a1 = load volatile i32* @a + %x1 = add i32 %a1, %a1 + %a2 = load volatile i32* @a + %x2 = add i32 %x1, %a2 + %a3 = load volatile i32* @a + %x3 = add i32 %x2, %a3 + %a4 = load volatile i32* @a + %x4 = add i32 %x3, %a4 + %a5 = load volatile i32* @a + %x5 = add i32 %x4, %a5 + %a6 = load volatile i32* @a + %x6 = add i32 %x5, %a6 + %a7 = load volatile i32* @a + %x7 = add i32 %x6, %a6 + %a8 = load volatile i32* @a + %x8 = add i32 %x7, %a8 + %a9 = load volatile i32* @a + %x9 = add i32 
%x8, %a9 + %a10 = load volatile i32* @a + %x10 = add i32 %x9, %a10 + %a11 = load volatile i32* @a + %x11 = add i32 %x10, %a11 + %a12 = load volatile i32* @a + %x12 = add i32 %x11, %a12 + %add = add i32 %x12, %a + ret i32 %add +} + +; Function Attrs: nounwind readnone uwtable +define i32 @bar(i32 %a) #0 { +; CHECK-LABEL: @bar +; CHECK: call i32 @ColdFunction(i32 5) +; CHECK-NOT: call i32 @simpleFunction(i32 6) +; CHECK: ret +entry: + %0 = tail call i32 @ColdFunction(i32 5) + %1 = tail call i32 @simpleFunction(i32 6) + %add = add i32 %0, %1 + ret i32 %add +} + +attributes #0 = { nounwind readnone uwtable } +attributes #1 = { nounwind cold readnone uwtable } diff --git a/test/Transforms/Inline/inline_invoke.ll b/test/Transforms/Inline/inline_invoke.ll index c394138..c53bb5a 100644 --- a/test/Transforms/Inline/inline_invoke.ll +++ b/test/Transforms/Inline/inline_invoke.ll @@ -96,7 +96,6 @@ eh.resume: ; CHECK: landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0 ; CHECK-NEXT: cleanup ; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*) -; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*) ; CHECK-NEXT: invoke void @_ZN1AD1Ev(%struct.A* [[A]]) ; CHECK-NEXT: to label %[[LBL:[^\s]+]] unwind ; CHECK: [[LBL]]: @@ -167,7 +166,6 @@ eh.resume: ; CHECK-NEXT: [[LPADVAL1:%.*]] = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0 ; CHECK-NEXT: cleanup ; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*) -; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*) ; CHECK-NEXT: invoke void @_ZN1AD1Ev(%struct.A* [[A1]]) ; CHECK-NEXT: to label %[[RESUME1:[^\s]+]] unwind ; CHECK: [[RESUME1]]: @@ -187,7 +185,6 @@ eh.resume: ; CHECK-NEXT: [[LPADVAL2:%.*]] = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0 ; CHECK-NEXT: cleanup ; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*) -; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*) ; CHECK-NEXT: invoke void @_ZN1AD1Ev(%struct.A* [[A2]]) ; CHECK-NEXT: to label %[[RESUME2:[^\s]+]] unwind ; 
CHECK: [[RESUME2]]: @@ -275,7 +272,6 @@ lpad.cont: ; CHECK: landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0 ; CHECK-NEXT: cleanup ; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*) -; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*) ; CHECK-NEXT: invoke void @_ZN1AD1Ev( ; CHECK-NEXT: to label %[[L:[^\s]+]] unwind ; CHECK: [[L]]: @@ -322,7 +318,6 @@ terminate: ; CHECK: landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0 ; CHECK-NEXT: cleanup ; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*) -; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*) ; CHECK-NEXT: invoke void @_ZN1AD1Ev( ; CHECK-NEXT: to label %[[L:[^\s]+]] unwind ; CHECK: [[L]]: diff --git a/test/Transforms/Inline/inline_returns_twice.ll b/test/Transforms/Inline/inline_returns_twice.ll index 678ee82..3604264 100644 --- a/test/Transforms/Inline/inline_returns_twice.ll +++ b/test/Transforms/Inline/inline_returns_twice.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -inline -S | FileCheck %s ; Check that functions with "returns_twice" calls are only inlined, -; if they are themselve marked as such. +; if they are themselves marked as such. declare i32 @a() returns_twice diff --git a/test/Transforms/Inline/invoke-cleanup.ll b/test/Transforms/Inline/invoke-cleanup.ll new file mode 100644 index 0000000..457ae2a --- /dev/null +++ b/test/Transforms/Inline/invoke-cleanup.ll @@ -0,0 +1,39 @@ +; RUN: opt %s -inline -S | FileCheck %s + +declare void @external_func() + +@exception_type1 = external global i8 +@exception_type2 = external global i8 + + +define internal void @inner() { + invoke void @external_func() + to label %cont unwind label %lpad +cont: + ret void +lpad: + %lp = landingpad i32 personality i8* null + catch i8* @exception_type1 + resume i32 %lp +} + +; Test that the "cleanup" clause is kept when inlining @inner() into +; this call site (PR17872), otherwise C++ destructors will not be +; called when they should be. 
+ +define void @outer() { + invoke void @inner() + to label %cont unwind label %lpad +cont: + ret void +lpad: + %lp = landingpad i32 personality i8* null + cleanup + catch i8* @exception_type2 + resume i32 %lp +} +; CHECK: define void @outer +; CHECK: landingpad +; CHECK-NEXT: cleanup +; CHECK-NEXT: catch i8* @exception_type1 +; CHECK-NEXT: catch i8* @exception_type2 diff --git a/test/Transforms/Inline/invoke-combine-clauses.ll b/test/Transforms/Inline/invoke-combine-clauses.ll new file mode 100644 index 0000000..5f06039 --- /dev/null +++ b/test/Transforms/Inline/invoke-combine-clauses.ll @@ -0,0 +1,117 @@ +; RUN: opt %s -inline -S | FileCheck %s + +declare void @external_func() +declare void @abort() + +@exception_inner = external global i8 +@exception_outer = external global i8 +@condition = external global i1 + + +; Check for a bug in which multiple "resume" instructions in the +; inlined function caused "catch i8* @exception_outer" to appear +; multiple times in the resulting landingpad. + +define internal void @inner_multiple_resume() { + invoke void @external_func() + to label %cont unwind label %lpad +cont: + ret void +lpad: + %lp = landingpad i32 personality i8* null + catch i8* @exception_inner + %cond = load i1* @condition + br i1 %cond, label %resume1, label %resume2 +resume1: + resume i32 1 +resume2: + resume i32 2 +} + +define void @outer_multiple_resume() { + invoke void @inner_multiple_resume() + to label %cont unwind label %lpad +cont: + ret void +lpad: + %lp = landingpad i32 personality i8* null + catch i8* @exception_outer + resume i32 %lp +} +; CHECK: define void @outer_multiple_resume() +; CHECK: %lp.i = landingpad +; CHECK-NEXT: catch i8* @exception_inner +; CHECK-NEXT: catch i8* @exception_outer +; Check that there isn't another "catch" clause: +; CHECK-NEXT: load + + +; Check for a bug in which having a "resume" and a "call" in the +; inlined function caused "catch i8* @exception_outer" to appear +; multiple times in the resulting landingpad. 
+ +define internal void @inner_resume_and_call() { + call void @external_func() + invoke void @external_func() + to label %cont unwind label %lpad +cont: + ret void +lpad: + %lp = landingpad i32 personality i8* null + catch i8* @exception_inner + resume i32 %lp +} + +define void @outer_resume_and_call() { + invoke void @inner_resume_and_call() + to label %cont unwind label %lpad +cont: + ret void +lpad: + %lp = landingpad i32 personality i8* null + catch i8* @exception_outer + resume i32 %lp +} +; CHECK: define void @outer_resume_and_call() +; CHECK: %lp.i = landingpad +; CHECK-NEXT: catch i8* @exception_inner +; CHECK-NEXT: catch i8* @exception_outer +; Check that there isn't another "catch" clause: +; CHECK-NEXT: br + + +; Check what happens if the inlined function contains an "invoke" but +; no "resume". In this case, the inlined landingpad does not need to +; include the "catch i8* @exception_outer" clause from the outer +; function (since the outer function's landingpad will not be +; reachable), but it's OK to include this clause. + +define internal void @inner_no_resume_or_call() { + invoke void @external_func() + to label %cont unwind label %lpad +cont: + ret void +lpad: + %lp = landingpad i32 personality i8* null + catch i8* @exception_inner + ; A landingpad might have no "resume" if a C++ destructor aborts. 
+ call void @abort() noreturn nounwind + unreachable +} + +define void @outer_no_resume_or_call() { + invoke void @inner_no_resume_or_call() + to label %cont unwind label %lpad +cont: + ret void +lpad: + %lp = landingpad i32 personality i8* null + catch i8* @exception_outer + resume i32 %lp +} +; CHECK: define void @outer_no_resume_or_call() +; CHECK: %lp.i = landingpad +; CHECK-NEXT: catch i8* @exception_inner +; CHECK-NEXT: catch i8* @exception_outer +; Check that there isn't another "catch" clause: +; CHECK-NEXT: call void @abort() diff --git a/test/Transforms/Inline/ptr-diff.ll b/test/Transforms/Inline/ptr-diff.ll index af42bc7..46c3bcd 100644 --- a/test/Transforms/Inline/ptr-diff.ll +++ b/test/Transforms/Inline/ptr-diff.ll @@ -31,7 +31,7 @@ else: define i32 @outer2(i32* %ptr) { ; Test that an inbounds GEP disables this -- it isn't safe in general as -; wrapping changes the behavior of lessthan and greaterthan comparisions. +; wrapping changes the behavior of lessthan and greaterthan comparisons. 
; CHECK-LABEL: @outer2( ; CHECK: call i32 @inner2 ; CHECK: ret i32 diff --git a/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll b/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll index c27fe0a..7f9bd9e 100644 --- a/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll +++ b/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll @@ -3,7 +3,9 @@ @__gthrw_pthread_cancel = alias weak i32 (i32)* @pthread_cancel ; [#uses=1] @__gthread_active_ptr.5335 = internal constant i8* bitcast (i32 (i32)* @__gthrw_pthread_cancel to i8*) ; [#uses=1] -declare extern_weak i32 @pthread_cancel(i32) +define weak i32 @pthread_cancel(i32) { + ret i32 0 +} define i1 @__gthread_active_p() { entry: diff --git a/test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll b/test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll index 23ee12b..c7cef75 100644 --- a/test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll +++ b/test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll @@ -3,7 +3,9 @@ @A = alias weak void ()* @B ; [#uses=1] -declare extern_weak void @B() +define weak void @B() { + ret void +} define i32 @active() { entry: diff --git a/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll b/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll index 2dedd44..1883a8f 100644 --- a/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll +++ b/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll @@ -1,6 +1,3 @@ -target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" -target triple = "thumbv7-apple-ios0" - ; RUN: opt -S -instcombine < %s | FileCheck %s define <4 x i32> @mulByZero(<4 x i16> %x) nounwind readnone ssp { @@ -67,6 +64,72 @@ entry: declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone +; ARM64 variants - + +define <4 x i32> 
@mulByZeroARM64(<4 x i16> %x) nounwind readnone ssp { +entry: + %a = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %x, <4 x i16> zeroinitializer) nounwind + ret <4 x i32> %a +; CHECK: entry: +; CHECK-NEXT: ret <4 x i32> zeroinitializer +} + +define <4 x i32> @mulByOneARM64(<4 x i16> %x) nounwind readnone ssp { +entry: + %a = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %x, <4 x i16> ) nounwind + ret <4 x i32> %a +; CHECK: entry: +; CHECK-NEXT: %a = sext <4 x i16> %x to <4 x i32> +; CHECK-NEXT: ret <4 x i32> %a +} + +define <4 x i32> @constantMulARM64() nounwind readnone ssp { +entry: + %a = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> , <4 x i16> ) nounwind + ret <4 x i32> %a +; CHECK: entry: +; CHECK-NEXT: ret <4 x i32> +} + +define <4 x i32> @constantMulSARM64() nounwind readnone ssp { +entry: + %b = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> , <4 x i16> ) nounwind + ret <4 x i32> %b +; CHECK: entry: +; CHECK-NEXT: ret <4 x i32> +} + +define <4 x i32> @constantMulUARM64() nounwind readnone ssp { +entry: + %b = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> , <4 x i16> ) nounwind + ret <4 x i32> %b +; CHECK: entry: +; CHECK-NEXT: ret <4 x i32> +} + +define <4 x i32> @complex1ARM64(<4 x i16> %x) nounwind readnone ssp { +entry: + %a = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> , <4 x i16> %x) nounwind + %b = add <4 x i32> zeroinitializer, %a + ret <4 x i32> %b +; CHECK: entry: +; CHECK-NEXT: %a = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> , <4 x i16> %x) [[NUW:#[0-9]+]] +; CHECK-NEXT: ret <4 x i32> %a +} + +define <4 x i32> @complex2ARM64(<4 x i32> %x) nounwind readnone ssp { +entry: + %a = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> , <4 x i16> ) nounwind + %b = add <4 x i32> %x, %a + ret <4 x i32> %b +; CHECK: entry: +; CHECK-NEXT: %b = add <4 x i32> %x, +; CHECK-NEXT: ret <4 x i32> %b +} + +declare <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16>, 
<4 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone + ; CHECK: attributes #0 = { nounwind readnone ssp } ; CHECK: attributes #1 = { nounwind readnone } ; CHECK: attributes [[NUW]] = { nounwind } diff --git a/test/Transforms/InstCombine/add2.ll b/test/Transforms/InstCombine/add2.ll index 0964bc0..67d560e 100644 --- a/test/Transforms/InstCombine/add2.ll +++ b/test/Transforms/InstCombine/add2.ll @@ -41,3 +41,38 @@ define i32 @test4(i32 %A) { ; CHECK-NEXT: ret i32 %B } +define <2 x i1> @test5(<2 x i1> %A, <2 x i1> %B) { + %add = add <2 x i1> %A, %B + ret <2 x i1> %add +; CHECK-LABEL: @test5( +; CHECK-NEXT: %add = xor <2 x i1> %A, %B +; CHECK-NEXT: ret <2 x i1> %add +} + +define <2 x i64> @test6(<2 x i64> %A) { + %shl = shl <2 x i64> %A, + %add = add <2 x i64> %shl, %A + ret <2 x i64> %add +; CHECK-LABEL: @test6( +; CHECK-NEXT: %add = mul <2 x i64> %A, +; CHECK-NEXT: ret <2 x i64> %add +} + +define <2 x i64> @test7(<2 x i64> %A) { + %shl = shl <2 x i64> %A, + %mul = mul <2 x i64> %A, + %add = add <2 x i64> %shl, %mul + ret <2 x i64> %add +; CHECK-LABEL: @test7( +; CHECK-NEXT: %add = mul <2 x i64> %A, +; CHECK-NEXT: ret <2 x i64> %add +} + +define <2 x i64> @test8(<2 x i64> %A) { + %xor = xor <2 x i64> %A, + %add = add <2 x i64> %xor, + ret <2 x i64> %add +; CHECK-LABEL: @test8( +; CHECK-NEXT: %add = sub <2 x i64> , %A +; CHECK-NEXT: ret <2 x i64> %add +} diff --git a/test/Transforms/InstCombine/add4.ll b/test/Transforms/InstCombine/add4.ll index 208c7f0..f9b7e3b 100644 --- a/test/Transforms/InstCombine/add4.ll +++ b/test/Transforms/InstCombine/add4.ll @@ -77,3 +77,26 @@ define float @test7(float %A, float %B, i32 %C) { ; CHECK: uitofp } +define <4 x float> @test8(<4 x float> %A, <4 x float> %B, <4 x i1> %C) { + ;; B*(uitofp i1 C) + A*(1 - uitofp i1 C) -> select C, A, B + %cf = uitofp <4 x i1> %C to <4 x float> + %mc = fsub fast <4 x float> , %cf + %p1 = fmul fast <4 x float> %A, %mc + %p2 = fmul fast <4 x 
float> %B, %cf + %s1 = fadd fast <4 x float> %p2, %p1 + ret <4 x float> %s1 +; CHECK-LABEL: @test8( +; CHECK: select <4 x i1> %C, <4 x float> %B, <4 x float> %A +} + +define <4 x float> @test9(<4 x float> %A, <4 x float> %B, <4 x i1> %C) { + ;; A*(1 - uitofp i1 C) + B*(uitofp i1 C) -> select C, A, B + %cf = uitofp <4 x i1> %C to <4 x float> + %mc = fsub fast <4 x float> , %cf + %p1 = fmul fast <4 x float> %A, %mc + %p2 = fmul fast <4 x float> %B, %cf + %s1 = fadd fast <4 x float> %p1, %p2 + ret <4 x float> %s1 +; CHECK-LABEL: @test9 +; CHECK: select <4 x i1> %C, <4 x float> %B, <4 x float> %A +} diff --git a/test/Transforms/InstCombine/bitcast-store.ll b/test/Transforms/InstCombine/bitcast-store.ll index e4a61e9..e46b5c8 100644 --- a/test/Transforms/InstCombine/bitcast-store.ll +++ b/test/Transforms/InstCombine/bitcast-store.ll @@ -3,14 +3,14 @@ ; Instcombine should preserve metadata and alignment while ; folding a bitcast into a store. -; CHECK: store i32 (...)** bitcast (i8** getelementptr inbounds ([5 x i8*]* @G, i64 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 16, !tag !0 - target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" %struct.A = type { i32 (...)** } @G = external constant [5 x i8*] +; CHECK-LABEL: @foo +; CHECK: store i32 (...)** bitcast (i8** getelementptr inbounds ([5 x i8*]* @G, i64 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 16, !tag !0 define void @foo(%struct.A* %a) nounwind { entry: %0 = bitcast %struct.A* %a to i8*** @@ -18,4 +18,18 @@ entry: ret void } +; Check instcombine doesn't try and fold the following bitcast into the store. +; This transformation would not be safe since we would need to use addrspacecast +; and addrspacecast is not guaranteed to be a no-op cast. 
+ +; CHECK-LABEL: @bar +; CHECK: %cast = bitcast i8** %b to i8 addrspace(1)** +; CHECK: store i8 addrspace(1)* %a, i8 addrspace(1)** %cast +define void @bar(i8 addrspace(1)* %a, i8** %b) nounwind { +entry: + %cast = bitcast i8** %b to i8 addrspace(1)** + store i8 addrspace(1)* %a, i8 addrspace(1)** %cast + ret void +} + !0 = metadata !{metadata !"hello"} diff --git a/test/Transforms/InstCombine/call-cast-target-inalloca.ll b/test/Transforms/InstCombine/call-cast-target-inalloca.ll new file mode 100644 index 0000000..baf97e0 --- /dev/null +++ b/test/Transforms/InstCombine/call-cast-target-inalloca.ll @@ -0,0 +1,22 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-p:32:32" +target triple = "i686-pc-linux-gnu" + +declare void @takes_i32(i32) +declare void @takes_i32_inalloca(i32* inalloca) + +define void @f() { +; CHECK-LABEL: define void @f() + %args = alloca i32 + call void bitcast (void (i32)* @takes_i32 to void (i32*)*)(i32* inalloca %args) +; CHECK: call void bitcast + ret void +} + +define void @g() { +; CHECK-LABEL: define void @g() + call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0) +; CHECK: call void bitcast + ret void +} diff --git a/test/Transforms/InstCombine/call-cast-target.ll b/test/Transforms/InstCombine/call-cast-target.ll index 315c516..1af3317 100644 --- a/test/Transforms/InstCombine/call-cast-target.ll +++ b/test/Transforms/InstCombine/call-cast-target.ll @@ -13,3 +13,15 @@ entry: declare i8* @ctime(i32*) +define internal { i8 } @foo(i32*) { +entry: + ret { i8 } { i8 0 } +} + +define void @test_struct_ret() { +; CHECK-LABEL: @test_struct_ret +; CHECK-NOT: bitcast +entry: + %0 = call { i8 } bitcast ({ i8 } (i32*)* @foo to { i8 } (i16*)*)(i16* null) + ret void +} diff --git a/test/Transforms/InstCombine/cast-call-combine.ll b/test/Transforms/InstCombine/cast-call-combine.ll new file mode 100644 index 0000000..be70a87 --- /dev/null +++ b/test/Transforms/InstCombine/cast-call-combine.ll @@ -0,0 +1,23 
@@ +; RUN: opt < %s -always-inline -instcombine -S | FileCheck %s + +define internal void @foo(i16*) alwaysinline { + ret void +} + +define void @bar() noinline noreturn { + unreachable +} + +define void @test() { + br i1 false, label %then, label %else + +then: + call void @bar() + unreachable + +else: + ; CHECK-NOT: call + call void bitcast (void (i16*)* @foo to void (i8*)*) (i8* null) + ret void +} + diff --git a/test/Transforms/InstCombine/cast-set.ll b/test/Transforms/InstCombine/cast-set.ll index 8934404..47ba920 100644 --- a/test/Transforms/InstCombine/cast-set.ll +++ b/test/Transforms/InstCombine/cast-set.ll @@ -10,6 +10,7 @@ define i1 @test1(i32 %X) { ; Convert to setne int %X, 12 %c = icmp ne i32 %A, 12 ; [#uses=1] ret i1 %c +; CHECK-LABEL: @test1( ; CHECK: %c = icmp ne i32 %X, 12 ; CHECK: ret i1 %c } @@ -20,6 +21,7 @@ define i1 @test2(i32 %X, i32 %Y) { ; Convert to setne int %X, %Y %c = icmp ne i32 %A, %B ; [#uses=1] ret i1 %c +; CHECK-LABEL: @test2( ; CHECK: %c = icmp ne i32 %X, %Y ; CHECK: ret i1 %c } @@ -29,6 +31,7 @@ define i32 @test4(i32 %A) { %C = shl i32 %B, 2 ; [#uses=1] %D = bitcast i32 %C to i32 ; [#uses=1] ret i32 %D +; CHECK-LABEL: @test4( ; CHECK: %C = shl i32 %A, 2 ; CHECK: ret i32 %C } @@ -38,6 +41,7 @@ define i16 @test5(i16 %A) { %C = and i32 %B, 15 ; [#uses=1] %D = trunc i32 %C to i16 ; [#uses=1] ret i16 %D +; CHECK-LABEL: @test5( ; CHECK: %C = and i16 %A, 15 ; CHECK: ret i16 %C } @@ -46,6 +50,7 @@ define i1 @test6(i1 %A) { %B = zext i1 %A to i32 ; [#uses=1] %C = icmp ne i32 %B, 0 ; [#uses=1] ret i1 %C +; CHECK-LABEL: @test6( ; CHECK: ret i1 %A } @@ -53,6 +58,7 @@ define i1 @test6a(i1 %A) { %B = zext i1 %A to i32 ; [#uses=1] %C = icmp ne i32 %B, -1 ; [#uses=1] ret i1 %C +; CHECK-LABEL: @test6a( ; CHECK: ret i1 true } @@ -60,6 +66,7 @@ define i1 @test7(i8* %A) { %B = bitcast i8* %A to i32* ; [#uses=1] %C = icmp eq i32* %B, null ; [#uses=1] ret i1 %C +; CHECK-LABEL: @test7( ; CHECK: %C = icmp eq i8* %A, null ; CHECK: ret i1 %C } diff --git
a/test/Transforms/InstCombine/cast.ll b/test/Transforms/InstCombine/cast.ll index cac0ec1..4fab92f 100644 --- a/test/Transforms/InstCombine/cast.ll +++ b/test/Transforms/InstCombine/cast.ll @@ -1,6 +1,6 @@ ; Tests to make sure elimination of casts is working correctly ; RUN: opt < %s -instcombine -S | FileCheck %s -target datalayout = "E-p:64:64:64-p1:32:32:32-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128-n8:16:32:64" +target datalayout = "E-p:64:64:64-p1:32:32:32-p2:64:64:64-p3:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128-n8:16:32:64" @inbuf = external global [32832 x i8] ; <[32832 x i8]*> [#uses=1] @@ -708,6 +708,34 @@ define %s @test68(%s *%p, i64 %i) { ; CHECK-NEXT: ret %s } +; addrspacecasts should be eliminated. +define %s @test68_addrspacecast(%s* %p, i64 %i) { +; CHECK-LABEL: @test68_addrspacecast( +; CHECK-NEXT: getelementptr %s* +; CHECK-NEXT: load %s* +; CHECK-NEXT: ret %s + %o = mul i64 %i, 12 + %q = addrspacecast %s* %p to i8 addrspace(2)* + %pp = getelementptr inbounds i8 addrspace(2)* %q, i64 %o + %r = addrspacecast i8 addrspace(2)* %pp to %s* + %l = load %s* %r + ret %s %l +} + +define %s @test68_addrspacecast_2(%s* %p, i64 %i) { +; CHECK-LABEL: @test68_addrspacecast_2( +; CHECK-NEXT: getelementptr %s* %p +; CHECK-NEXT: addrspacecast +; CHECK-NEXT: load %s addrspace(1)* +; CHECK-NEXT: ret %s + %o = mul i64 %i, 12 + %q = addrspacecast %s* %p to i8 addrspace(2)* + %pp = getelementptr inbounds i8 addrspace(2)* %q, i64 %o + %r = addrspacecast i8 addrspace(2)* %pp to %s addrspace(1)* + %l = load %s addrspace(1)* %r + ret %s %l +} + define %s @test68_as1(%s addrspace(1)* %p, i32 %i) { ; CHECK-LABEL: @test68_as1( %o = mul i32 %i, 12 @@ -903,6 +931,33 @@ define double @test80([100 x double]* %p, i32 %i) { ; CHECK-NEXT: ret double } +define double @test80_addrspacecast([100 x double] addrspace(1)* %p, i32 %i) { +; CHECK-LABEL: @test80_addrspacecast( +; 
CHECK-NEXT: getelementptr [100 x double] addrspace(1)* %p +; CHECK-NEXT: load double addrspace(1)* +; CHECK-NEXT: ret double + %tmp = mul nsw i32 %i, 8 + %q = addrspacecast [100 x double] addrspace(1)* %p to i8 addrspace(2)* + %pp = getelementptr i8 addrspace(2)* %q, i32 %tmp + %r = addrspacecast i8 addrspace(2)* %pp to double addrspace(1)* + %l = load double addrspace(1)* %r + ret double %l +} + +define double @test80_addrspacecast_2([100 x double] addrspace(1)* %p, i32 %i) { +; CHECK-LABEL: @test80_addrspacecast_2( +; CHECK-NEXT: getelementptr [100 x double] addrspace(1)* +; CHECK-NEXT: addrspacecast double addrspace(1)* +; CHECK-NEXT: load double addrspace(3)* +; CHECK-NEXT: ret double + %tmp = mul nsw i32 %i, 8 + %q = addrspacecast [100 x double] addrspace(1)* %p to i8 addrspace(2)* + %pp = getelementptr i8 addrspace(2)* %q, i32 %tmp + %r = addrspacecast i8 addrspace(2)* %pp to double addrspace(3)* + %l = load double addrspace(3)* %r + ret double %l +} + define double @test80_as1([100 x double] addrspace(1)* %p, i16 %i) { ; CHECK-LABEL: @test80_as1( %tmp = mul nsw i16 %i, 8 diff --git a/test/Transforms/InstCombine/ceil.ll b/test/Transforms/InstCombine/ceil.ll new file mode 100644 index 0000000..9f965a3 --- /dev/null +++ b/test/Transforms/InstCombine/ceil.ll @@ -0,0 +1,56 @@ +; RUN: opt -S -instcombine < %s | FileCheck %s + +declare float @llvm.ceil.f32(float) #0 +declare double @llvm.ceil.f64(double) #0 +declare <4 x float> @llvm.ceil.v4f32(<4 x float>) #0 + +; CHECK-LABEL: @constant_fold_ceil_f32_01 +; CHECK-NEXT: ret float 1.000000e+00 +define float @constant_fold_ceil_f32_01() #0 { + %x = call float @llvm.ceil.f32(float 1.00) #0 + ret float %x +} + +; CHECK-LABEL: @constant_fold_ceil_f32_02 +; CHECK-NEXT: ret float 2.000000e+00 +define float @constant_fold_ceil_f32_02() #0 { + %x = call float @llvm.ceil.f32(float 1.25) #0 + ret float %x +} + +; CHECK-LABEL: @constant_fold_ceil_f32_03 +; CHECK-NEXT: ret float -1.000000e+00 +define float 
@constant_fold_ceil_f32_03() #0 { + %x = call float @llvm.ceil.f32(float -1.25) #0 + ret float %x +} + +; CHECK-LABEL: @constant_fold_ceil_v4f32_01 +; CHECK-NEXT: ret <4 x float> +define <4 x float> @constant_fold_ceil_v4f32_01() #0 { + %x = call <4 x float> @llvm.ceil.v4f32(<4 x float> ) + ret <4 x float> %x +} + +; CHECK-LABEL: @constant_fold_ceil_f64_01 +; CHECK-NEXT: ret double 1.000000e+00 +define double @constant_fold_ceil_f64_01() #0 { + %x = call double @llvm.ceil.f64(double 1.0) #0 + ret double %x +} + +; CHECK-LABEL: @constant_fold_ceil_f64_02 +; CHECK-NEXT: ret double 2.000000e+00 +define double @constant_fold_ceil_f64_02() #0 { + %x = call double @llvm.ceil.f64(double 1.3) #0 + ret double %x +} + +; CHECK-LABEL: @constant_fold_ceil_f64_03 +; CHECK-NEXT: ret double -1.000000e+00 +define double @constant_fold_ceil_f64_03() #0 { + %x = call double @llvm.ceil.f64(double -1.75) #0 + ret double %x +} + +attributes #0 = { nounwind readnone } diff --git a/test/Transforms/InstCombine/constant-fold-math.ll b/test/Transforms/InstCombine/constant-fold-math.ll new file mode 100644 index 0000000..14377df --- /dev/null +++ b/test/Transforms/InstCombine/constant-fold-math.ll @@ -0,0 +1,47 @@ +; RUN: opt -S -instcombine < %s | FileCheck %s + +declare float @llvm.fma.f32(float, float, float) #0 +declare float @llvm.fmuladd.f32(float, float, float) #0 +declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #0 + +declare double @llvm.fma.f64(double, double, double) #0 +declare double @llvm.fmuladd.f64(double, double, double) #0 + + + +; CHECK-LABEL: @constant_fold_fma_f32 +; CHECK-NEXT: ret float 6.000000e+00 +define float @constant_fold_fma_f32() #0 { + %x = call float @llvm.fma.f32(float 1.0, float 2.0, float 4.0) #0 + ret float %x +} + +; CHECK-LABEL: @constant_fold_fma_v4f32 +; CHECK-NEXT: ret <4 x float> +define <4 x float> @constant_fold_fma_v4f32() #0 { + %x = call <4 x float> @llvm.fma.v4f32(<4 x float> , <4 x float> , <4 x float> ) + ret <4 x 
float> %x +} + +; CHECK-LABEL: @constant_fold_fmuladd_f32 +; CHECK-NEXT: ret float 6.000000e+00 +define float @constant_fold_fmuladd_f32() #0 { + %x = call float @llvm.fmuladd.f32(float 1.0, float 2.0, float 4.0) #0 + ret float %x +} + +; CHECK-LABEL: @constant_fold_fma_f64 +; CHECK-NEXT: ret double 6.000000e+00 +define double @constant_fold_fma_f64() #0 { + %x = call double @llvm.fma.f64(double 1.0, double 2.0, double 4.0) #0 + ret double %x +} + +; CHECK-LABEL: @constant_fold_fmuladd_f64 +; CHECK-NEXT: ret double 6.000000e+00 +define double @constant_fold_fmuladd_f64() #0 { + %x = call double @llvm.fmuladd.f64(double 1.0, double 2.0, double 4.0) #0 + ret double %x +} + +attributes #0 = { nounwind readnone } diff --git a/test/Transforms/InstCombine/copysign.ll b/test/Transforms/InstCombine/copysign.ll new file mode 100644 index 0000000..556b799 --- /dev/null +++ b/test/Transforms/InstCombine/copysign.ll @@ -0,0 +1,49 @@ +; RUN: opt -S -instcombine < %s | FileCheck %s + +declare float @llvm.copysign.f32(float, float) #0 +declare double @llvm.copysign.f64(double, double) #0 + +; CHECK-LABEL: @constant_fold_copysign_f32_01 +; CHECK-NEXT: ret float -1.000000e+00 +define float @constant_fold_copysign_f32_01() #0 { + %x = call float @llvm.copysign.f32(float 1.0, float -2.0) #0 + ret float %x +} + +; CHECK-LABEL: @constant_fold_copysign_f32_02 +; CHECK-NEXT: ret float 2.000000e+00 +define float @constant_fold_copysign_f32_02() #0 { + %x = call float @llvm.copysign.f32(float -2.0, float 1.0) #0 + ret float %x +} + +; CHECK-LABEL: @constant_fold_copysign_f32_03 +; CHECK-NEXT: ret float -2.000000e+00 +define float @constant_fold_copysign_f32_03() #0 { + %x = call float @llvm.copysign.f32(float -2.0, float -1.0) #0 + ret float %x +} + +; CHECK-LABEL: @constant_fold_copysign_f64_01 +; CHECK-NEXT: ret double -1.000000e+00 +define double @constant_fold_copysign_f64_01() #0 { + %x = call double @llvm.copysign.f64(double 1.0, double -2.0) #0 + ret double %x +} + +; CHECK-LABEL: 
@constant_fold_copysign_f64_02 +; CHECK-NEXT: ret double 1.000000e+00 +define double @constant_fold_copysign_f64_02() #0 { + %x = call double @llvm.copysign.f64(double -1.0, double 2.0) #0 + ret double %x +} + +; CHECK-LABEL: @constant_fold_copysign_f64_03 +; CHECK-NEXT: ret double -1.000000e+00 +define double @constant_fold_copysign_f64_03() #0 { + %x = call double @llvm.copysign.f64(double -1.0, double -2.0) #0 + ret double %x +} + + +attributes #0 = { nounwind readnone } diff --git a/test/Transforms/InstCombine/div.ll b/test/Transforms/InstCombine/div.ll index f67fd1c..1bf486f 100644 --- a/test/Transforms/InstCombine/div.ll +++ b/test/Transforms/InstCombine/div.ll @@ -131,4 +131,28 @@ define i32 @test15(i32 %a, i32 %b) nounwind { ; CHECK-NEXT: ret i32 } +define <2 x i64> @test16(<2 x i64> %x) nounwind { + %shr = lshr <2 x i64> %x, + %div = udiv <2 x i64> %shr, + ret <2 x i64> %div +; CHECK-LABEL: @test16( +; CHECK-NEXT: udiv <2 x i64> %x, +; CHECK-NEXT: ret <2 x i64> +} + +define <2 x i64> @test17(<2 x i64> %x) nounwind { + %neg = sub nsw <2 x i64> zeroinitializer, %x + %div = sdiv <2 x i64> %neg, + ret <2 x i64> %div +; CHECK-LABEL: @test17( +; CHECK-NEXT: sdiv <2 x i64> %x, +; CHECK-NEXT: ret <2 x i64> +} +define <2 x i64> @test18(<2 x i64> %x) nounwind { + %div = sdiv <2 x i64> %x, + ret <2 x i64> %div +; CHECK-LABEL: @test18( +; CHECK-NEXT: sub <2 x i64> zeroinitializer, %x +; CHECK-NEXT: ret <2 x i64> +} diff --git a/test/Transforms/InstCombine/double-float-shrink-1.ll b/test/Transforms/InstCombine/double-float-shrink-1.ll index 5cacb59..d958470 100644 --- a/test/Transforms/InstCombine/double-float-shrink-1.ll +++ b/test/Transforms/InstCombine/double-float-shrink-1.ll @@ -157,7 +157,10 @@ define float @exp10_test(float %f) nounwind readnone { %call = call double @exp10(double %conv) %conv1 = fptrunc double %call to float ret float %conv1 -; CHECK: call float @exp10f(float %f) +; FIXME: Re-enable this when Linux allows transforming this again, or when we +; 
can use builtin attributes to test the transform regardless of OS. +; DISABLED-CHECK: call float @exp10f(float %f) +; CHECK: call double @exp10(double %conv) } define double @exp10_test2(float %f) nounwind readnone { diff --git a/test/Transforms/InstCombine/exp2-1.ll b/test/Transforms/InstCombine/exp2-1.ll index 99fb9ec..8e6a0e0 100644 --- a/test/Transforms/InstCombine/exp2-1.ll +++ b/test/Transforms/InstCombine/exp2-1.ll @@ -1,6 +1,7 @@ ; Test that the exp2 library call simplifier works correctly. ; ; RUN: opt < %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S -mtriple=i386-pc-win32 | FileCheck %s -check-prefix=CHECK-WIN target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" @@ -74,3 +75,26 @@ define float @test_simplify8(i8 zeroext %x) { ; CHECK: call float @ldexpf ret float %ret } + +declare double @llvm.exp2.f64(double) +declare float @llvm.exp2.f32(float) + +define double @test_simplify9(i8 zeroext %x) { +; CHECK-LABEL: @test_simplify9( +; CHECK-WIN-LABEL: @test_simplify9( + %conv = uitofp i8 %x to double + %ret = call double @llvm.exp2.f64(double %conv) +; CHECK: call double @ldexp +; CHECK-WIN: call double @ldexp + ret double %ret +} + +define float @test_simplify10(i8 zeroext %x) { +; CHECK-LABEL: @test_simplify10( +; CHECK-WIN-LABEL: @test_simplify10( + %conv = uitofp i8 %x to float + %ret = call float @llvm.exp2.f32(float %conv) +; CHECK: call float @ldexpf +; CHECK-WIN-NOT: call float @ldexpf + ret float %ret +} diff --git a/test/Transforms/InstCombine/fast-math.ll b/test/Transforms/InstCombine/fast-math.ll index d8ba2a5..2ee4b0f 100644 --- a/test/Transforms/InstCombine/fast-math.ll +++ b/test/Transforms/InstCombine/fast-math.ll @@ -140,6 +140,42 @@ define float @fold13(float %x) { ; CHECK: ret } +; -x + y => y - x +define float @fold14(float %x, float %y) { + %neg = fsub fast float -0.0, %x + %add = fadd fast float %neg, %y + ret float %add +; CHECK: 
fold14 +; CHECK: fsub fast float %y, %x +; CHECK: ret +} + +; x + -y => x - y +define float @fold15(float %x, float %y) { + %neg = fsub fast float -0.0, %y + %add = fadd fast float %x, %neg + ret float %add +; CHECK: fold15 +; CHECK: fsub fast float %x, %y +; CHECK: ret +} + +; (select X+Y, X-Y) => X + (select Y, -Y) +define float @fold16(float %x, float %y) { + %cmp = fcmp ogt float %x, %y + %plus = fadd fast float %x, %y + %minus = fsub fast float %x, %y + %r = select i1 %cmp, float %plus, float %minus + ret float %r +; CHECK: fold16 +; CHECK: fsub fast float +; CHECK: select +; CHECK: fadd fast float +; CHECK: ret +} + + + ; ========================================================================= ; ; Testing-cases about fmul begin @@ -223,6 +259,14 @@ define float @fmul3(float %f1, float %f2) { ; CHECK: fmul fast float %f1, 3.000000e+00 } +define <4 x float> @fmul3_vec(<4 x float> %f1, <4 x float> %f2) { + %t1 = fdiv <4 x float> %f1, + %t3 = fmul fast <4 x float> %t1, + ret <4 x float> %t3 +; CHECK-LABEL: @fmul3_vec( +; CHECK: fmul fast <4 x float> %f1, +} + ; Rule "X/C1 * C2 => X * (C2/C1) is not applicable if C2/C1 is either a special ; value of a denormal. 
The 0x3810000000000000 here take value FLT_MIN ; @@ -309,6 +353,15 @@ define float @fdiv2(float %x) { ; CHECK: fmul fast float %x, 0x3FE0B21660000000 } +define <2 x float> @fdiv2_vec(<2 x float> %x) { + %mul = fmul <2 x float> %x, + %div1 = fdiv fast <2 x float> %mul, + ret <2 x float> %div1 + +; CHECK-LABEL: @fdiv2_vec( +; CHECK: fmul fast <2 x float> %x, +} + ; "X/C1 / C2 => X * (1/(C2*C1))" is disabled (for now) is C2/C1 is a denormal ; define float @fdiv3(float %x) { diff --git a/test/Transforms/InstCombine/fdiv.ll b/test/Transforms/InstCombine/fdiv.ll index 1edbc5e..af6a240 100644 --- a/test/Transforms/InstCombine/fdiv.ll +++ b/test/Transforms/InstCombine/fdiv.ll @@ -23,3 +23,29 @@ define float @test3(float %x) nounwind readnone ssp { ; CHECK-LABEL: @test3( ; CHECK-NEXT: fdiv float %x, 0x36A0000000000000 } + +define float @test4(float %x) nounwind readnone ssp { + %div = fdiv fast float %x, 8.0 + ret float %div + +; CHECK-LABEL: @test4( +; CHECK-NEXT: fmul fast float %x, 1.250000e-01 +} + +define float @test5(float %x, float %y, float %z) nounwind readnone ssp { + %div1 = fdiv fast float %x, %y + %div2 = fdiv fast float %div1, %z + ret float %div2 +; CHECK-LABEL: @test5( +; CHECK-NEXT: fmul fast +; CHECK-NEXT: fdiv fast +} + +define float @test6(float %x, float %y, float %z) nounwind readnone ssp { + %div1 = fdiv fast float %x, %y + %div2 = fdiv fast float %z, %div1 + ret float %div2 +; CHECK-LABEL: @test6( +; CHECK-NEXT: fmul fast +; CHECK-NEXT: fdiv fast +} diff --git a/test/Transforms/InstCombine/float-shrink-compare.ll b/test/Transforms/InstCombine/float-shrink-compare.ll index 26f77a7..e500467 100644 --- a/test/Transforms/InstCombine/float-shrink-compare.ll +++ b/test/Transforms/InstCombine/float-shrink-compare.ll @@ -170,6 +170,58 @@ define i32 @test14(float %x, float %y) nounwind uwtable { ; CHECK-NEXT: fcmp oeq float %truncf, %y } +define i32 @test15(float %x, float %y, float %z) nounwind uwtable { + %1 = fpext float %x to double + %2 = fpext float %y 
to double + %3 = call double @fmin(double %1, double %2) nounwind + %4 = fpext float %z to double + %5 = fcmp oeq double %3, %4 + %6 = zext i1 %5 to i32 + ret i32 %6 +; CHECK-LABEL: @test15( +; CHECK-NEXT: %fminf = call float @fminf(float %x, float %y) +; CHECK-NEXT: fcmp oeq float %fminf, %z +} + +define i32 @test16(float %x, float %y, float %z) nounwind uwtable { + %1 = fpext float %z to double + %2 = fpext float %x to double + %3 = fpext float %y to double + %4 = call double @fmin(double %2, double %3) nounwind + %5 = fcmp oeq double %1, %4 + %6 = zext i1 %5 to i32 + ret i32 %6 +; CHECK-LABEL: @test16( +; CHECK-NEXT: %fminf = call float @fminf(float %x, float %y) +; CHECK-NEXT: fcmp oeq float %fminf, %z +} + +define i32 @test17(float %x, float %y, float %z) nounwind uwtable { + %1 = fpext float %x to double + %2 = fpext float %y to double + %3 = call double @fmax(double %1, double %2) nounwind + %4 = fpext float %z to double + %5 = fcmp oeq double %3, %4 + %6 = zext i1 %5 to i32 + ret i32 %6 +; CHECK-LABEL: @test17( +; CHECK-NEXT: %fmaxf = call float @fmaxf(float %x, float %y) +; CHECK-NEXT: fcmp oeq float %fmaxf, %z +} + +define i32 @test18(float %x, float %y, float %z) nounwind uwtable { + %1 = fpext float %z to double + %2 = fpext float %x to double + %3 = fpext float %y to double + %4 = call double @fmax(double %2, double %3) nounwind + %5 = fcmp oeq double %1, %4 + %6 = zext i1 %5 to i32 + ret i32 %6 +; CHECK-LABEL: @test18( +; CHECK-NEXT: %fmaxf = call float @fmaxf(float %x, float %y) +; CHECK-NEXT: fcmp oeq float %fmaxf, %z +} + declare double @fabs(double) nounwind readnone declare double @ceil(double) nounwind readnone declare double @floor(double) nounwind readnone @@ -177,3 +229,5 @@ declare double @nearbyint(double) nounwind readnone declare double @rint(double) nounwind readnone declare double @round(double) nounwind readnone declare double @trunc(double) nounwind readnone +declare double @fmin(double, double) nounwind readnone +declare double 
@fmax(double, double) nounwind readnone diff --git a/test/Transforms/InstCombine/fmul.ll b/test/Transforms/InstCombine/fmul.ll index 402ee52..18cbf9d 100644 --- a/test/Transforms/InstCombine/fmul.ll +++ b/test/Transforms/InstCombine/fmul.ll @@ -24,10 +24,10 @@ define float @test2(float %x) { define float @test3(float %x, float %y) { %sub1 = fsub float -0.000000e+00, %x %sub2 = fsub float -0.000000e+00, %y - %mul = fmul float %sub1, %sub2 + %mul = fmul fast float %sub1, %sub2 ret float %mul ; CHECK-LABEL: @test3( -; CHECK: fmul float %x, %y +; CHECK: fmul fast float %x, %y } ; (0.0 - X) * (0.0 - Y) => X * Y @@ -93,3 +93,33 @@ for.body: ; preds = %for.cond for.end: ; preds = %for.cond ret void } + +; X * -1.0 => -0.0 - X +define float @test9(float %x) { + %mul = fmul float %x, -1.0 + ret float %mul + +; CHECK-LABEL: @test9( +; CHECK-NOT: fmul +; CHECK: fsub +} + +; PR18532 +define <4 x float> @test10(<4 x float> %x) { + %mul = fmul <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0> + ret <4 x float> %mul + +; CHECK-LABEL: @test10( +; CHECK-NOT: fmul +; CHECK: fsub +} + +define float @test11(float %x, float %y) { + %a = fadd fast float %x, 1.0 + %b = fadd fast float %y, 2.0 + %c = fadd fast float %a, %b + ret float %c +; CHECK-LABEL: @test11( +; CHECK-NOT: fadd float +; CHECK: fadd fast float +} diff --git a/test/Transforms/InstCombine/fpcast.ll b/test/Transforms/InstCombine/fpcast.ll index 05d1b48..9be66fd 100644 --- a/test/Transforms/InstCombine/fpcast.ll +++ b/test/Transforms/InstCombine/fpcast.ll @@ -31,6 +31,15 @@ define half @test4(float %a) { ret half %c } +; CHECK: test4-fast +define half @test4-fast(float %a) { +; CHECK: fptrunc +; CHECK: fsub fast + %b = fsub fast float -0.0, %a + %c = fptrunc float %b to half + ret half %c +} + ; CHECK: test5 define half @test5(float %a, float %b, float %c) { ; CHECK: fcmp ogt diff --git a/test/Transforms/InstCombine/fpextend.ll b/test/Transforms/InstCombine/fpextend.ll index 70e0c62..8640cd2 100644 --- a/test/Transforms/InstCombine/fpextend.ll +++
b/test/Transforms/InstCombine/fpextend.ll @@ -1,3 +1,4 @@ + ; RUN: opt < %s -instcombine -S | not grep fpext @X = external global float @Y = external global float @@ -12,6 +13,18 @@ entry: ret void } +define void @test2() nounwind { +entry: + %tmp = load float* @X, align 4 ; [#uses=1] + %tmp1 = fpext float %tmp to double ; [#uses=1] + %tmp2 = load float* @Y, align 4 ; [#uses=1] + %tmp23 = fpext float %tmp2 to double ; [#uses=1] + %tmp5 = fmul double %tmp1, %tmp23 ; [#uses=1] + %tmp56 = fptrunc double %tmp5 to float ; [#uses=1] + store float %tmp56, float* @X, align 4 + ret void +} + define void @test3() nounwind { entry: %tmp = load float* @X, align 4 ; [#uses=1] @@ -33,4 +46,3 @@ entry: store float %tmp34, float* @X, align 4 ret void } - diff --git a/test/Transforms/InstCombine/fpextend_x86.ll b/test/Transforms/InstCombine/fpextend_x86.ll new file mode 100644 index 0000000..e012551 --- /dev/null +++ b/test/Transforms/InstCombine/fpextend_x86.ll @@ -0,0 +1,57 @@ +; RUN: opt < %s -instcombine -mtriple=x86_64-apple-macosx -S | FileCheck %s +target triple = "x86_64-apple-macosx" + +define double @test1(double %a, double %b) nounwind { + %wa = fpext double %a to x86_fp80 + %wb = fpext double %b to x86_fp80 + %wr = fadd x86_fp80 %wa, %wb + %r = fptrunc x86_fp80 %wr to double + ret double %r +; CHECK: test1 +; CHECK: fadd x86_fp80 +; CHECK: ret +} + +define double @test2(double %a, double %b) nounwind { + %wa = fpext double %a to x86_fp80 + %wb = fpext double %b to x86_fp80 + %wr = fsub x86_fp80 %wa, %wb + %r = fptrunc x86_fp80 %wr to double + ret double %r +; CHECK: test2 +; CHECK: fsub x86_fp80 +; CHECK: ret +} + +define double @test3(double %a, double %b) nounwind { + %wa = fpext double %a to x86_fp80 + %wb = fpext double %b to x86_fp80 + %wr = fmul x86_fp80 %wa, %wb + %r = fptrunc x86_fp80 %wr to double + ret double %r +; CHECK: test3 +; CHECK: fmul x86_fp80 +; CHECK: ret +} + +define double @test4(double %a, half %b) nounwind { + %wa = fpext double %a to x86_fp80 + 
%wb = fpext half %b to x86_fp80 + %wr = fmul x86_fp80 %wa, %wb + %r = fptrunc x86_fp80 %wr to double + ret double %r +; CHECK: test4 +; CHECK: fmul double +; CHECK: ret +} + +define double @test5(double %a, double %b) nounwind { + %wa = fpext double %a to x86_fp80 + %wb = fpext double %b to x86_fp80 + %wr = fdiv x86_fp80 %wa, %wb + %r = fptrunc x86_fp80 %wr to double + ret double %r +; CHECK: test5 +; CHECK: fdiv x86_fp80 +; CHECK: ret +} diff --git a/test/Transforms/InstCombine/fprintf-1.ll b/test/Transforms/InstCombine/fprintf-1.ll index 3f6a314..6741345 100644 --- a/test/Transforms/InstCombine/fprintf-1.ll +++ b/test/Transforms/InstCombine/fprintf-1.ll @@ -56,18 +56,18 @@ define void @test_simplify4(%FILE* %fp) { ; CHECK-IPRINTF-LABEL: @test_simplify4( %fmt = getelementptr [3 x i8]* @percent_d, i32 0, i32 0 call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* %fmt, i32 187) -; CHECK-NEXT-IPRINTF: call i32 (%FILE*, i8*, ...)* @fiprintf(%FILE* %fp, i8* getelementptr inbounds ([3 x i8]* @percent_d, i32 0, i32 0), i32 187) +; CHECK-IPRINTF-NEXT: call i32 (%FILE*, i8*, ...)* @fiprintf(%FILE* %fp, i8* getelementptr inbounds ([3 x i8]* @percent_d, i32 0, i32 0), i32 187) ret void -; CHECK-NEXT-IPRINTF: ret void +; CHECK-IPRINTF-NEXT: ret void } define void @test_no_simplify1(%FILE* %fp) { ; CHECK-IPRINTF-LABEL: @test_no_simplify1( %fmt = getelementptr [3 x i8]* @percent_f, i32 0, i32 0 call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* %fmt, double 1.87) -; CHECK-NEXT-IPRINTF: call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* getelementptr inbounds ([3 x i8]* @percent_f, i32 0, i32 0), double 1.870000e+00) +; CHECK-IPRINTF-NEXT: call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* getelementptr inbounds ([3 x i8]* @percent_f, i32 0, i32 0), double 1.870000e+00) ret void -; CHECK-NEXT-IPRINTF: ret void +; CHECK-IPRINTF-NEXT: ret void } define void @test_no_simplify2(%FILE* %fp, double %d) { diff --git a/test/Transforms/InstCombine/getelementptr.ll 
b/test/Transforms/InstCombine/getelementptr.ll index c29a7dc..ef0cb29 100644 --- a/test/Transforms/InstCombine/getelementptr.ll +++ b/test/Transforms/InstCombine/getelementptr.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -instcombine -S | FileCheck %s -target datalayout = "e-p:64:64-p1:16:16-p2:32:32:32" +target datalayout = "e-p:64:64-p1:16:16-p2:32:32:32-p3:64:64:64" %intstruct = type { i32 } %pair = type { i32, i32 } @@ -728,6 +728,19 @@ define i64 @test_gep_bitcast_array_same_size_element([100 x double]* %arr, i64 % ret i64 %x } +; gep should be done in the original address space. +define i64 @test_gep_bitcast_array_same_size_element_addrspacecast([100 x double]* %arr, i64 %N) { +; CHECK-LABEL: @test_gep_bitcast_array_same_size_element_addrspacecast( +; CHECK: getelementptr [100 x double]* %arr, i64 0, i64 %V +; CHECK-NEXT: %t = addrspacecast double* +; CHECK: load i64 addrspace(3)* %t + %cast = addrspacecast [100 x double]* %arr to i64 addrspace(3)* + %V = mul i64 %N, 8 + %t = getelementptr i64 addrspace(3)* %cast, i64 %V + %x = load i64 addrspace(3)* %t + ret i64 %x +} + ; The element size of the array is different the element size of the pointer define i8 @test_gep_bitcast_array_different_size_element([100 x double]* %arr, i64 %N) { ; CHECK-LABEL: @test_gep_bitcast_array_different_size_element( @@ -789,4 +802,13 @@ define i16 @test41([3 x i32] addrspace(1)* %array) { ; CHECK-NEXT: ret i16 8 } +define i32 addrspace(1)* @ascast_0_gep([128 x i32]* %p) nounwind { +; CHECK-LABEL: @ascast_0_gep( +; CHECK-NOT: getelementptr +; CHECK: ret + %gep = getelementptr [128 x i32]* %p, i32 0, i32 0 + %x = addrspacecast i32* %gep to i32 addrspace(1)* + ret i32 addrspace(1)* %x +} + ; CHECK: attributes [[NUW]] = { nounwind } diff --git a/test/Transforms/InstCombine/insert-extract-shuffle.ll b/test/Transforms/InstCombine/insert-extract-shuffle.ll new file mode 100644 index 0000000..8929c82 --- /dev/null +++ b/test/Transforms/InstCombine/insert-extract-shuffle.ll @@ -0,0 +1,37 @@ +; RUN: 
opt -S -instcombine %s | FileCheck %s + +define <1 x i8> @test1(<8 x i8> %in) { +; CHECK-LABEL: @test1 +; CHECK: shufflevector <8 x i8> %in, <8 x i8> undef, <1 x i32> <i32 5> + %val = extractelement <8 x i8> %in, i32 5 + %vec = insertelement <1 x i8> undef, i8 %val, i32 0 + ret <1 x i8> %vec +} + +define <4 x i16> @test2(<8 x i16> %in, <8 x i16> %in2) { +; CHECK-LABEL: @test2 +; CHECK: shufflevector <8 x i16> %in2, <8 x i16> %in, <4 x i32> <i32 11, i32 9, i32 0, i32 10> + %elt0 = extractelement <8 x i16> %in, i32 3 + %elt1 = extractelement <8 x i16> %in, i32 1 + %elt2 = extractelement <8 x i16> %in2, i32 0 + %elt3 = extractelement <8 x i16> %in, i32 2 + + %vec.0 = insertelement <4 x i16> undef, i16 %elt0, i32 0 + %vec.1 = insertelement <4 x i16> %vec.0, i16 %elt1, i32 1 + %vec.2 = insertelement <4 x i16> %vec.1, i16 %elt2, i32 2 + %vec.3 = insertelement <4 x i16> %vec.2, i16 %elt3, i32 3 + + ret <4 x i16> %vec.3 +} + +define <2 x i64> @test_vcopyq_lane_p64(<2 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: @test_vcopyq_lane_p64 +; CHECK: extractelement +; CHECK: insertelement +; CHECK-NOT: shufflevector +entry: + %elt = extractelement <1 x i64> %b, i32 0 + %res = insertelement <2 x i64> %a, i64 %elt, i32 1 + ret <2 x i64> %res +} + diff --git a/test/Transforms/InstCombine/load-addrspace-cast.ll b/test/Transforms/InstCombine/load-addrspace-cast.ll new file mode 100644 index 0000000..fd6339c --- /dev/null +++ b/test/Transforms/InstCombine/load-addrspace-cast.ll @@ -0,0 +1,12 @@ +; RUN: opt -instcombine -S < %s | FileCheck %s +target datalayout = "e-p:64:64:64-n8:16:32:64" + +define i32* @pointer_to_addrspace_pointer(i32 addrspace(1)** %x) nounwind { +; CHECK-LABEL: @pointer_to_addrspace_pointer( +; CHECK: load +; CHECK: addrspacecast + %y = bitcast i32 addrspace(1)** %x to i32** + %z = load i32** %y + ret i32* %z +} + diff --git a/test/Transforms/InstCombine/mul.ll b/test/Transforms/InstCombine/mul.ll index 94fc118..d19bedc 100644 --- a/test/Transforms/InstCombine/mul.ll +++
b/test/Transforms/InstCombine/mul.ll @@ -181,3 +181,19 @@ define i32 @test19(i32 %A, i32 %B) { ret i32 %H ; CHECK: ret i32 0 } + +define <2 x i64> @test20(<2 x i64> %A) { +; CHECK-LABEL: @test20( + %B = add <2 x i64> %A, + %C = mul <2 x i64> %B, + ret <2 x i64> %C +; CHECK: mul <2 x i64> %A, +; CHECK: add <2 x i64> %{{.}}, +} + +define <2 x i1> @test21(<2 x i1> %A, <2 x i1> %B) { +; CHECK-LABEL: @test21( + %C = mul <2 x i1> %A, %B + ret <2 x i1> %C +; CHECK: %C = and <2 x i1> %A, %B +} diff --git a/test/Transforms/InstCombine/onehot_merge.ll b/test/Transforms/InstCombine/onehot_merge.ll index 51f955c..496d847 100644 --- a/test/Transforms/InstCombine/onehot_merge.ll +++ b/test/Transforms/InstCombine/onehot_merge.ll @@ -16,7 +16,7 @@ bb: ;CHECK: @foo1_and ;CHECK: shl i32 1, %c1 -;CHECK-NEXT: shl i32 1, %c2 +;CHECK-NEXT: lshr i32 -2147483648, %c2 ;CHECK-NEXT: or i32 ;CHECK-NEXT: and i32 ;CHECK-NEXT: icmp ne i32 %1, %0 @@ -24,7 +24,7 @@ bb: define i1 @foo1_and(i32 %k, i32 %c1, i32 %c2) { bb: %tmp = shl i32 1, %c1 - %tmp4 = shl i32 1, %c2 + %tmp4 = lshr i32 -2147483648, %c2 %tmp1 = and i32 %tmp, %k %tmp2 = icmp eq i32 %tmp1, 0 %tmp5 = and i32 %tmp4, %k diff --git a/test/Transforms/InstCombine/pow-1.ll b/test/Transforms/InstCombine/pow-1.ll index 9f1d073..fb3b7d7 100644 --- a/test/Transforms/InstCombine/pow-1.ll +++ b/test/Transforms/InstCombine/pow-1.ll @@ -1,6 +1,11 @@ ; Test that the pow library call simplifier works correctly. 
; ; RUN: opt < %s -instcombine -S | FileCheck %s +; RUN: opt -instcombine -S < %s -mtriple=x86_64-apple-macosx10.9 | FileCheck %s --check-prefix=CHECK-EXP10 +; RUN: opt -instcombine -S < %s -mtriple=arm-apple-ios7.0 | FileCheck %s --check-prefix=CHECK-EXP10 +; RUN: opt -instcombine -S < %s -mtriple=x86_64-apple-macosx10.8 | FileCheck %s --check-prefix=CHECK-NO-EXP10 +; RUN: opt -instcombine -S < %s -mtriple=arm-apple-ios6.0 | FileCheck %s --check-prefix=CHECK-NO-EXP10 +; RUN: opt -instcombine -S < %s -mtriple=x86_64-netbsd | FileCheck %s --check-prefix=CHECK-NO-EXP10 ; rdar://7251832 ; NOTE: The readonly attribute on the pow call should be preserved @@ -155,13 +160,33 @@ declare double @llvm.pow.f64(double %Val, double %Power) define double @test_simplify17(double %x) { ; CHECK-LABEL: @test_simplify17( %retval = call double @llvm.pow.f64(double %x, double 0.5) -; CHECK-NEXT: [[SQRT:%[a-z0-9]+]] = call double @sqrt(double %x) [[NUW_RO]] -; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call double @fabs(double [[SQRT]]) [[NUW_RO]] +; CHECK-NEXT: [[SQRT:%[a-z0-9]+]] = call double @sqrt(double %x) +; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call double @fabs(double [[SQRT]]) ; CHECK-NEXT: [[FCMP:%[a-z0-9]+]] = fcmp oeq double %x, 0xFFF0000000000000 ; CHECK-NEXT: [[SELECT:%[a-z0-9]+]] = select i1 [[FCMP]], double 0x7FF0000000000000, double [[FABS]] ret double %retval ; CHECK-NEXT: ret double [[SELECT]] } +; Check pow(10.0, x) -> __exp10(x) on OS X 10.9+ and iOS 7.0+. 
+ +define float @test_simplify18(float %x) { +; CHECK-LABEL: @test_simplify18( + %retval = call float @powf(float 10.0, float %x) +; CHECK-EXP10: [[EXP10F:%[_a-z0-9]+]] = call float @__exp10f(float %x) [[NUW_RO:#[0-9]+]] + ret float %retval +; CHECK-EXP10: ret float [[EXP10F]] +; CHECK-NO-EXP10: call float @powf +} + +define double @test_simplify19(double %x) { +; CHECK-LABEL: @test_simplify19( + %retval = call double @pow(double 10.0, double %x) +; CHECK-EXP10: [[EXP10:%[_a-z0-9]+]] = call double @__exp10(double %x) [[NUW_RO]] + ret double %retval +; CHECK-EXP10: ret double [[EXP10]] +; CHECK-NO-EXP10: call double @pow +} + ; CHECK: attributes [[NUW_RO]] = { nounwind readonly } diff --git a/test/Transforms/InstCombine/printf-1.ll b/test/Transforms/InstCombine/printf-1.ll index c98ddd5..483bc7a 100644 --- a/test/Transforms/InstCombine/printf-1.ll +++ b/test/Transforms/InstCombine/printf-1.ll @@ -87,18 +87,18 @@ define void @test_simplify7() { ; CHECK-IPRINTF-LABEL: @test_simplify7( %fmt = getelementptr [3 x i8]* @percent_d, i32 0, i32 0 call i32 (i8*, ...)* @printf(i8* %fmt, i32 187) -; CHECK-NEXT-IPRINTF: call i32 (i8*, ...)* @iprintf(i8* getelementptr inbounds ([3 x i8]* @percent_d, i32 0, i32 0), i32 187) +; CHECK-IPRINTF-NEXT: call i32 (i8*, ...)* @iprintf(i8* getelementptr inbounds ([3 x i8]* @percent_d, i32 0, i32 0), i32 187) ret void -; CHECK-NEXT-IPRINTF: ret void +; CHECK-IPRINTF-NEXT: ret void } define void @test_no_simplify1() { ; CHECK-IPRINTF-LABEL: @test_no_simplify1( %fmt = getelementptr [3 x i8]* @percent_f, i32 0, i32 0 call i32 (i8*, ...)* @printf(i8* %fmt, double 1.87) -; CHECK-NEXT-IPRINTF: call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @percent_f, i32 0, i32 0), double 1.870000e+00) +; CHECK-IPRINTF-NEXT: call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @percent_f, i32 0, i32 0), double 1.870000e+00) ret void -; CHECK-NEXT-IPRINTF: ret void +; CHECK-IPRINTF-NEXT: ret void } define void 
@test_no_simplify2(i8* %fmt, double %d) { diff --git a/test/Transforms/InstCombine/rem.ll b/test/Transforms/InstCombine/rem.ll index 22fd90b..9f07702 100644 --- a/test/Transforms/InstCombine/rem.ll +++ b/test/Transforms/InstCombine/rem.ll @@ -204,3 +204,12 @@ define i32 @test19(i32 %x, i32 %y) { %E = urem i32 %y, %D ret i32 %E } + +define <2 x i64> @test20(<2 x i64> %X, <2 x i1> %C) { +; CHECK-LABEL: @test20( +; CHECK-NEXT: select <2 x i1> %C, <2 x i64> , <2 x i64> zeroinitializer +; CHECK-NEXT: ret <2 x i64> + %V = select <2 x i1> %C, <2 x i64> , <2 x i64> + %R = urem <2 x i64> %V, + ret <2 x i64> %R +} diff --git a/test/Transforms/InstCombine/round.ll b/test/Transforms/InstCombine/round.ll new file mode 100644 index 0000000..ecc62dd --- /dev/null +++ b/test/Transforms/InstCombine/round.ll @@ -0,0 +1,90 @@ +; RUN: opt -S -instcombine < %s | FileCheck %s + +declare float @llvm.round.f32(float) #0 +declare double @llvm.round.f64(double) #0 + +; CHECK-LABEL: @constant_fold_round_f32_01 +; CHECK-NEXT: ret float 1.000000e+00 +define float @constant_fold_round_f32_01() #0 { + %x = call float @llvm.round.f32(float 1.25) #0 + ret float %x +} + +; CHECK-LABEL: @constant_fold_round_f32_02 +; CHECK-NEXT: ret float -1.000000e+00 +define float @constant_fold_round_f32_02() #0 { + %x = call float @llvm.round.f32(float -1.25) #0 + ret float %x +} + +; CHECK-LABEL: @constant_fold_round_f32_03 +; CHECK-NEXT: ret float 2.000000e+00 +define float @constant_fold_round_f32_03() #0 { + %x = call float @llvm.round.f32(float 1.5) #0 + ret float %x +} + +; CHECK-LABEL: @constant_fold_round_f32_04 +; CHECK-NEXT: ret float -2.000000e+00 +define float @constant_fold_round_f32_04() #0 { + %x = call float @llvm.round.f32(float -1.5) #0 + ret float %x +} + +; CHECK-LABEL: @constant_fold_round_f32_05 +; CHECK-NEXT: ret float 3.000000e+00 +define float @constant_fold_round_f32_05() #0 { + %x = call float @llvm.round.f32(float 2.75) #0 + ret float %x +} + +; CHECK-LABEL: 
@constant_fold_round_f32_06 +; CHECK-NEXT: ret float -3.000000e+00 +define float @constant_fold_round_f32_06() #0 { + %x = call float @llvm.round.f32(float -2.75) #0 + ret float %x +} + +; CHECK-LABEL: @constant_fold_round_f64_01 +; CHECK-NEXT: ret double 1.000000e+00 +define double @constant_fold_round_f64_01() #0 { + %x = call double @llvm.round.f64(double 1.3) #0 + ret double %x +} + +; CHECK-LABEL: @constant_fold_round_f64_02 +; CHECK-NEXT: ret double -1.000000e+00 +define double @constant_fold_round_f64_02() #0 { + %x = call double @llvm.round.f64(double -1.3) #0 + ret double %x +} + +; CHECK-LABEL: @constant_fold_round_f64_03 +; CHECK-NEXT: ret double 2.000000e+00 +define double @constant_fold_round_f64_03() #0 { + %x = call double @llvm.round.f64(double 1.5) #0 + ret double %x +} + +; CHECK-LABEL: @constant_fold_round_f64_04 +; CHECK-NEXT: ret double -2.000000e+00 +define double @constant_fold_round_f64_04() #0 { + %x = call double @llvm.round.f64(double -1.5) #0 + ret double %x +} + +; CHECK-LABEL: @constant_fold_round_f64_05 +; CHECK-NEXT: ret double 3.000000e+00 +define double @constant_fold_round_f64_05() #0 { + %x = call double @llvm.round.f64(double 2.7) #0 + ret double %x +} + +; CHECK-LABEL: @constant_fold_round_f64_06 +; CHECK-NEXT: ret double -3.000000e+00 +define double @constant_fold_round_f64_06() #0 { + %x = call double @llvm.round.f64(double -2.7) #0 + ret double %x +} + +attributes #0 = { nounwind readnone } diff --git a/test/Transforms/InstCombine/select-2.ll b/test/Transforms/InstCombine/select-2.ll index 5b9deb4..832d958 100644 --- a/test/Transforms/InstCombine/select-2.ll +++ b/test/Transforms/InstCombine/select-2.ll @@ -19,3 +19,13 @@ define i32 @t2(i32 %c, i32 %x) nounwind { %t3 = select i1 %t1, i32 %t2, i32 %x ret i32 %t3 } + +define float @t3(float %x, float %y) nounwind { + %t1 = fcmp ogt float %x, %y + %t2 = select i1 %t1, float %x, float 1.0 + %t3 = fadd fast float %t2, 1.0 + ret float %t3 +; CHECK-LABEL: @t3( +; CHECK: fadd fast 
+; CHECK: select +} diff --git a/test/Transforms/InstCombine/select-select.ll b/test/Transforms/InstCombine/select-select.ll new file mode 100644 index 0000000..65820ac --- /dev/null +++ b/test/Transforms/InstCombine/select-select.ll @@ -0,0 +1,24 @@ +; RUN: opt -instcombine -S < %s | FileCheck %s + +; CHECK: @foo1 +define float @foo1(float %a) #0 { +; CHECK-NOT: xor + %b = fcmp ogt float %a, 0.000000e+00 + %c = select i1 %b, float %a, float 0.000000e+00 + %d = fcmp olt float %c, 1.000000e+00 + %f = select i1 %d, float %c, float 1.000000e+00 + ret float %f +} + +; CHECK: @foo2 +define float @foo2(float %a) #0 { +; CHECK-NOT: xor + %b = fcmp ogt float %a, 0.000000e+00 + %c = select i1 %b, float %a, float 0.000000e+00 + %d = fcmp olt float %c, 1.000000e+00 + %e = select i1 %b, float %a, float 0.000000e+00 + %f = select i1 %d, float %e, float 1.000000e+00 + ret float %f +} + +attributes #0 = { nounwind readnone ssp uwtable } diff --git a/test/Transforms/InstCombine/sign-test-and-or.ll b/test/Transforms/InstCombine/sign-test-and-or.ll index 95ed9b9..aa23d93 100644 --- a/test/Transforms/InstCombine/sign-test-and-or.ll +++ b/test/Transforms/InstCombine/sign-test-and-or.ll @@ -177,3 +177,41 @@ if.then: if.end: ret void } + +define void @test10(i32 %a) nounwind { + %1 = and i32 %a, 2 + %2 = icmp eq i32 %1, 0 + %3 = icmp ult i32 %a, 4 + %or.cond = and i1 %2, %3 + br i1 %or.cond, label %if.then, label %if.end + +; CHECK-LABEL: @test10( +; CHECK-NEXT: %1 = icmp ult i32 %a, 2 +; CHECK-NEXT: br i1 %1, label %if.then, label %if.end + +if.then: + tail call void @foo() nounwind + ret void + +if.end: + ret void +} + +define void @test11(i32 %a) nounwind { + %1 = and i32 %a, 2 + %2 = icmp ne i32 %1, 0 + %3 = icmp ugt i32 %a, 3 + %or.cond = or i1 %2, %3 + br i1 %or.cond, label %if.then, label %if.end + +; CHECK-LABEL: @test11( +; CHECK-NEXT: %1 = icmp ugt i32 %a, 1 +; CHECK-NEXT: br i1 %1, label %if.then, label %if.end + +if.then: + tail call void @foo() nounwind + ret void + 
+if.end: + ret void +} diff --git a/test/Transforms/InstCombine/sincospi.ll b/test/Transforms/InstCombine/sincospi.ll index 0d1a602..739827f 100644 --- a/test/Transforms/InstCombine/sincospi.ll +++ b/test/Transforms/InstCombine/sincospi.ll @@ -1,5 +1,6 @@ ; RUN: opt -instcombine -S < %s -mtriple=x86_64-apple-macosx10.9 | FileCheck %s --check-prefix=CHECK-FLOAT-IN-VEC ; RUN: opt -instcombine -S < %s -mtriple=arm-apple-ios7.0 | FileCheck %s +; RUN: opt -instcombine -S < %s -mtriple=arm64-apple-ios7.0 | FileCheck %s ; RUN: opt -instcombine -S < %s -mtriple=x86_64-apple-macosx10.8 | FileCheck %s --check-prefix=CHECK-NO-SINCOS ; RUN: opt -instcombine -S < %s -mtriple=arm-apple-ios6.0 | FileCheck %s --check-prefix=CHECK-NO-SINCOS ; RUN: opt -instcombine -S < %s -mtriple=x86_64-none-linux-gnu | FileCheck %s --check-prefix=CHECK-NO-SINCOS @@ -23,12 +24,12 @@ define float @test_instbased_f32() { %res = fadd float %sin, %cos ret float %res ; CHECK-FLOAT-IN-VEC: [[VAL:%[a-z0-9]+]] = load float* @var32 -; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call <2 x float> @__sincospi_stretf(float [[VAL]]) +; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call <2 x float> @__sincospif_stret(float [[VAL]]) ; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 0 ; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 1 ; CHECK: [[VAL:%[a-z0-9]+]] = load float* @var32 -; CHECK: [[SINCOS:%[a-z0-9]+]] = call { float, float } @__sincospi_stretf(float [[VAL]]) +; CHECK: [[SINCOS:%[a-z0-9]+]] = call { float, float } @__sincospif_stret(float [[VAL]]) ; CHECK: extractvalue { float, float } [[SINCOS]], 0 ; CHECK: extractvalue { float, float } [[SINCOS]], 1 @@ -41,11 +42,11 @@ define float @test_constant_f32() { %cos = call float @__cospif(float 1.0) #0 %res = fadd float %sin, %cos ret float %res -; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call <2 x float> @__sincospi_stretf(float 1.000000e+00) +; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call <2 x float> 
@__sincospif_stret(float 1.000000e+00) ; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 0 ; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 1 -; CHECK: [[SINCOS:%[a-z0-9]+]] = call { float, float } @__sincospi_stretf(float 1.000000e+00) +; CHECK: [[SINCOS:%[a-z0-9]+]] = call { float, float } @__sincospif_stret(float 1.000000e+00) ; CHECK: extractvalue { float, float } [[SINCOS]], 0 ; CHECK: extractvalue { float, float } [[SINCOS]], 1 diff --git a/test/Transforms/InstCombine/sprintf-1.ll b/test/Transforms/InstCombine/sprintf-1.ll index 78dd7aa..afa38f3 100644 --- a/test/Transforms/InstCombine/sprintf-1.ll +++ b/test/Transforms/InstCombine/sprintf-1.ll @@ -77,18 +77,18 @@ define void @test_simplify6(i8* %dst) { ; CHECK-IPRINTF-LABEL: @test_simplify6( %fmt = getelementptr [3 x i8]* @percent_d, i32 0, i32 0 call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt, i32 187) -; CHECK-NEXT-IPRINTF: call i32 (i8*, i8*, ...)* @siprintf(i8* %dst, i8* getelementptr inbounds ([3 x i8]* @percent_d, i32 0, i32 0), i32 187) +; CHECK-IPRINTF-NEXT: call i32 (i8*, i8*, ...)* @siprintf(i8* %dst, i8* getelementptr inbounds ([3 x i8]* @percent_d, i32 0, i32 0), i32 187) ret void -; CHECK-NEXT-IPRINTF: ret void +; CHECK-IPRINTF-NEXT: ret void } define void @test_no_simplify1(i8* %dst) { ; CHECK-IPRINTF-LABEL: @test_no_simplify1( %fmt = getelementptr [3 x i8]* @percent_f, i32 0, i32 0 call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt, double 1.87) -; CHECK-NEXT-IPRINTF: call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* getelementptr inbounds ([3 x i8]* @percent_f, i32 0, i32 0), double 1.870000e+00) +; CHECK-IPRINTF-NEXT: call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* getelementptr inbounds ([3 x i8]* @percent_f, i32 0, i32 0), double 1.870000e+00) ret void -; CHECK-NEXT-IPRINTF: ret void +; CHECK-IPRINTF-NEXT: ret void } define void @test_no_simplify2(i8* %dst, i8* %fmt, double %d) { diff --git a/test/Transforms/InstCombine/strchr-1.ll 
b/test/Transforms/InstCombine/strchr-1.ll index d2c9894..66b3e2e 100644 --- a/test/Transforms/InstCombine/strchr-1.ll +++ b/test/Transforms/InstCombine/strchr-1.ll @@ -63,3 +63,16 @@ define void @test_simplify5() { store i8* %dst, i8** @chp ret void } + +; Check transformation strchr(p, 0) -> p + strlen(p) +define void @test_simplify6(i8* %str) { +; CHECK: %strlen = call i32 @strlen(i8* %str) +; CHECK-NOT: call i8* @strchr +; CHECK: %strchr = getelementptr i8* %str, i32 %strlen +; CHECK: store i8* %strchr, i8** @chp, align 4 +; CHECK: ret void + + %dst = call i8* @strchr(i8* %str, i32 0) + store i8* %dst, i8** @chp + ret void +} diff --git a/test/Transforms/InstCombine/sub.ll b/test/Transforms/InstCombine/sub.ll index 36c523b..41d803c8 100644 --- a/test/Transforms/InstCombine/sub.ll +++ b/test/Transforms/InstCombine/sub.ll @@ -391,4 +391,56 @@ define i16 @test30_as1(i8 addrspace(1)* %foo, i16 %i, i16 %j) { ret i16 %sub } - +define <2 x i64> @test31(<2 x i64> %A) { + %xor = xor <2 x i64> %A, + %sub = sub <2 x i64> , %xor + ret <2 x i64> %sub +; CHECK-LABEL: @test31( +; CHECK-NEXT: %sub = add <2 x i64> %A, +; CHECK-NEXT: ret <2 x i64> %sub +} + +define <2 x i64> @test32(<2 x i64> %A) { + %add = add <2 x i64> %A, + %sub = sub <2 x i64> , %add + ret <2 x i64> %sub +; CHECK-LABEL: @test32( +; CHECK-NEXT: %sub = sub <2 x i64> +; CHECK-NEXT: ret <2 x i64> %sub +} + +define <2 x i64> @test33(<2 x i1> %A) { + %ext = zext <2 x i1> %A to <2 x i64> + %sub = sub <2 x i64> zeroinitializer, %ext + ret <2 x i64> %sub +; CHECK-LABEL: @test33( +; CHECK-NEXT: %sub = sext <2 x i1> %A to <2 x i64> +; CHECK-NEXT: ret <2 x i64> %sub +} + +define <2 x i64> @test34(<2 x i1> %A) { + %ext = sext <2 x i1> %A to <2 x i64> + %sub = sub <2 x i64> zeroinitializer, %ext + ret <2 x i64> %sub +; CHECK-LABEL: @test34( +; CHECK-NEXT: %sub = zext <2 x i1> %A to <2 x i64> +; CHECK-NEXT: ret <2 x i64> %sub +} + +define <2 x i64> @test35(<2 x i64> %A) { + %mul = mul <2 x i64> %A, + %sub = sub <2 x i64> 
%A, %mul + ret <2 x i64> %sub +; CHECK-LABEL: @test35( +; CHECK-NEXT: %sub = mul <2 x i64> %A, +; CHECK-NEXT: ret <2 x i64> %sub +} + +define <2 x i64> @test36(<2 x i64> %A) { + %shl = shl <2 x i64> %A, + %sub = sub <2 x i64> %shl, %A + ret <2 x i64> %sub +; CHECK-LABEL: @test36( +; CHECK-NEXT: %sub = mul <2 x i64> %A, +; CHECK-NEXT: ret <2 x i64> %sub +} diff --git a/test/Transforms/InstCombine/vec_extract_var_elt.ll b/test/Transforms/InstCombine/vec_extract_var_elt.ll index 3c98287..f6f9e01 100644 --- a/test/Transforms/InstCombine/vec_extract_var_elt.ll +++ b/test/Transforms/InstCombine/vec_extract_var_elt.ll @@ -16,3 +16,11 @@ define void @test (float %b, <8 x float> * %p) { ret void } +; PR18600 +define i32 @test2(i32 %i) { + %e = extractelement <4 x i32> bitcast (<2 x i64> to <4 x i32>), i32 %i + ret i32 %e + +; CHECK-LABEL: @test2 +; CHECK: extractelement +} diff --git a/test/Transforms/InstCombine/vec_phi_extract.ll b/test/Transforms/InstCombine/vec_phi_extract.ll index 73ec1f1..1d778a0 100644 --- a/test/Transforms/InstCombine/vec_phi_extract.ll +++ b/test/Transforms/InstCombine/vec_phi_extract.ll @@ -36,10 +36,10 @@ for.cond: %input_1.addr.1 = phi <3 x i32> [ undef, %entry ], [ %dec43, %for.body ] br i1 undef, label %for.end, label %for.body -; CHECK extractelement +; CHECK: extractelement for.body: %dec43 = add <3 x i32> %input_1.addr.1, - %sub44 = sub <3 x i32> zeroinitializer, %dec43 + %sub44 = sub <3 x i32> , %dec43 %div45 = sdiv <3 x i32> %input_2.addr.0, %sub44 br label %for.cond diff --git a/test/Transforms/InstCombine/vec_sext.ll b/test/Transforms/InstCombine/vec_sext.ll index d7ab96b..6f0d214 100644 --- a/test/Transforms/InstCombine/vec_sext.ll +++ b/test/Transforms/InstCombine/vec_sext.ll @@ -13,6 +13,7 @@ entry: %cond = or <4 x i32> %2, %3 ret <4 x i32> %cond +; CHECK-LABEL: @psignd_3 ; CHECK: ashr <4 x i32> %b, ; CHECK: sub nsw <4 x i32> zeroinitializer, %a ; CHECK: xor <4 x i32> %b.lobit, @@ -20,3 +21,25 @@ entry: ; CHECK: and <4 x i32> 
%b.lobit, %sub ; CHECK: or <4 x i32> %1, %2 } + +define <4 x i32> @test1(<4 x i32> %a, <4 x i32> %b) nounwind ssp { +entry: + %cmp = icmp sgt <4 x i32> %b, + %sext = sext <4 x i1> %cmp to <4 x i32> + %sub = sub nsw <4 x i32> zeroinitializer, %a + %0 = icmp slt <4 x i32> %sext, zeroinitializer + %sext3 = sext <4 x i1> %0 to <4 x i32> + %1 = xor <4 x i32> %sext3, + %2 = and <4 x i32> %a, %1 + %3 = and <4 x i32> %sext3, %sub + %cond = or <4 x i32> %2, %3 + ret <4 x i32> %cond + +; CHECK-LABEL: @test1 +; CHECK: ashr <4 x i32> %b, +; CHECK: xor <4 x i32> %b.lobit, +; CHECK: sub nsw <4 x i32> zeroinitializer, %a +; CHECK: and <4 x i32> %b.lobit, %a +; CHECK: and <4 x i32> %b.lobit.not, %sub +; CHECK: or <4 x i32> %0, %1 +} diff --git a/test/Transforms/InstCombine/vec_shuffle.ll b/test/Transforms/InstCombine/vec_shuffle.ll index 3ee43dc..a409a91 100644 --- a/test/Transforms/InstCombine/vec_shuffle.ll +++ b/test/Transforms/InstCombine/vec_shuffle.ll @@ -228,3 +228,20 @@ define <4 x float> @test15b(<4 x float> %LHS, <4 x float> %RHS) { ret <4 x float> %tmp5 } +define <1 x i32> @test16a(i32 %ele) { +; CHECK-LABEL: @test16a( +; CHECK-NEXT: ret <1 x i32> + %tmp0 = insertelement <2 x i32> , i32 %ele, i32 1 + %tmp1 = shl <2 x i32> %tmp0, + %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <1 x i32> + ret <1 x i32> %tmp2 +} + +define <4 x i8> @test16b(i8 %ele) { +; CHECK-LABEL: @test16b( +; CHECK-NEXT: ret <4 x i8> + %tmp0 = insertelement <8 x i8> , i8 %ele, i32 6 + %tmp1 = shl <8 x i8> %tmp0, + %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <4 x i32> + ret <4 x i8> %tmp2 +} \ No newline at end of file diff --git a/test/Transforms/InstCombine/zext.ll b/test/Transforms/InstCombine/zext.ll index 10eabf7..b62c626 100644 --- a/test/Transforms/InstCombine/zext.ll +++ b/test/Transforms/InstCombine/zext.ll @@ -5,7 +5,41 @@ define i64 @test_sext_zext(i16 %A) { %c1 = zext i16 %A to i32 ; [#uses=1] %c2 = sext i32 %c1 to i64 ; [#uses=1] ret i64 %c2 + +; CHECK-LABEL: 
@test_sext_zext ; CHECK-NOT: %c1 ; CHECK: %c2 = zext i16 %A to i64 ; CHECK: ret i64 %c2 } + +define <2 x i64> @test2(<2 x i1> %A) { + %xor = xor <2 x i1> %A, + %zext = zext <2 x i1> %xor to <2 x i64> + ret <2 x i64> %zext + +; CHECK-LABEL: @test2 +; CHECK-NEXT: zext <2 x i1> %A to <2 x i64> +; CHECK-NEXT: xor <2 x i64> %1, +} + +define <2 x i64> @test3(<2 x i64> %A) { + %trunc = trunc <2 x i64> %A to <2 x i32> + %and = and <2 x i32> %trunc, + %zext = zext <2 x i32> %and to <2 x i64> + ret <2 x i64> %zext + +; CHECK-LABEL: @test3 +; CHECK-NEXT: and <2 x i64> %A, +} + +define <2 x i64> @test4(<2 x i64> %A) { + %trunc = trunc <2 x i64> %A to <2 x i32> + %and = and <2 x i32> %trunc, + %xor = xor <2 x i32> %and, + %zext = zext <2 x i32> %xor to <2 x i64> + ret <2 x i64> %zext + +; CHECK-LABEL: @test4 +; CHECK-NEXT: xor <2 x i64> %A, +; CHECK-NEXT: and <2 x i64> %1, +} diff --git a/test/Transforms/InstSimplify/compare.ll b/test/Transforms/InstSimplify/compare.ll index abb3869..ee6be04 100644 --- a/test/Transforms/InstSimplify/compare.ll +++ b/test/Transforms/InstSimplify/compare.ll @@ -739,3 +739,21 @@ define i1 @non_inbounds_gep_compare2(i64* %a) { ret i1 %cmp ; CHECK-NEXT: ret i1 true } + +define <4 x i8> @vectorselectfold(<4 x i8> %a, <4 x i8> %b) { + %false = icmp ne <4 x i8> zeroinitializer, zeroinitializer + %sel = select <4 x i1> %false, <4 x i8> %a, <4 x i8> %b + ret <4 x i8> %sel + +; CHECK-LABEL: @vectorselectfold +; CHECK-NEXT: ret <4 x i8> %b +} + +define <4 x i8> @vectorselectfold2(<4 x i8> %a, <4 x i8> %b) { + %true = icmp eq <4 x i8> zeroinitializer, zeroinitializer + %sel = select <4 x i1> %true, <4 x i8> %a, <4 x i8> %b + ret <4 x i8> %sel + +; CHECK-LABEL: @vectorselectfold +; CHECK-NEXT: ret <4 x i8> %a +} diff --git a/test/Transforms/InstSimplify/undef.ll b/test/Transforms/InstSimplify/undef.ll index 23cd50f..181c2ef 100644 --- a/test/Transforms/InstSimplify/undef.ll +++ b/test/Transforms/InstSimplify/undef.ll @@ -153,3 +153,10 @@ define i64 
@test18(i64 %a) { %r = call i64 (i64)* undef(i64 %a) ret i64 %r } + +; CHECK-LABEL: @test19 +; CHECK: ret <4 x i8> undef +define <4 x i8> @test19(<4 x i8> %a) { + %b = shl <4 x i8> %a, + ret <4 x i8> %b +} diff --git a/test/Transforms/InstSimplify/vector_gep.ll b/test/Transforms/InstSimplify/vector_gep.ll index 5ac1dde..1781463 100644 --- a/test/Transforms/InstSimplify/vector_gep.ll +++ b/test/Transforms/InstSimplify/vector_gep.ll @@ -1,4 +1,7 @@ -;RUN: opt -instsimplify -disable-output < %s +; RUN: opt -S -instsimplify < %s | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + declare void @helper(<2 x i8*>) define void @test(<2 x i8*> %a) { %A = getelementptr <2 x i8*> %a, <2 x i32> @@ -6,3 +9,47 @@ define void @test(<2 x i8*> %a) { ret void } +define <4 x i8*> @test1(<4 x i8*> %a) { + %gep = getelementptr <4 x i8*> %a, <4 x i32> zeroinitializer + ret <4 x i8*> %gep + +; CHECK-LABEL: @test1 +; CHECK-NEXT: ret <4 x i8*> %a +} + +define <4 x i8*> @test2(<4 x i8*> %a) { + %gep = getelementptr <4 x i8*> %a + ret <4 x i8*> %gep + +; CHECK-LABEL: @test2 +; CHECK-NEXT: ret <4 x i8*> %a +} + +%struct = type { double, float } + +define <4 x float*> @test3() { + %gep = getelementptr <4 x %struct*> undef, <4 x i32> , <4 x i32> + ret <4 x float*> %gep + +; CHECK-LABEL: @test3 +; CHECK-NEXT: ret <4 x float*> undef +} + +%struct.empty = type { } + +define <4 x %struct.empty*> @test4(<4 x %struct.empty*> %a) { + %gep = getelementptr <4 x %struct.empty*> %a, <4 x i32> + ret <4 x %struct.empty*> %gep + +; CHECK-LABEL: @test4 +; CHECK-NEXT: ret <4 x %struct.empty*> %a +} + +define <4 x i8*> @test5() { + %c = inttoptr <4 x i64> to <4 x i8*> + %gep = getelementptr <4 x i8*> %c, <4 x i32> + ret <4 x i8*> %gep + +; CHECK-LABEL: @test5 +; CHECK-NEXT: ret <4 x i8*> getelementptr (<4 x i8*> , <4 x i32> ) +} diff --git a/test/Transforms/Internalize/lists.ll b/test/Transforms/Internalize/lists.ll index 83e441a2..548c8aa 100644 --- 
a/test/Transforms/Internalize/lists.ll +++ b/test/Transforms/Internalize/lists.ll @@ -1,7 +1,7 @@ ; No arguments means internalize everything ; RUN: opt < %s -internalize -S | FileCheck --check-prefix=ALL %s -; Non existent files should be treated as if they were empty (so internalize +; Non-existent files should be treated as if they were empty (so internalize ; everything) ; RUN: opt < %s -internalize -internalize-public-api-file /nonexistent/file 2> /dev/null -S | FileCheck --check-prefix=ALL %s @@ -48,3 +48,12 @@ define void @foo() { define available_externally void @bar() { ret void } + +; ALL: define dllexport void @export_foo() { +; FOO_AND_J: define dllexport void @export_foo() { +; FOO_AND_BAR: define dllexport void @export_foo() { +; FOO_J_AND_BAR: define dllexport void @export_foo() { +define dllexport void @export_foo() { + ret void +} + diff --git a/test/Transforms/LICM/lcssa-ssa-promoter.ll b/test/Transforms/LICM/lcssa-ssa-promoter.ll new file mode 100644 index 0000000..5df3ef1 --- /dev/null +++ b/test/Transforms/LICM/lcssa-ssa-promoter.ll @@ -0,0 +1,76 @@ +; RUN: opt -S -basicaa -licm < %s | FileCheck %s +; +; Manually validate LCSSA form is preserved even after SSAUpdater is used to +; promote things in the loop bodies. 
+ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@x = common global i32 0, align 4 +@y = common global i32 0, align 4 + +define void @PR18688() { +; CHECK-LABEL: @PR18688( + +entry: + br i1 undef, label %return, label %outer.preheader + +outer.preheader: + br label %outer.header +; CHECK: outer.preheader: +; CHECK: br label %outer.header + +outer.header: + store i32 0, i32* @x, align 4 + br i1 undef, label %outer.latch, label %inner.preheader +; CHECK: outer.header: +; CHECK-NEXT: br i1 undef, label %outer.latch, label %inner.preheader + +inner.preheader: + br label %inner.header +; CHECK: inner.preheader: +; CHECK-NEXT: br label %inner.header + +inner.header: + br i1 undef, label %inner.body.rhs, label %inner.latch +; CHECK: inner.header: +; CHECK-NEXT: %[[PHI0:[^,]+]] = phi i32 [ %{{[^,]+}}, %inner.latch ], [ 0, %inner.preheader ] +; CHECK-NEXT: br i1 undef, label %inner.body.rhs, label %inner.latch + +inner.body.rhs: + store i32 0, i32* @x, align 4 + br label %inner.latch +; CHECK: inner.body.rhs: +; CHECK-NEXT: br label %inner.latch + +inner.latch: + %y_val = load i32* @y, align 4 + %icmp = icmp eq i32 %y_val, 0 + br i1 %icmp, label %inner.exit, label %inner.header +; CHECK: inner.latch: +; CHECK-NEXT: %[[PHI1:[^,]+]] = phi i32 [ 0, %inner.body.rhs ], [ %[[PHI0]], %inner.header ] +; CHECK-NEXT: br i1 %{{[^,]+}}, label %inner.exit, label %inner.header + +inner.exit: + br label %outer.latch +; CHECK: inner.exit: +; CHECK-NEXT: %[[INNER_LCSSA:[^,]+]] = phi i32 [ %[[PHI1]], %inner.latch ] +; CHECK-NEXT: br label %outer.latch + +outer.latch: + br i1 undef, label %outer.exit, label %outer.header +; CHECK: outer.latch: +; CHECK-NEXT: %[[PHI2:[^,]+]] = phi i32 [ %[[INNER_LCSSA]], %inner.exit ], [ 0, %outer.header ] +; CHECK-NEXT: br i1 {{.*}}, label %outer.exit, label %outer.header + +outer.exit: + br label %return +; CHECK: outer.exit: +; CHECK-NEXT: %[[OUTER_LCSSA:[^,]+]] = phi i32 [ %[[PHI2]], %outer.latch 
] +; CHECK-NEXT: store i32 %[[OUTER_LCSSA]] +; CHECK-NEXT: br label %return + +return: + ret void +} + diff --git a/test/Transforms/LICM/scalar_promote.ll b/test/Transforms/LICM/scalar_promote.ll index 92ef155..d7e7c6e 100644 --- a/test/Transforms/LICM/scalar_promote.ll +++ b/test/Transforms/LICM/scalar_promote.ll @@ -24,7 +24,8 @@ Loop: ; preds = %Loop, %0 Out: ret void ; CHECK: Out: -; CHECK-NEXT: store i32 %x2, i32* @X +; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %x2 +; CHECK-NEXT: store i32 %[[LCSSAPHI]], i32* @X ; CHECK-NEXT: ret void } @@ -48,7 +49,8 @@ Loop: ; preds = %Loop, %0 Exit: ; preds = %Loop ret void ; CHECK: Exit: -; CHECK-NEXT: store i32 %V, i32* getelementptr inbounds (i32* @X, i64 1) +; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %V +; CHECK-NEXT: store i32 %[[LCSSAPHI]], i32* getelementptr inbounds (i32* @X, i64 1) ; CHECK-NEXT: ret void } @@ -142,7 +144,8 @@ Loop: ; preds = %Loop, %0 Out: ret void ; CHECK: Out: -; CHECK-NEXT: store i32 %x2, i32* @X +; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %x2 +; CHECK-NEXT: store i32 %[[LCSSAPHI]], i32* @X ; CHECK-NEXT: ret void } @@ -178,7 +181,8 @@ for.end: ; preds = %for.cond.for.end_cr ; CHECK: for.body.lr.ph: ; CHECK-NEXT: %gi.promoted = load i32* %gi, align 4, !tbaa !0 ; CHECK: for.cond.for.end_crit_edge: -; CHECK-NEXT: store i32 %inc, i32* %gi, align 4, !tbaa !0 +; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %inc +; CHECK-NEXT: store i32 %[[LCSSAPHI]], i32* %gi, align 4, !tbaa !0 } !0 = metadata !{metadata !4, metadata !4, i64 0} diff --git a/test/Transforms/LICM/sinking.ll b/test/Transforms/LICM/sinking.ll index b503f96..ccc9186 100644 --- a/test/Transforms/LICM/sinking.ll +++ b/test/Transforms/LICM/sinking.ll @@ -53,7 +53,7 @@ Exit: ; CHECK-LABEL: @test3( ; CHECK: Exit.loopexit: -; CHECK-NEXT: %X = add i32 0, 1 +; CHECK-NEXT: %X.le = add i32 0, 1 ; CHECK-NEXT: br label %Exit } @@ -76,8 +76,9 @@ Out: ; preds = %Loop ret i32 %tmp.7 ; CHECK-LABEL: @test4( ; CHECK: Out: -; CHECK-NEXT: mul i32 %N, %N_addr.0.pn 
-; CHECK-NEXT: sub i32 %tmp.6, %N +; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %N_addr.0.pn +; CHECK-NEXT: mul i32 %N, %[[LCSSAPHI]] +; CHECK-NEXT: sub i32 %tmp.6.le, %N ; CHECK-NEXT: ret i32 } @@ -100,8 +101,8 @@ Out: ; preds = %Loop ret i32 %tmp.6 ; CHECK-LABEL: @test5( ; CHECK: Out: -; CHECK-NEXT: %tmp.6 = load i32* @X -; CHECK-NEXT: ret i32 %tmp.6 +; CHECK-NEXT: %tmp.6.le = load i32* @X +; CHECK-NEXT: ret i32 %tmp.6.le } @@ -124,9 +125,9 @@ Out: ; preds = %Loop ret i32 %sunk2 ; CHECK-LABEL: @test6( ; CHECK: Out: -; CHECK-NEXT: %dead = getelementptr %Ty* @X2, i64 0, i32 0 -; CHECK-NEXT: %sunk2 = load i32* %dead -; CHECK-NEXT: ret i32 %sunk2 +; CHECK-NEXT: %dead.le = getelementptr %Ty* @X2, i64 0, i32 0 +; CHECK-NEXT: %sunk2.le = load i32* %dead.le +; CHECK-NEXT: ret i32 %sunk2.le } @@ -152,12 +153,14 @@ Out2: ; preds = %ContLoop ret i32 %tmp.7 ; CHECK-LABEL: @test7( ; CHECK: Out1: -; CHECK-NEXT: mul i32 %N, %N_addr.0.pn -; CHECK-NEXT: sub i32 %tmp.6, %N +; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %N_addr.0.pn +; CHECK-NEXT: mul i32 %N, %[[LCSSAPHI]] +; CHECK-NEXT: sub i32 %tmp.6.le, %N ; CHECK-NEXT: ret ; CHECK: Out2: -; CHECK-NEXT: mul i32 %N, %N_addr.0.pn -; CHECK-NEXT: sub i32 %tmp.6 +; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %N_addr.0.pn +; CHECK-NEXT: mul i32 %N, %[[LCSSAPHI]] +; CHECK-NEXT: sub i32 %tmp.6.le4, %N ; CHECK-NEXT: ret } @@ -183,8 +186,9 @@ exit2: ; preds = %Cont ; CHECK: exit1: ; CHECK-NEXT: ret i32 0 ; CHECK: exit2: -; CHECK-NEXT: %V = add i32 %X, 1 -; CHECK-NEXT: ret i32 %V +; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %X +; CHECK-NEXT: %V.le = add i32 %[[LCSSAPHI]], 1 +; CHECK-NEXT: ret i32 %V.le } @@ -208,7 +212,7 @@ return.i: ; preds = %no_exit.1.i ; CHECK-LABEL: @test9( ; CHECK: loopentry.3.i.preheader.loopexit: -; CHECK-NEXT: %inc.1.i = add i32 0, 1 +; CHECK-NEXT: %inc.1.i.le = add i32 0, 1 ; CHECK-NEXT: br label %loopentry.3.i.preheader } @@ -229,8 +233,9 @@ Out: ; preds = %Loop ; CHECK-LABEL: @test10( ; CHECK: Out: -; CHECK-NEXT: %tmp.6 
= sdiv i32 %N, %N_addr.0.pn -; CHECK-NEXT: ret i32 %tmp.6 +; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %N_addr.0.pn +; CHECK-NEXT: %tmp.6.le = sdiv i32 %N, %[[LCSSAPHI]] +; CHECK-NEXT: ret i32 %tmp.6.le } ; Should delete, not sink, dead instructions. @@ -246,4 +251,69 @@ Out: ; CHECK-NEXT: ret void } +@c = common global [1 x i32] zeroinitializer, align 4 +; Test a *many* way nested loop with multiple exit blocks both of which exit +; multiple loop nests. This exercises LCSSA corner cases. +define i32 @PR18753(i1* %a, i1* %b, i1* %c, i1* %d) { +entry: + br label %l1.header + +l1.header: + %iv = phi i64 [ %iv.next, %l1.latch ], [ 0, %entry ] + %arrayidx.i = getelementptr inbounds [1 x i32]* @c, i64 0, i64 %iv + br label %l2.header + +l2.header: + %x0 = load i1* %c, align 4 + br i1 %x0, label %l1.latch, label %l3.preheader + +l3.preheader: + br label %l3.header + +l3.header: + %x1 = load i1* %d, align 4 + br i1 %x1, label %l2.latch, label %l4.preheader + +l4.preheader: + br label %l4.header + +l4.header: + %x2 = load i1* %a + br i1 %x2, label %l3.latch, label %l4.body + +l4.body: + call void @f(i32* %arrayidx.i) + %x3 = load i1* %b + %l = trunc i64 %iv to i32 + br i1 %x3, label %l4.latch, label %exit + +l4.latch: + call void @g() + %x4 = load i1* %b, align 4 + br i1 %x4, label %l4.header, label %exit + +l3.latch: + br label %l3.header + +l2.latch: + br label %l2.header + +l1.latch: + %iv.next = add nsw i64 %iv, 1 + br label %l1.header + +exit: + %lcssa = phi i32 [ %l, %l4.latch ], [ %l, %l4.body ] +; CHECK-LABEL: @PR18753( +; CHECK: exit: +; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i64 [ %iv, %l4.latch ], [ %iv, %l4.body ] +; CHECK-NEXT: %l.le = trunc i64 %[[LCSSAPHI]] to i32 +; CHECK-NEXT: ret i32 %l.le + + ret i32 %lcssa +} + +declare void @f(i32*) + +declare void @g() diff --git a/test/Transforms/LICM/volatile-alias.ll b/test/Transforms/LICM/volatile-alias.ll index 886d7f2..df7f0a9 100644 --- a/test/Transforms/LICM/volatile-alias.ll +++ 
b/test/Transforms/LICM/volatile-alias.ll @@ -4,7 +4,7 @@ ; out of the loop. ; CHECK: load i32* %p ; CHECK: for.body: -; CHECK; load volatile i32* %q +; CHECK: load volatile i32* %q target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/test/Transforms/LoopReroll/basic.ll b/test/Transforms/LoopReroll/basic.ll index 314a149..3bd6d7a 100644 --- a/test/Transforms/LoopReroll/basic.ll +++ b/test/Transforms/LoopReroll/basic.ll @@ -33,7 +33,7 @@ for.body: ; preds = %for.body, %entry ; CHECK: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %entry ] ; CHECK: %call = tail call i32 @foo(i32 %indvar) #1 ; CHECK: %indvar.next = add i32 %indvar, 1 -; CHECK: %exitcond1 = icmp eq i32 %indvar.next, 498 +; CHECK: %exitcond1 = icmp eq i32 %indvar, 497 ; CHECK: br i1 %exitcond1, label %for.end, label %for.body ; CHECK: ret @@ -83,7 +83,7 @@ for.body: ; preds = %entry, %for.body ; CHECK: %arrayidx = getelementptr inbounds i32* %x, i64 %indvar ; CHECK: store i32 %call, i32* %arrayidx, align 4 ; CHECK: %indvar.next = add i64 %indvar, 1 -; CHECK: %exitcond = icmp eq i64 %indvar.next, 1500 +; CHECK: %exitcond = icmp eq i64 %indvar, 1499 ; CHECK: br i1 %exitcond, label %for.end, label %for.body ; CHECK: ret @@ -131,7 +131,7 @@ for.body: ; preds = %for.body, %entry ; CHECK: %arrayidx = getelementptr inbounds i32* %x, i64 %indvars.iv ; CHECK: store i32 %call, i32* %arrayidx, align 4 ; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 -; CHECK: %exitcond1 = icmp eq i64 %indvars.iv.next, 1500 +; CHECK: %exitcond1 = icmp eq i64 %indvars.iv, 1499 ; CHECK: br i1 %exitcond1, label %for.end, label %for.body ; CHECK: ret @@ -213,7 +213,7 @@ for.body: ; preds = %entry, %for.body ; CHECK: %add = fadd float %1, %mul ; CHECK: store float %add, float* %arrayidx2, align 4 ; CHECK: %indvar.next = add i64 %indvar, 1 -; CHECK: %exitcond = icmp eq i64 %indvar.next, 3200 +; 
CHECK: %exitcond = icmp eq i64 %indvar, 3199 ; CHECK: br i1 %exitcond, label %for.end, label %for.body ; CHECK: ret @@ -313,7 +313,7 @@ for.body: ; preds = %entry, %for.body ; CHECK: %add = fadd float %2, %mul ; CHECK: store float %add, float* %arrayidx4, align 4 ; CHECK: %indvar.next = add i64 %indvar, 1 -; CHECK: %exitcond = icmp eq i64 %indvar.next, 3200 +; CHECK: %exitcond = icmp eq i64 %indvar, 3199 ; CHECK: br i1 %exitcond, label %for.end, label %for.body ; CHECK: ret diff --git a/test/Transforms/LoopReroll/nonconst_lb.ll b/test/Transforms/LoopReroll/nonconst_lb.ll new file mode 100644 index 0000000..a45469b --- /dev/null +++ b/test/Transforms/LoopReroll/nonconst_lb.ll @@ -0,0 +1,152 @@ +; RUN: opt < %s -loop-reroll -S | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" +target triple = "thumbv7-none-linux" + +;void foo(int *A, int *B, int m, int n) { +; for (int i = m; i < n; i+=4) { +; A[i+0] = B[i+0] * 4; +; A[i+1] = B[i+1] * 4; +; A[i+2] = B[i+2] * 4; +; A[i+3] = B[i+3] * 4; +; } +;} +define void @foo(i32* nocapture %A, i32* nocapture readonly %B, i32 %m, i32 %n) { +entry: + %cmp34 = icmp slt i32 %m, %n + br i1 %cmp34, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %i.035 = phi i32 [ %add18, %for.body ], [ %m, %entry ] + %arrayidx = getelementptr inbounds i32* %B, i32 %i.035 + %0 = load i32* %arrayidx, align 4 + %mul = shl nsw i32 %0, 2 + %arrayidx2 = getelementptr inbounds i32* %A, i32 %i.035 + store i32 %mul, i32* %arrayidx2, align 4 + %add3 = add nsw i32 %i.035, 1 + %arrayidx4 = getelementptr inbounds i32* %B, i32 %add3 + %1 = load i32* %arrayidx4, align 4 + %mul5 = shl nsw i32 %1, 2 + %arrayidx7 = getelementptr inbounds i32* %A, i32 %add3 + store i32 %mul5, i32* %arrayidx7, align 4 + %add8 = add nsw i32 %i.035, 2 + %arrayidx9 = getelementptr inbounds i32* %B, i32 %add8 + %2 = load i32* %arrayidx9, align 4 + %mul10 = shl 
nsw i32 %2, 2 + %arrayidx12 = getelementptr inbounds i32* %A, i32 %add8 + store i32 %mul10, i32* %arrayidx12, align 4 + %add13 = add nsw i32 %i.035, 3 + %arrayidx14 = getelementptr inbounds i32* %B, i32 %add13 + %3 = load i32* %arrayidx14, align 4 + %mul15 = shl nsw i32 %3, 2 + %arrayidx17 = getelementptr inbounds i32* %A, i32 %add13 + store i32 %mul15, i32* %arrayidx17, align 4 + %add18 = add nsw i32 %i.035, 4 + %cmp = icmp slt i32 %add18, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} +; CHECK-LABEL: @foo +; CHECK: for.body.preheader: ; preds = %entry +; CHECK: %0 = add i32 %n, -1 +; CHECK: %1 = sub i32 %0, %m +; CHECK: %2 = lshr i32 %1, 2 +; CHECK: %3 = mul i32 %2, 4 +; CHECK: %4 = add i32 %m, %3 +; CHECK: %5 = add i32 %4, 3 +; CHECK: br label %for.body + +; CHECK: for.body: ; preds = %for.body, %for.body.preheader +; CHECK: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %for.body.preheader ] +; CHECK: %6 = add i32 %m, %indvar +; CHECK: %arrayidx = getelementptr inbounds i32* %B, i32 %6 +; CHECK: %7 = load i32* %arrayidx, align 4 +; CHECK: %mul = shl nsw i32 %7, 2 +; CHECK: %arrayidx2 = getelementptr inbounds i32* %A, i32 %6 +; CHECK: store i32 %mul, i32* %arrayidx2, align 4 +; CHECK: %indvar.next = add i32 %indvar, 1 +; CHECK: %exitcond = icmp eq i32 %6, %5 +; CHECK: br i1 %exitcond, label %for.end, label %for.body + +;void daxpy_ur(int n,float da,float *dx,float *dy) +; { +; int m = n % 4; +; for (int i = m; i < n; i = i + 4) +; { +; dy[i] = dy[i] + da*dx[i]; +; dy[i+1] = dy[i+1] + da*dx[i+1]; +; dy[i+2] = dy[i+2] + da*dx[i+2]; +; dy[i+3] = dy[i+3] + da*dx[i+3]; +; } +; } +define void @daxpy_ur(i32 %n, float %da, float* nocapture readonly %dx, float* nocapture %dy) { +entry: + %rem = srem i32 %n, 4 + %cmp55 = icmp slt i32 %rem, %n + br i1 %cmp55, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %i.056 = phi i32 [ %add27, %for.body ], [ %rem, %entry ] + %arrayidx = 
getelementptr inbounds float* %dy, i32 %i.056 + %0 = load float* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds float* %dx, i32 %i.056 + %1 = load float* %arrayidx1, align 4 + %mul = fmul float %1, %da + %add = fadd float %0, %mul + store float %add, float* %arrayidx, align 4 + %add3 = add nsw i32 %i.056, 1 + %arrayidx4 = getelementptr inbounds float* %dy, i32 %add3 + %2 = load float* %arrayidx4, align 4 + %arrayidx6 = getelementptr inbounds float* %dx, i32 %add3 + %3 = load float* %arrayidx6, align 4 + %mul7 = fmul float %3, %da + %add8 = fadd float %2, %mul7 + store float %add8, float* %arrayidx4, align 4 + %add11 = add nsw i32 %i.056, 2 + %arrayidx12 = getelementptr inbounds float* %dy, i32 %add11 + %4 = load float* %arrayidx12, align 4 + %arrayidx14 = getelementptr inbounds float* %dx, i32 %add11 + %5 = load float* %arrayidx14, align 4 + %mul15 = fmul float %5, %da + %add16 = fadd float %4, %mul15 + store float %add16, float* %arrayidx12, align 4 + %add19 = add nsw i32 %i.056, 3 + %arrayidx20 = getelementptr inbounds float* %dy, i32 %add19 + %6 = load float* %arrayidx20, align 4 + %arrayidx22 = getelementptr inbounds float* %dx, i32 %add19 + %7 = load float* %arrayidx22, align 4 + %mul23 = fmul float %7, %da + %add24 = fadd float %6, %mul23 + store float %add24, float* %arrayidx20, align 4 + %add27 = add nsw i32 %i.056, 4 + %cmp = icmp slt i32 %add27, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + +; CHECK-LABEL: @daxpy_ur +; CHECK: for.body.preheader: +; CHECK: %0 = add i32 %n, -1 +; CHECK: %1 = sub i32 %0, %rem +; CHECK: %2 = lshr i32 %1, 2 +; CHECK: %3 = mul i32 %2, 4 +; CHECK: %4 = add i32 %rem, %3 +; CHECK: %5 = add i32 %4, 3 +; CHECK: br label %for.body + +; CHECK: for.body: +; CHECK: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %for.body.preheader ] +; CHECK: %6 = add i32 %rem, %indvar +; CHECK: %arrayidx = getelementptr inbounds float* %dy, i32 %6 +; CHECK: %7 = load float* 
%arrayidx, align 4 +; CHECK: %arrayidx1 = getelementptr inbounds float* %dx, i32 %6 +; CHECK: %8 = load float* %arrayidx1, align 4 +; CHECK: %mul = fmul float %8, %da +; CHECK: %add = fadd float %7, %mul +; CHECK: store float %add, float* %arrayidx, align 4 +; CHECK: %indvar.next = add i32 %indvar, 1 +; CHECK: %exitcond = icmp eq i32 %6, %5 +; CHECK: br i1 %exitcond, label %for.end, label %for.body diff --git a/test/Transforms/LoopReroll/reduction.ll b/test/Transforms/LoopReroll/reduction.ll index aed7670..c9991c7 100644 --- a/test/Transforms/LoopReroll/reduction.ll +++ b/test/Transforms/LoopReroll/reduction.ll @@ -38,7 +38,7 @@ for.body: ; preds = %entry, %for.body ; CHECK: %0 = load i32* %arrayidx, align 4 ; CHECK: %add = add nsw i32 %0, %r.029 ; CHECK: %indvar.next = add i64 %indvar, 1 -; CHECK: %exitcond = icmp eq i64 %indvar.next, 400 +; CHECK: %exitcond = icmp eq i64 %indvar, 399 ; CHECK: br i1 %exitcond, label %for.end, label %for.body ; CHECK: ret @@ -83,7 +83,7 @@ for.body: ; preds = %entry, %for.body ; CHECK: %0 = load float* %arrayidx, align 4 ; CHECK: %add = fadd float %0, %r.029 ; CHECK: %indvar.next = add i64 %indvar, 1 -; CHECK: %exitcond = icmp eq i64 %indvar.next, 400 +; CHECK: %exitcond = icmp eq i64 %indvar, 399 ; CHECK: br i1 %exitcond, label %for.end, label %for.body ; CHECK: ret diff --git a/test/Transforms/LoopRotate/PhiSelfReference-1.ll b/test/Transforms/LoopRotate/PhiSelfReference-1.ll new file mode 100644 index 0000000..aa1708e --- /dev/null +++ b/test/Transforms/LoopRotate/PhiSelfReference-1.ll @@ -0,0 +1,39 @@ +; RUN: opt < %s -loop-rotate -verify-dom-info -verify-loop-info -disable-output +; ModuleID = 'PhiSelfReference-1.bc' + +define void @snrm2(i32 %incx) { +entry: + br i1 false, label %START, label %return + +START: ; preds = %entry + br i1 false, label %bb85, label %cond_false93 + +bb52: ; preds = %bb85 + br i1 false, label %bb307, label %cond_next71 + +cond_next71: ; preds = %bb52 + ret void + +bb85: ; preds = %START + br i1 
false, label %bb52, label %bb88 + +bb88: ; preds = %bb85 + ret void + +cond_false93: ; preds = %START + ret void + +bb243: ; preds = %bb307 + br label %bb307 + +bb307: ; preds = %bb243, %bb52 + %sx_addr.2.pn = phi float* [ %sx_addr.5, %bb243 ], [ null, %bb52 ] ; [#uses=1] + %sx_addr.5 = getelementptr float* %sx_addr.2.pn, i32 %incx ; [#uses=1] + br i1 false, label %bb243, label %bb310 + +bb310: ; preds = %bb307 + ret void + +return: ; preds = %entry + ret void +} diff --git a/test/Transforms/LoopRotate/PhiSelfRefernce-1.ll b/test/Transforms/LoopRotate/PhiSelfRefernce-1.ll deleted file mode 100644 index a1aa21b..0000000 --- a/test/Transforms/LoopRotate/PhiSelfRefernce-1.ll +++ /dev/null @@ -1,39 +0,0 @@ -; RUN: opt < %s -loop-rotate -verify-dom-info -verify-loop-info -disable-output -; ModuleID = 'PhiSelfRefernce-1.bc' - -define void @snrm2(i32 %incx) { -entry: - br i1 false, label %START, label %return - -START: ; preds = %entry - br i1 false, label %bb85, label %cond_false93 - -bb52: ; preds = %bb85 - br i1 false, label %bb307, label %cond_next71 - -cond_next71: ; preds = %bb52 - ret void - -bb85: ; preds = %START - br i1 false, label %bb52, label %bb88 - -bb88: ; preds = %bb85 - ret void - -cond_false93: ; preds = %START - ret void - -bb243: ; preds = %bb307 - br label %bb307 - -bb307: ; preds = %bb243, %bb52 - %sx_addr.2.pn = phi float* [ %sx_addr.5, %bb243 ], [ null, %bb52 ] ; [#uses=1] - %sx_addr.5 = getelementptr float* %sx_addr.2.pn, i32 %incx ; [#uses=1] - br i1 false, label %bb243, label %bb310 - -bb310: ; preds = %bb307 - ret void - -return: ; preds = %entry - ret void -} diff --git a/test/Transforms/LoopRotate/dbgvalue.ll b/test/Transforms/LoopRotate/dbgvalue.ll index 9461980..50fc965 100644 --- a/test/Transforms/LoopRotate/dbgvalue.ll +++ b/test/Transforms/LoopRotate/dbgvalue.ll @@ -46,7 +46,11 @@ define void @FindFreeHorzSeg(i64 %startCol, i64 %row, i64* %rowStart) { ; CHECK-LABEL: define void @FindFreeHorzSeg( ; CHECK: %dec = add ; CHECK-NEXT: tail 
call void @llvm.dbg.value -; CHECK-NEXT: br i1 %tobool, label %for.cond, label %for.end +; CHECK-NEXT: br i1 %tobool, label %for.cond, label %[[LOOP_EXIT:[^,]*]] +; CHECK: [[LOOP_EXIT]]: +; CHECK-NEXT: phi i64 [ %{{[^,]*}}, %{{[^,]*}} ] +; CHECK-NEXT: br label %for.end + entry: br label %for.cond diff --git a/test/Transforms/LoopRotate/preserve-loop-simplify.ll b/test/Transforms/LoopRotate/preserve-loop-simplify.ll new file mode 100644 index 0000000..53fa02a --- /dev/null +++ b/test/Transforms/LoopRotate/preserve-loop-simplify.ll @@ -0,0 +1,65 @@ +; RUN: opt -S -loop-rotate < %s -verify-loop-info | FileCheck %s +; +; Verify that LoopRotate preserves LoopSimplify form even in very peculiar loop +; structures. We manually validate the CFG with FileCheck because currently we +; can't cause a failure when LoopSimplify fails to be preserved. + +define void @PR18643() { +; CHECK-LABEL: @PR18643( +entry: + br label %outer.header +; CHECK: br label %outer.header + +outer.header: +; CHECK: outer.header: + br i1 undef, label %inner.header, label %outer.body +; CHECK-NEXT: br i1 {{[^,]*}}, label %[[INNER_PREROTATE_PREHEADER:[^,]*]], label %outer.body + +; CHECK: [[INNER_PREROTATE_PREHEADER]]: +; CHECK-NEXT: br i1 {{[^,]*}}, label %[[INNER_PREROTATE_PREHEADER_SPLIT_RETURN:[^,]*]], label %[[INNER_ROTATED_PREHEADER:[^,]*]] + +; CHECK: [[INNER_ROTATED_PREHEADER]]: +; CHECK-NEXT: br label %inner.body + +inner.header: +; Now the latch! +; CHECK: inner.header: + br i1 undef, label %return, label %inner.body +; CHECK-NEXT: br i1 {{[^,]*}}, label %[[INNER_SPLIT_RETURN:[^,]*]], label %inner.body + +inner.body: +; Now the header! +; CHECK: inner.body: + br i1 undef, label %outer.latch, label %inner.latch +; CHECK-NEXT: br i1 {{[^,]*}}, label %[[INNER_SPLIT_OUTER_LATCH:[^,]*]], label %inner.header + +inner.latch: +; Dead! 
+ br label %inner.header + +outer.body: +; CHECK: outer.body: + br label %outer.latch +; CHECK-NEXT: br label %outer.latch + +; L2 -> L1 exit edge needs a simplified exit block. +; CHECK: [[INNER_SPLIT_OUTER_LATCH]]: +; CHECK-NEXT: br label %outer.latch + +outer.latch: +; CHECK: outer.latch: + br label %outer.header +; CHECK-NEXT: br label %outer.header + +; L1 -> L0 exit edge needs a simplified exit block. +; CHECK: [[INNER_PREROTATE_PREHEADER_SPLIT_RETURN]]: +; CHECK-NEXT: br label %return + +; L2 -> L0 exit edge needs a simplified exit block. +; CHECK: [[INNER_SPLIT_RETURN]]: +; CHECK-NEXT: br label %return + +return: +; CHECK: return: + unreachable +} diff --git a/test/Transforms/LoopSimplify/ashr-crash.ll b/test/Transforms/LoopSimplify/ashr-crash.ll new file mode 100644 index 0000000..c58903d --- /dev/null +++ b/test/Transforms/LoopSimplify/ashr-crash.ll @@ -0,0 +1,80 @@ +; RUN: opt -basicaa -loop-rotate -licm -instcombine -indvars -loop-unroll -S %s | FileCheck %s +; +; PR18361: ScalarEvolution::getAddRecExpr(): +; Assertion `isLoopInvariant(Operands[i],... +; +; After a series of loop optimizations, SCEV's LoopDispositions grow stale. +; In particular, LoopSimplify hoists %cmp4, resulting in this SCEV for %add: +; {(zext i1 %cmp4 to i32),+,1}<%for.cond1.preheader> +; +; When recomputing the SCEV for %ashr, we truncate the operands to get: +; (zext i1 %cmp4 to i16) +; +; This SCEV was never mapped to a value so never invalidated. Its +; loop disposition is still marked as non-loop-invariant, which is +; inconsistent with the AddRec. + +target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx" + +@d = common global i32 0, align 4 +@a = common global i32 0, align 4 +@c = common global i32 0, align 4 +@b = common global i32 0, align 4 + +; Check that the def-use chain that leads to the bad SCEV is still +; there.
+; +; CHECK-LABEL: @foo +; CHECK-LABEL: entry: +; CHECK-LABEL: for.cond1.preheader: +; CHECK-LABEL: for.body3: +; CHECK: %cmp4.le.le +; CHECK: %conv.le.le = zext i1 %cmp4.le.le to i32 +; CHECK: %xor.le.le = xor i32 %conv6.le.le, 1 +define void @foo() { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc7, %entry + %storemerge = phi i32 [ 0, %entry ], [ %inc8, %for.inc7 ] + %f.0 = phi i32 [ undef, %entry ], [ %f.1, %for.inc7 ] + store i32 %storemerge, i32* @d, align 4 + %cmp = icmp slt i32 %storemerge, 1 + br i1 %cmp, label %for.cond1, label %for.end9 + +for.cond1: ; preds = %for.cond, %for.body3 + %storemerge1 = phi i32 [ %inc, %for.body3 ], [ 0, %for.cond ] + %f.1 = phi i32 [ %xor, %for.body3 ], [ %f.0, %for.cond ] + store i32 %storemerge1, i32* @a, align 4 + %cmp2 = icmp slt i32 %storemerge1, 1 + br i1 %cmp2, label %for.body3, label %for.inc7 + +for.body3: ; preds = %for.cond1 + %0 = load i32* @c, align 4 + %cmp4 = icmp sge i32 %storemerge1, %0 + %conv = zext i1 %cmp4 to i32 + %1 = load i32* @d, align 4 + %add = add nsw i32 %conv, %1 + %sext = shl i32 %add, 16 + %conv6 = ashr exact i32 %sext, 16 + %xor = xor i32 %conv6, 1 + %inc = add nsw i32 %storemerge1, 1 + br label %for.cond1 + +for.inc7: ; preds = %for.cond1 + %2 = load i32* @d, align 4 + %inc8 = add nsw i32 %2, 1 + br label %for.cond + +for.end9: ; preds = %for.cond + %cmp10 = icmp sgt i32 %f.0, 0 + br i1 %cmp10, label %if.then, label %if.end + +if.then: ; preds = %for.end9 + store i32 0, i32* @b, align 4 + br label %if.end + +if.end: ; preds = %if.then, %for.end9 + ret void +} diff --git a/test/Transforms/LoopSimplify/notify-scev.ll b/test/Transforms/LoopSimplify/notify-scev.ll new file mode 100644 index 0000000..ee8e2ee --- /dev/null +++ b/test/Transforms/LoopSimplify/notify-scev.ll @@ -0,0 +1,110 @@ +; RUN: opt -indvars -S %s | FileCheck %s +; +; PR18384: ValueHandleBase::ValueIsDeleted. 
+; +; Ensure that LoopSimplify calls ScalarEvolution::forgetLoop before +; deleting a block, regardless of whether any values were hoisted out +; of the block. + +target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-darwin" + +%struct.Params = type { [2 x [4 x [16 x i16]]] } + +; Verify that the loop tail is deleted, and we don't crash! +; +; CHECK-LABEL: @t +; CHECK-LABEL: for.cond127.preheader: +; CHECK-NOT: for.cond127: +; CHECK-LABEL: for.body129: +define void @t() { +entry: + br label %for.body102 + +for.body102: + br i1 undef, label %for.cond127.preheader, label %for.inc203 + +for.cond127.preheader: + br label %for.body129 + +for.cond127: + %cmp128 = icmp slt i32 %inc191, 2 + br i1 %cmp128, label %for.body129, label %for.end192 + +for.body129: + %uv.013 = phi i32 [ 0, %for.cond127.preheader ], [ %inc191, %for.cond127 ] + %idxprom130 = sext i32 %uv.013 to i64 + br i1 undef, label %for.cond135.preheader.lr.ph, label %for.end185 + +for.cond135.preheader.lr.ph: + br i1 undef, label %for.cond135.preheader.lr.ph.split.us, label %for.cond135.preheader.lr.ph.split_crit_edge + +for.cond135.preheader.lr.ph.split_crit_edge: + br label %for.cond135.preheader.lr.ph.split + +for.cond135.preheader.lr.ph.split.us: + br label %for.cond135.preheader.us + +for.cond135.preheader.us: + %block_y.09.us = phi i32 [ 0, %for.cond135.preheader.lr.ph.split.us ], [ %add184.us, %for.cond132.us ] + br i1 true, label %for.cond138.preheader.lr.ph.us, label %for.end178.us + +for.end178.us: + %add184.us = add nsw i32 %block_y.09.us, 4 + br i1 undef, label %for.end185split.us-lcssa.us, label %for.cond132.us + +for.end174.us: + br i1 undef, label %for.cond138.preheader.us, label %for.cond135.for.end178_crit_edge.us + +for.inc172.us: + br i1 undef, label %for.cond142.preheader.us, label %for.end174.us + +for.body145.us: + %arrayidx163.us = getelementptr inbounds %struct.Params* undef, i64 0, i32 0, i64 %idxprom130, i64 %idxprom146.us + br i1 undef, label 
%for.body145.us, label %for.inc172.us + +for.cond142.preheader.us: + %j.04.us = phi i32 [ %block_y.09.us, %for.cond138.preheader.us ], [ undef, %for.inc172.us ] + %idxprom146.us = sext i32 %j.04.us to i64 + br label %for.body145.us + +for.cond138.preheader.us: + br label %for.cond142.preheader.us + +for.cond132.us: + br i1 undef, label %for.cond135.preheader.us, label %for.cond132.for.end185_crit_edge.us-lcssa.us + +for.cond138.preheader.lr.ph.us: + br label %for.cond138.preheader.us + +for.cond135.for.end178_crit_edge.us: + br label %for.end178.us + +for.end185split.us-lcssa.us: + br label %for.end185split + +for.cond132.for.end185_crit_edge.us-lcssa.us: + br label %for.cond132.for.end185_crit_edge + +for.cond135.preheader.lr.ph.split: + br label %for.end185split + +for.end185split: + br label %for.end185 + +for.cond132.for.end185_crit_edge: + br label %for.end185 + +for.end185: + %inc191 = add nsw i32 %uv.013, 1 + br i1 false, label %for.end192, label %for.cond127 + +for.end192: + br label %for.inc203 + +for.inc203: + br label %for.end205 + +for.end205: + ret void +} diff --git a/test/Transforms/LoopStrengthReduce/ARM64/lit.local.cfg b/test/Transforms/LoopStrengthReduce/ARM64/lit.local.cfg new file mode 100644 index 0000000..a499579 --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/ARM64/lit.local.cfg @@ -0,0 +1,5 @@ +config.suffixes = ['.ll'] + +targets = set(config.root.targets_to_build.split()) +if not 'ARM64' in targets: + config.unsupported = True diff --git a/test/Transforms/LoopStrengthReduce/ARM64/lsr-memcpy.ll b/test/Transforms/LoopStrengthReduce/ARM64/lsr-memcpy.ll new file mode 100644 index 0000000..9a175ad --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/ARM64/lsr-memcpy.ll @@ -0,0 +1,33 @@ +; RUN: llc -mtriple=arm64-unknown-unknown -mcpu=cyclone -pre-RA-sched=list-hybrid < %s | FileCheck %s +; rdar://10232252 +; Prevent LSR from making a poor choice that cannot be folded into the addressing mode + +; Remove the -pre-RA-sched=list-hybrid option after
fixing: +; [ARM64][coalescer] need better register +; coalescing for simple unit tests. + +; CHECK: testCase +; CHECK: %while.body{{$}} +; CHECK: ldr [[STREG:x[0-9]+]], [{{x[0-9]+}}], #8 +; CHECK-NEXT: str [[STREG]], [{{x[0-9]+}}], #8 +; CHECK: %while.end +define i32 @testCase() nounwind ssp { +entry: + br label %while.body + +while.body: ; preds = %while.body, %entry + %len.06 = phi i64 [ 1288, %entry ], [ %sub, %while.body ] + %pDst.05 = phi i64* [ inttoptr (i64 6442450944 to i64*), %entry ], [ %incdec.ptr1, %while.body ] + %pSrc.04 = phi i64* [ inttoptr (i64 4294967296 to i64*), %entry ], [ %incdec.ptr, %while.body ] + %incdec.ptr = getelementptr inbounds i64* %pSrc.04, i64 1 + %tmp = load volatile i64* %pSrc.04, align 8 + %incdec.ptr1 = getelementptr inbounds i64* %pDst.05, i64 1 + store volatile i64 %tmp, i64* %pDst.05, align 8 + %sub = add i64 %len.06, -8 + %cmp = icmp sgt i64 %sub, -1 + br i1 %cmp, label %while.body, label %while.end + +while.end: ; preds = %while.body + tail call void inttoptr (i64 6442450944 to void ()*)() nounwind + ret i32 0 +} diff --git a/test/Transforms/LoopStrengthReduce/ARM64/lsr-memset.ll b/test/Transforms/LoopStrengthReduce/ARM64/lsr-memset.ll new file mode 100644 index 0000000..10b2c3a --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/ARM64/lsr-memset.ll @@ -0,0 +1,101 @@ +; RUN: llc < %s -O3 -mtriple=arm64-unknown-unknown -mcpu=cyclone -pre-RA-sched=list-hybrid | FileCheck %s +; [arm64] [lsr] Inefficient EA/loop-exit calc in bzero_phys +; +; LSR on loop %while.cond should reassociate non-address mode +; expressions at use %cmp16 to avoid sinking computation into %while.body18. +; +; Remove the -pre-RA-sched=list-hybrid option after fixing: +; [ARM64][coalescer] need better register +; coalescing for simple unit tests. 
+ +; CHECK: @memset +; CHECK: %while.body18{{$}} +; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #8 +; First set the IVREG variable, then use it +; CHECK-NEXT: sub [[IVREG:x[0-9]+]], +; CHECK: [[IVREG]], #8 +; CHECK-NEXT: cmp [[IVREG]], #7 +; CHECK-NEXT: b.hi +define i8* @memset(i8* %dest, i32 %val, i64 %len) nounwind ssp noimplicitfloat { +entry: + %cmp = icmp eq i64 %len, 0 + br i1 %cmp, label %done, label %while.cond.preheader + +while.cond.preheader: ; preds = %entry + %conv = trunc i32 %val to i8 + br label %while.cond + +while.cond: ; preds = %while.body, %while.cond.preheader + %ptr.0 = phi i8* [ %incdec.ptr, %while.body ], [ %dest, %while.cond.preheader ] + %len.addr.0 = phi i64 [ %dec, %while.body ], [ %len, %while.cond.preheader ] + %cond = icmp eq i64 %len.addr.0, 0 + br i1 %cond, label %done, label %land.rhs + +land.rhs: ; preds = %while.cond + %0 = ptrtoint i8* %ptr.0 to i64 + %and = and i64 %0, 7 + %cmp5 = icmp eq i64 %and, 0 + br i1 %cmp5, label %if.end9, label %while.body + +while.body: ; preds = %land.rhs + %incdec.ptr = getelementptr inbounds i8* %ptr.0, i64 1 + store i8 %conv, i8* %ptr.0, align 1, !tbaa !0 + %dec = add i64 %len.addr.0, -1 + br label %while.cond + +if.end9: ; preds = %land.rhs + %conv.mask = and i32 %val, 255 + %1 = zext i32 %conv.mask to i64 + %2 = shl nuw nsw i64 %1, 8 + %ins18 = or i64 %2, %1 + %3 = shl nuw nsw i64 %1, 16 + %ins15 = or i64 %ins18, %3 + %4 = shl nuw nsw i64 %1, 24 + %5 = shl nuw nsw i64 %1, 32 + %mask8 = or i64 %ins15, %4 + %6 = shl nuw nsw i64 %1, 40 + %mask5 = or i64 %mask8, %5 + %7 = shl nuw nsw i64 %1, 48 + %8 = shl nuw i64 %1, 56 + %mask2.masked = or i64 %mask5, %6 + %mask = or i64 %mask2.masked, %7 + %ins = or i64 %mask, %8 + %9 = bitcast i8* %ptr.0 to i64* + %cmp1636 = icmp ugt i64 %len.addr.0, 7 + br i1 %cmp1636, label %while.body18, label %while.body29.lr.ph + +while.body18: ; preds = %if.end9, %while.body18 + %wideptr.038 = phi i64* [ %incdec.ptr19, %while.body18 ], [ %9, %if.end9 ] + %len.addr.137 = phi i64 
[ %sub, %while.body18 ], [ %len.addr.0, %if.end9 ] + %incdec.ptr19 = getelementptr inbounds i64* %wideptr.038, i64 1 + store i64 %ins, i64* %wideptr.038, align 8, !tbaa !2 + %sub = add i64 %len.addr.137, -8 + %cmp16 = icmp ugt i64 %sub, 7 + br i1 %cmp16, label %while.body18, label %while.end20 + +while.end20: ; preds = %while.body18 + %cmp21 = icmp eq i64 %sub, 0 + br i1 %cmp21, label %done, label %while.body29.lr.ph + +while.body29.lr.ph: ; preds = %while.end20, %if.end9 + %len.addr.1.lcssa49 = phi i64 [ %sub, %while.end20 ], [ %len.addr.0, %if.end9 ] + %wideptr.0.lcssa48 = phi i64* [ %incdec.ptr19, %while.end20 ], [ %9, %if.end9 ] + %10 = bitcast i64* %wideptr.0.lcssa48 to i8* + br label %while.body29 + +while.body29: ; preds = %while.body29, %while.body29.lr.ph + %len.addr.235 = phi i64 [ %len.addr.1.lcssa49, %while.body29.lr.ph ], [ %dec26, %while.body29 ] + %ptr.134 = phi i8* [ %10, %while.body29.lr.ph ], [ %incdec.ptr31, %while.body29 ] + %dec26 = add i64 %len.addr.235, -1 + %incdec.ptr31 = getelementptr inbounds i8* %ptr.134, i64 1 + store i8 %conv, i8* %ptr.134, align 1, !tbaa !0 + %cmp27 = icmp eq i64 %dec26, 0 + br i1 %cmp27, label %done, label %while.body29 + +done: ; preds = %while.cond, %while.body29, %while.end20, %entry + ret i8* %dest +} + +!0 = metadata !{metadata !"omnipotent char", metadata !1} +!1 = metadata !{metadata !"Simple C/C++ TBAA"} +!2 = metadata !{metadata !"long long", metadata !0} diff --git a/test/Transforms/LoopStrengthReduce/X86/no_superflous_induction_vars.ll b/test/Transforms/LoopStrengthReduce/X86/no_superflous_induction_vars.ll new file mode 100644 index 0000000..5506994 --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/X86/no_superflous_induction_vars.ll @@ -0,0 +1,50 @@ +; RUN: opt -S -loop-reduce -mcpu=corei7-avx -mtriple=x86_64-apple-macosx < %s | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +define void @indvar_expansion(i8* nocapture readonly %rowsptr) { +entry: + br label %for.cond 
+ +; SCEVExpander used to create induction variables in the loop %for.cond while +; expanding the recurrence start value of loop strength reduced values from +; %vector.body. + +; CHECK-LABEL: indvar_expansion +; CHECK: for.cond: +; CHECK-NOT: phi i3 +; CHECK: br i1 {{.+}}, label %for.cond + +for.cond: + %indvars.iv44 = phi i64 [ %indvars.iv.next45, %for.cond ], [ 0, %entry ] + %cmp = icmp eq i8 undef, 0 + %indvars.iv.next45 = add nuw nsw i64 %indvars.iv44, 1 + br i1 %cmp, label %for.cond, label %for.cond2 + +for.cond2: + br i1 undef, label %for.cond2, label %for.body14.lr.ph + +for.body14.lr.ph: + %sext = shl i64 %indvars.iv44, 32 + %0 = ashr exact i64 %sext, 32 + %1 = sub i64 undef, %indvars.iv44 + %2 = and i64 %1, 4294967295 + %3 = add i64 %2, 1 + %fold = add i64 %1, 1 + %n.mod.vf = and i64 %fold, 7 + %n.vec = sub i64 %3, %n.mod.vf + %end.idx.rnd.down = add i64 %n.vec, %0 + br label %vector.body + +vector.body: + %index = phi i64 [ %index.next, %vector.body ], [ %0, %for.body14.lr.ph ] + %4 = getelementptr inbounds i8* %rowsptr, i64 %index + %5 = bitcast i8* %4 to <4 x i8>* + %wide.load = load <4 x i8>* %5, align 1 + %index.next = add i64 %index, 8 + %6 = icmp eq i64 %index.next, %end.idx.rnd.down + br i1 %6, label %for.end24, label %vector.body + +for.end24: + ret void +} diff --git a/test/Transforms/LoopStrengthReduce/X86/pr17473.ll b/test/Transforms/LoopStrengthReduce/X86/pr17473.ll new file mode 100644 index 0000000..e7ebaa8 --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/X86/pr17473.ll @@ -0,0 +1,67 @@ +; RUN: opt < %s -loop-reduce -S | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.9.0" + +; LSR shouldn't normalize IV if it can't be denormalized to the original +; expression. In this testcase, the normalized expression was denormalized to +; an expression different from the original, and we were losing sign extension. 
+ +; CHECK: [[TMP:%[a-z]+]] = trunc i32 {{.*}} to i8 +; CHECK: {{%[a-z0-9]+}} = sext i8 [[TMP]] to i32 + +@j = common global i32 0, align 4 +@c = common global i32 0, align 4 +@g = common global i32 0, align 4 +@h = common global i8 0, align 1 +@d = common global i32 0, align 4 +@i = common global i32 0, align 4 +@e = common global i32 0, align 4 +@.str = private unnamed_addr constant [4 x i8] c"%x\0A\00", align 1 +@a = common global i32 0, align 4 +@b = common global i16 0, align 2 + +; Function Attrs: nounwind optsize ssp uwtable +define i32 @main() #0 { +entry: + store i8 0, i8* @h, align 1 + %0 = load i32* @j, align 4 + %tobool.i = icmp eq i32 %0, 0 + %1 = load i32* @d, align 4 + %cmp3 = icmp sgt i32 %1, -1 + %.lobit = lshr i32 %1, 31 + %.lobit.not = xor i32 %.lobit, 1 + br label %for.body + +for.body: ; preds = %entry, %fn3.exit + %inc9 = phi i8 [ 0, %entry ], [ %inc, %fn3.exit ] + %conv = sext i8 %inc9 to i32 + br i1 %tobool.i, label %fn3.exit, label %land.rhs.i + +land.rhs.i: ; preds = %for.body + store i32 0, i32* @c, align 4 + br label %fn3.exit + +fn3.exit: ; preds = %for.body, %land.rhs.i + %inc = add i8 %inc9, 1 + %cmp = icmp sgt i8 %inc, -1 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %fn3.exit + %.lobit.not. = select i1 %cmp3, i32 %.lobit.not, i32 0 + store i32 %conv, i32* @g, align 4 + store i32 %.lobit.not., i32* @i, align 4 + store i8 %inc, i8* @h, align 1 + %conv7 = sext i8 %inc to i32 + %add = add nsw i32 %conv7, %conv + store i32 %add, i32* @e, align 4 + %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %add) #2 + ret i32 0 +} + +; Function Attrs: nounwind optsize +declare i32 @printf(i8* nocapture readonly, ...) 
#1 + +attributes #0 = { nounwind optsize ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind optsize } diff --git a/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll b/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll index 255cf41..aa688d9 100644 --- a/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll +++ b/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll @@ -13,7 +13,7 @@ target triple = "x86_64-apple-macosx" ; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %test2.loop ], [ -16777216, %entry ] ; CHECK: %lsr.iv.next = add nsw i32 %lsr.iv, 16777216 ; -; CHECK=LABEL: for.end: +; CHECK-LABEL: for.end: ; CHECK: %sub.cond.us = sub nsw i32 %inc1115.us, %sub.us ; CHECK: %sext.us = mul i32 %lsr.iv.next, %sub.cond.us ; CHECK: %f = ashr i32 %sext.us, 24 diff --git a/test/Transforms/LoopStrengthReduce/pr18165.ll b/test/Transforms/LoopStrengthReduce/pr18165.ll new file mode 100644 index 0000000..c38d6a6 --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/pr18165.ll @@ -0,0 +1,88 @@ +; RUN: opt < %s -loop-reduce -S | FileCheck %s +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.9.0" + +; LSR shouldn't reuse IV if the resultant offset is not valid for the operand type. 
+; CHECK-NOT: trunc i32 %.ph to i8 + +%struct.anon = type { i32, i32, i32 } + +@c = global i32 1, align 4 +@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 +@b = common global i32 0, align 4 +@a = common global %struct.anon zeroinitializer, align 4 +@e = common global %struct.anon zeroinitializer, align 4 +@d = common global i32 0, align 4 +@f = common global i32 0, align 4 +@g = common global i32 0, align 4 +@h = common global i32 0, align 4 + +; Function Attrs: nounwind optsize ssp uwtable +define i32 @main() #0 { +entry: + %0 = load i32* getelementptr inbounds (%struct.anon* @a, i64 0, i32 0), align 4, !tbaa !1 + %tobool7.i = icmp eq i32 %0, 0 + %.promoted.i = load i32* getelementptr inbounds (%struct.anon* @a, i64 0, i32 2), align 4, !tbaa !6 + %f.promoted.i = load i32* @f, align 4, !tbaa !7 + br label %for.body6.i.outer + +for.body6.i.outer: ; preds = %entry, %lor.end.i + %.ph = phi i32 [ %add.i, %lor.end.i ], [ 0, %entry ] + %or1512.i.ph = phi i32 [ %or15.i, %lor.end.i ], [ %f.promoted.i, %entry ] + %or1410.i.ph = phi i32 [ %or14.i, %lor.end.i ], [ %.promoted.i, %entry ] + %p.addr.16.i.ph = phi i8 [ %inc10.i, %lor.end.i ], [ -128, %entry ] + br i1 %tobool7.i, label %if.end9.i, label %lbl.loopexit.i + +lbl.loopexit.i: ; preds = %for.body6.i.outer, %lbl.loopexit.i + br label %lbl.loopexit.i + +if.end9.i: ; preds = %for.body6.i.outer + %inc10.i = add i8 %p.addr.16.i.ph, 1 + %tobool12.i = icmp eq i8 %p.addr.16.i.ph, 0 + br i1 %tobool12.i, label %lor.rhs.i, label %lor.end.i + +lor.rhs.i: ; preds = %if.end9.i + %1 = load i32* @b, align 4, !tbaa !7 + %dec.i = add nsw i32 %1, -1 + store i32 %dec.i, i32* @b, align 4, !tbaa !7 + %tobool13.i = icmp ne i32 %1, 0 + br label %lor.end.i + +lor.end.i: ; preds = %lor.rhs.i, %if.end9.i + %2 = phi i1 [ true, %if.end9.i ], [ %tobool13.i, %lor.rhs.i ] + %lor.ext.i = zext i1 %2 to i32 + %or14.i = or i32 %lor.ext.i, %or1410.i.ph + %or15.i = or i32 %or14.i, %or1512.i.ph + %add.i = add nsw i32 %.ph, 2 + %cmp.i = 
icmp slt i32 %add.i, 21 + br i1 %cmp.i, label %for.body6.i.outer, label %fn1.exit + +fn1.exit: ; preds = %lor.end.i + store i32 0, i32* @g, align 4, !tbaa !7 + store i32 %or14.i, i32* getelementptr inbounds (%struct.anon* @a, i64 0, i32 2), align 4, !tbaa !6 + store i32 %or15.i, i32* @f, align 4, !tbaa !7 + store i32 %add.i, i32* getelementptr inbounds (%struct.anon* @e, i64 0, i32 1), align 4, !tbaa !8 + store i32 0, i32* @h, align 4, !tbaa !7 + %3 = load i32* @b, align 4, !tbaa !7 + %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %3) #2 + ret i32 0 +} + +; Function Attrs: nounwind optsize +declare i32 @printf(i8* nocapture readonly, ...) #1 + +attributes #0 = { nounwind optsize ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind optsize } + +!llvm.ident = !{!0} + +!0 = metadata !{metadata !"clang version 3.5 "} +!1 = metadata !{metadata !2, metadata !3, i64 0} +!2 = metadata !{metadata !"", metadata !3, i64 0, metadata !3, i64 4, metadata !3, i64 8} +!3 = metadata !{metadata !"int", metadata !4, i64 0} +!4 = metadata !{metadata !"omnipotent char", metadata !5, i64 0} +!5 = metadata !{metadata !"Simple C/C++ TBAA"} +!6 = metadata !{metadata !2, metadata !3, i64 8} +!7 = metadata !{metadata !3, metadata !3, i64 0} +!8 = metadata !{metadata !2, metadata !3, i64 4} diff --git a/test/Transforms/LoopUnroll/X86/lit.local.cfg b/test/Transforms/LoopUnroll/X86/lit.local.cfg new file mode 100644 index 0000000..ba763cf --- /dev/null +++ 
b/test/Transforms/LoopUnroll/X86/lit.local.cfg @@ -0,0 +1,4 @@ +targets = set(config.root.targets_to_build.split()) +if not 'X86' in targets: + config.unsupported = True + diff --git a/test/Transforms/LoopUnroll/X86/partial.ll b/test/Transforms/LoopUnroll/X86/partial.ll new file mode 100644 index 0000000..15867cb --- /dev/null +++ b/test/Transforms/LoopUnroll/X86/partial.ll @@ -0,0 +1,80 @@ +; RUN: opt < %s -S -loop-unroll -mcpu=nehalem -x86-use-partial-unrolling=1 | FileCheck %s +; RUN: opt < %s -S -loop-unroll -mcpu=core -x86-use-partial-unrolling=1 | FileCheck -check-prefix=CHECK-NOUNRL %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @foo(i32* noalias nocapture readnone %ip, double %alpha, double* noalias nocapture %a, double* noalias nocapture readonly %b) #0 { +entry: + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds double* %b, i64 %index + %1 = bitcast double* %0 to <2 x double>* + %wide.load = load <2 x double>* %1, align 8 + %.sum9 = or i64 %index, 2 + %2 = getelementptr double* %b, i64 %.sum9 + %3 = bitcast double* %2 to <2 x double>* + %wide.load8 = load <2 x double>* %3, align 8 + %4 = fadd <2 x double> %wide.load, + %5 = fadd <2 x double> %wide.load8, + %6 = getelementptr inbounds double* %a, i64 %index + %7 = bitcast double* %6 to <2 x double>* + store <2 x double> %4, <2 x double>* %7, align 8 + %.sum10 = or i64 %index, 2 + %8 = getelementptr double* %a, i64 %.sum10 + %9 = bitcast double* %8 to <2 x double>* + store <2 x double> %5, <2 x double>* %9, align 8 + %index.next = add i64 %index, 4 + %10 = icmp eq i64 %index.next, 1600 + br i1 %10, label %for.end, label %vector.body + +; FIXME: We should probably unroll this loop by a factor of 2, but the cost +; model needs to be fixed to account for instructions likely to be folded +; as part of an addressing mode. 
+; CHECK-LABEL: @foo +; CHECK-NOUNRL-LABEL: @foo + +for.end: ; preds = %vector.body + ret void +} + +define void @bar(i32* noalias nocapture readnone %ip, double %alpha, double* noalias nocapture %a, double* noalias nocapture readonly %b) #0 { +entry: + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %v0 = getelementptr inbounds double* %b, i64 %index + %v1 = bitcast double* %v0 to <2 x double>* + %wide.load = load <2 x double>* %v1, align 8 + %v4 = fadd <2 x double> %wide.load, + %v5 = fmul <2 x double> %v4, + %v6 = getelementptr inbounds double* %a, i64 %index + %v7 = bitcast double* %v6 to <2 x double>* + store <2 x double> %v5, <2 x double>* %v7, align 8 + %index.next = add i64 %index, 2 + %v10 = icmp eq i64 %index.next, 1600 + br i1 %v10, label %for.end, label %vector.body + +; FIXME: We should probably unroll this loop by a factor of 2, but the cost +; model needs to first be fixed to account for instructions likely to be folded +; as part of an addressing mode. 
+ +; CHECK-LABEL: @bar +; CHECK: fadd +; CHECK-NEXT: fmul +; CHECK: fadd +; CHECK-NEXT: fmul + +; CHECK-NOUNRL-LABEL: @bar +; CHECK-NOUNRL: fadd +; CHECK-NOUNRL-NEXT: fmul +; CHECK-NOUNRL-NOT: fadd + +for.end: ; preds = %vector.body + ret void +} + +attributes #0 = { nounwind uwtable } + diff --git a/test/Transforms/LoopVectorize/ARM/arm-unroll.ll b/test/Transforms/LoopVectorize/ARM/arm-unroll.ll index 39363ab..8843fc2 100644 --- a/test/Transforms/LoopVectorize/ARM/arm-unroll.ll +++ b/test/Transforms/LoopVectorize/ARM/arm-unroll.ll @@ -1,5 +1,6 @@ ; RUN: opt < %s -loop-vectorize -mtriple=thumbv7-apple-ios3.0.0 -S | FileCheck %s ; RUN: opt < %s -loop-vectorize -mtriple=thumbv7-apple-ios3.0.0 -mcpu=swift -S | FileCheck %s --check-prefix=SWIFT +; RUN: opt < %s -loop-vectorize -force-vector-width=1 -mtriple=thumbv7-apple-ios3.0.0 -mcpu=swift -S | FileCheck %s --check-prefix=SWIFTUNROLL target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" target triple = "thumbv7-apple-ios3.0.0" @@ -30,3 +31,41 @@ define i32 @foo(i32* nocapture %A, i32 %n) nounwind readonly ssp { %sum.0.lcssa = phi i32 [ 0, %0 ], [ %4, %.lr.ph ] ret i32 %sum.0.lcssa } + +; Verify the register limit. On arm we don't have 16 allocatable registers. 
+;SWIFTUNROLL-LABEL: @register_limit( +;SWIFTUNROLL: load i32 +;SWIFTUNROLL-NOT: load i32 +define i32 @register_limit(i32* nocapture %A, i32 %n) { + %1 = icmp sgt i32 %n, 0 + br i1 %1, label %.lr.ph, label %._crit_edge + +.lr.ph: + %i.02 = phi i32 [ %5, %.lr.ph ], [ 0, %0 ] + %sum.01 = phi i32 [ %4, %.lr.ph ], [ 0, %0 ] + %sum.02 = phi i32 [ %6, %.lr.ph ], [ 0, %0 ] + %sum.03 = phi i32 [ %7, %.lr.ph ], [ 0, %0 ] + %sum.04 = phi i32 [ %8, %.lr.ph ], [ 0, %0 ] + %sum.05 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ] + %sum.06 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ] + %2 = getelementptr inbounds i32* %A, i32 %i.02 + %3 = load i32* %2, align 4 + %4 = add nsw i32 %3, %sum.01 + %5 = add nsw i32 %i.02, 1 + %6 = add nsw i32 %3, %sum.02 + %7 = add nsw i32 %3, %sum.03 + %8 = add nsw i32 %3, %sum.04 + %9 = add nsw i32 %3, %sum.05 + %10 = add nsw i32 %3, %sum.05 + %exitcond = icmp eq i32 %5, %n + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph, %0 + %sum.0.lcssa = phi i32 [ 0, %0 ], [ %4, %.lr.ph ] + %sum.1.lcssa = phi i32 [ 0, %0 ], [ %6, %.lr.ph ] + %sum.2.lcssa = phi i32 [ 0, %0 ], [ %7, %.lr.ph ] + %sum.4.lcssa = phi i32 [ 0, %0 ], [ %8, %.lr.ph ] + %sum.5.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ] + %sum.6.lcssa = phi i32 [ 0, %0 ], [ %10, %.lr.ph ] + ret i32 %sum.0.lcssa +} diff --git a/test/Transforms/LoopVectorize/ARM64/gather-cost.ll b/test/Transforms/LoopVectorize/ARM64/gather-cost.ll new file mode 100644 index 0000000..bb28538 --- /dev/null +++ b/test/Transforms/LoopVectorize/ARM64/gather-cost.ll @@ -0,0 +1,85 @@ +; RUN: opt -loop-vectorize -mtriple=arm64-apple-ios -S -mcpu=cyclone < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128" + +@kernel = global [512 x float] zeroinitializer, align 16 +@kernel2 = global [512 x float] zeroinitializer, align 16 +@kernel3 = global [512 x float] zeroinitializer, align 16 +@kernel4 = global [512 x 
float] zeroinitializer, align 16 +@src_data = global [1536 x float] zeroinitializer, align 16 +@r_ = global i8 0, align 1 +@g_ = global i8 0, align 1 +@b_ = global i8 0, align 1 + +; We don't want to vectorize most loops containing gathers because they are +; expensive. +; Make sure we don't vectorize it. +; CHECK-NOT: x float> + +define void @_Z4testmm(i64 %size, i64 %offset) { +entry: + %cmp53 = icmp eq i64 %size, 0 + br i1 %cmp53, label %for.end, label %for.body.lr.ph + +for.body.lr.ph: + br label %for.body + +for.body: + %r.057 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add10, %for.body ] + %g.056 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add20, %for.body ] + %v.055 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + %b.054 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add30, %for.body ] + %add = add i64 %v.055, %offset + %mul = mul i64 %add, 3 + %arrayidx = getelementptr inbounds [1536 x float]* @src_data, i64 0, i64 %mul + %0 = load float* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds [512 x float]* @kernel, i64 0, i64 %v.055 + %1 = load float* %arrayidx2, align 4 + %mul3 = fmul fast float %0, %1 + %arrayidx4 = getelementptr inbounds [512 x float]* @kernel2, i64 0, i64 %v.055 + %2 = load float* %arrayidx4, align 4 + %mul5 = fmul fast float %mul3, %2 + %arrayidx6 = getelementptr inbounds [512 x float]* @kernel3, i64 0, i64 %v.055 + %3 = load float* %arrayidx6, align 4 + %mul7 = fmul fast float %mul5, %3 + %arrayidx8 = getelementptr inbounds [512 x float]* @kernel4, i64 0, i64 %v.055 + %4 = load float* %arrayidx8, align 4 + %mul9 = fmul fast float %mul7, %4 + %add10 = fadd fast float %r.057, %mul9 + %arrayidx.sum = add i64 %mul, 1 + %arrayidx11 = getelementptr inbounds [1536 x float]* @src_data, i64 0, i64 %arrayidx.sum + %5 = load float* %arrayidx11, align 4 + %mul13 = fmul fast float %1, %5 + %mul15 = fmul fast float %2, %mul13 + %mul17 = fmul fast float %3, %mul15 + %mul19 = fmul fast float %4, %mul17 + %add20 = fadd fast 
float %g.056, %mul19 + %arrayidx.sum52 = add i64 %mul, 2 + %arrayidx21 = getelementptr inbounds [1536 x float]* @src_data, i64 0, i64 %arrayidx.sum52 + %6 = load float* %arrayidx21, align 4 + %mul23 = fmul fast float %1, %6 + %mul25 = fmul fast float %2, %mul23 + %mul27 = fmul fast float %3, %mul25 + %mul29 = fmul fast float %4, %mul27 + %add30 = fadd fast float %b.054, %mul29 + %inc = add i64 %v.055, 1 + %exitcond = icmp ne i64 %inc, %size + br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge + +for.cond.for.end_crit_edge: + %add30.lcssa = phi float [ %add30, %for.body ] + %add20.lcssa = phi float [ %add20, %for.body ] + %add10.lcssa = phi float [ %add10, %for.body ] + %phitmp = fptoui float %add10.lcssa to i8 + %phitmp60 = fptoui float %add20.lcssa to i8 + %phitmp61 = fptoui float %add30.lcssa to i8 + br label %for.end + +for.end: + %r.0.lcssa = phi i8 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ] + %g.0.lcssa = phi i8 [ %phitmp60, %for.cond.for.end_crit_edge ], [ 0, %entry ] + %b.0.lcssa = phi i8 [ %phitmp61, %for.cond.for.end_crit_edge ], [ 0, %entry ] + store i8 %r.0.lcssa, i8* @r_, align 1 + store i8 %g.0.lcssa, i8* @g_, align 1 + store i8 %b.0.lcssa, i8* @b_, align 1 + ret void +} diff --git a/test/Transforms/LoopVectorize/ARM64/lit.local.cfg b/test/Transforms/LoopVectorize/ARM64/lit.local.cfg new file mode 100644 index 0000000..de86e54 --- /dev/null +++ b/test/Transforms/LoopVectorize/ARM64/lit.local.cfg @@ -0,0 +1,6 @@ +config.suffixes = ['.ll', '.c', '.cpp'] + +targets = set(config.root.targets_to_build.split()) +if not 'ARM64' in targets: + config.unsupported = True + diff --git a/test/Transforms/LoopVectorize/PowerPC/lit.local.cfg b/test/Transforms/LoopVectorize/PowerPC/lit.local.cfg new file mode 100644 index 0000000..2e46300 --- /dev/null +++ b/test/Transforms/LoopVectorize/PowerPC/lit.local.cfg @@ -0,0 +1,4 @@ +targets = set(config.root.targets_to_build.split()) +if not 'PowerPC' in targets: + config.unsupported = True + 
diff --git a/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll b/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll new file mode 100644 index 0000000..6cd9c4d --- /dev/null +++ b/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll @@ -0,0 +1,51 @@ +; RUN: opt < %s -mcpu=pwr7 -mattr=+vsx -loop-vectorize -instcombine -S | FileCheck %s +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +%struct.GlobalData = type { [32000 x float], [3 x i32], [4 x i8], [32000 x float], [5 x i32], [12 x i8], [32000 x float], [7 x i32], [4 x i8], [32000 x float], [11 x i32], [4 x i8], [32000 x float], [13 x i32], [12 x i8], [256 x [256 x float]], [17 x i32], [12 x i8], [256 x [256 x float]], [19 x i32], [4 x i8], [256 x [256 x float]], [23 x i32], [4 x i8], [256 x [256 x float]] } + +@global_data = external global %struct.GlobalData, align 16 +@ntimes = external hidden unnamed_addr global i32, align 4 + +define signext i32 @s173() #0 { +entry: + %0 = load i32* @ntimes, align 4 + %cmp21 = icmp sgt i32 %0, 0 + br i1 %cmp21, label %for.cond1.preheader, label %for.end12 + +for.cond1.preheader: ; preds = %for.end, %entry + %nl.022 = phi i32 [ %inc11, %for.end ], [ 0, %entry ] + br label %for.body3 + +for.body3: ; preds = %for.body3, %for.cond1.preheader + %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ] + %arrayidx = getelementptr inbounds %struct.GlobalData* @global_data, i64 0, i32 0, i64 %indvars.iv + %1 = load float* %arrayidx, align 4 + %arrayidx5 = getelementptr inbounds %struct.GlobalData* @global_data, i64 0, i32 3, i64 %indvars.iv + %2 = load float* %arrayidx5, align 4 + %add = fadd float %1, %2 + %3 = add nsw i64 %indvars.iv, 16000 + %arrayidx8 = getelementptr inbounds %struct.GlobalData* @global_data, i64 0, i32 0, i64 %3 + store float %add, float* %arrayidx8, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 16000 + br i1 %exitcond, label 
%for.end, label %for.body3 + +for.end: ; preds = %for.body3 + %inc11 = add nsw i32 %nl.022, 1 + %4 = load i32* @ntimes, align 4 + %mul = mul nsw i32 %4, 10 + %cmp = icmp slt i32 %inc11, %mul + br i1 %cmp, label %for.cond1.preheader, label %for.end12 + +for.end12: ; preds = %for.end, %entry + ret i32 0 + +; CHECK-LABEL: @s173 +; CHECK: load <4 x float>* +; CHECK: add i64 %index, 16000 +; CHECK: ret i32 0 +} + +attributes #0 = { nounwind } + diff --git a/test/Transforms/LoopVectorize/X86/already-vectorized.ll b/test/Transforms/LoopVectorize/X86/already-vectorized.ll index 885418c..faed77d 100644 --- a/test/Transforms/LoopVectorize/X86/already-vectorized.ll +++ b/test/Transforms/LoopVectorize/X86/already-vectorized.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -debug-only=loop-vectorize -O3 -S 2>&1 | FileCheck %s +; RUN: opt < %s -disable-loop-unrolling -debug-only=loop-vectorize -O3 -S 2>&1 | FileCheck %s ; REQUIRES: asserts ; We want to make sure that we don't even try to vectorize loops again ; The vectorizer used to mark the un-vectorized loop only as already vectorized diff --git a/test/Transforms/LoopVectorize/X86/fp32_to_uint32-cost-model.ll b/test/Transforms/LoopVectorize/X86/fp32_to_uint32-cost-model.ll new file mode 100644 index 0000000..529ed88 --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/fp32_to_uint32-cost-model.ll @@ -0,0 +1,39 @@ +; RUN: opt < %s -mcpu=core-avx2 -loop-vectorize -S | llc -mcpu=core-avx2 | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx" + +@float_array = common global [10000 x float] zeroinitializer, align 16 +@unsigned_array = common global [10000 x i32] zeroinitializer, align 16 + +; If we need to scalarize the fptoui and then use inserts to build up the +; vector again, then there is certainly no value in going 256-bit wide. 
+; CHECK-NOT: vinserti128 + +define void @convert(i32 %N) { +entry: + %0 = icmp eq i32 %N, 0 + br i1 %0, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds [10000 x float]* @float_array, i64 0, i64 %indvars.iv + %1 = load float* %arrayidx, align 4 + %conv = fptoui float %1 to i32 + %arrayidx2 = getelementptr inbounds [10000 x i32]* @unsigned_array, i64 0, i64 %indvars.iv + store i32 %conv, i32* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %N + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +} + diff --git a/test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll b/test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll new file mode 100644 index 0000000..ef3e3be --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll @@ -0,0 +1,40 @@ +; RUN: opt < %s -mcpu=core-avx2 -loop-vectorize -S | llc -mcpu=core-avx2 | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx" + +@n = global i32 10000, align 4 +@double_array = common global [10000 x double] zeroinitializer, align 16 +@unsigned_array = common global [10000 x i32] zeroinitializer, align 16 + +; If we need to scalarize the fptoui and then use inserts to build up the +; vector again, then there is certainly no value in going 256-bit wide. 
+; CHECK-NOT: vpinsrd + +define void @convert() { +entry: + %0 = load i32* @n, align 4 + %cmp4 = icmp eq i32 %0, 0 + br i1 %cmp4, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds [10000 x double]* @double_array, i64 0, i64 %indvars.iv + %1 = load double* %arrayidx, align 8 + %conv = fptoui double %1 to i32 + %arrayidx2 = getelementptr inbounds [10000 x i32]* @unsigned_array, i64 0, i64 %indvars.iv + store i32 %conv, i32* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %2 = trunc i64 %indvars.iv.next to i32 + %cmp = icmp ult i32 %2, %0 + br i1 %cmp, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +} diff --git a/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll b/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll new file mode 100644 index 0000000..23e6227 --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll @@ -0,0 +1,25 @@ +; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -S -debug-only=loop-vectorize 2>&1 | FileCheck %s +; REQUIRES: asserts + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + + +; CHECK: cost of 7 for VF 8 For instruction: %conv = fptosi float %tmp to i8 +define void @float_to_sint8_cost(i8* noalias nocapture %a, float* noalias nocapture readonly %b) nounwind { +entry: + br label %for.body +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv + %tmp = 
load float* %arrayidx, align 4 + %conv = fptosi float %tmp to i8 + %arrayidx2 = getelementptr inbounds i8* %a, i64 %indvars.iv + store i8 %conv, i8* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 256 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} diff --git a/test/Transforms/LoopVectorize/X86/metadata-enable.ll b/test/Transforms/LoopVectorize/X86/metadata-enable.ll new file mode 100644 index 0000000..224823b --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/metadata-enable.ll @@ -0,0 +1,175 @@ +; RUN: opt < %s -mcpu=corei7 -O1 -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=O1 +; RUN: opt < %s -mcpu=corei7 -O2 -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=O2 +; RUN: opt < %s -mcpu=corei7 -O3 -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=O3 +; RUN: opt < %s -mcpu=corei7 -Os -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=Os +; RUN: opt < %s -mcpu=corei7 -Oz -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=Oz +; RUN: opt < %s -mcpu=corei7 -O1 -vectorize-loops -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=O1VEC +; RUN: opt < %s -mcpu=corei7 -Oz -vectorize-loops -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=OzVEC +; RUN: opt < %s -mcpu=corei7 -O1 -loop-vectorize -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=O1VEC2 +; RUN: opt < %s -mcpu=corei7 -Oz -loop-vectorize -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=OzVEC2 +; RUN: opt < %s -mcpu=corei7 -O3 -disable-loop-vectorization -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=O3DIS + +; This file tests the llvm.vectorizer.pragma forcing vectorization even when +; optimization levels are too low, or when vectorization is disabled. 
+ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; O1-LABEL: @enabled( +; O1: store <4 x i32> +; O1: ret i32 +; O2-LABEL: @enabled( +; O2: store <4 x i32> +; O2: ret i32 +; O3-LABEL: @enabled( +; O3: store <4 x i32> +; O3: ret i32 +; Pragma always wins! +; O3DIS-LABEL: @enabled( +; O3DIS: store <4 x i32> +; O3DIS: ret i32 +; Os-LABEL: @enabled( +; Os: store <4 x i32> +; Os: ret i32 +; Oz-LABEL: @enabled( +; Oz: store <4 x i32> +; Oz: ret i32 +; O1VEC-LABEL: @enabled( +; O1VEC: store <4 x i32> +; O1VEC: ret i32 +; OzVEC-LABEL: @enabled( +; OzVEC: store <4 x i32> +; OzVEC: ret i32 +; O1VEC2-LABEL: @enabled( +; O1VEC2: store <4 x i32> +; O1VEC2: ret i32 +; OzVEC2-LABEL: @enabled( +; OzVEC2: store <4 x i32> +; OzVEC2: ret i32 + +define i32 @enabled(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %N) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %add = add nsw i32 %0, %N + %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv + store i32 %add, i32* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 32 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0 + +for.end: ; preds = %for.body + %1 = load i32* %a, align 4 + ret i32 %1 +} + +; O1-LABEL: @nopragma( +; O1-NOT: store <4 x i32> +; O1: ret i32 +; O2-LABEL: @nopragma( +; O2: store <4 x i32> +; O2: ret i32 +; O3-LABEL: @nopragma( +; O3: store <4 x i32> +; O3: ret i32 +; O3DIS-LABEL: @nopragma( +; O3DIS-NOT: store <4 x i32> +; O3DIS: ret i32 +; Os-LABEL: @nopragma( +; Os: store <4 x i32> +; Os: ret i32 +; Oz-LABEL: @nopragma( +; Oz-NOT: store <4 x i32> +; Oz: 
ret i32 +; O1VEC-LABEL: @nopragma( +; O1VEC: store <4 x i32> +; O1VEC: ret i32 +; OzVEC-LABEL: @nopragma( +; OzVEC: store <4 x i32> +; OzVEC: ret i32 +; O1VEC2-LABEL: @nopragma( +; O1VEC2: store <4 x i32> +; O1VEC2: ret i32 +; OzVEC2-LABEL: @nopragma( +; OzVEC2: store <4 x i32> +; OzVEC2: ret i32 + +define i32 @nopragma(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %N) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %add = add nsw i32 %0, %N + %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv + store i32 %add, i32* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 32 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + %1 = load i32* %a, align 4 + ret i32 %1 +} + +; O1-LABEL: @disabled( +; O1-NOT: store <4 x i32> +; O1: ret i32 +; O2-LABEL: @disabled( +; O2-NOT: store <4 x i32> +; O2: ret i32 +; O3-LABEL: @disabled( +; O3-NOT: store <4 x i32> +; O3: ret i32 +; O3DIS-LABEL: @disabled( +; O3DIS-NOT: store <4 x i32> +; O3DIS: ret i32 +; Os-LABEL: @disabled( +; Os-NOT: store <4 x i32> +; Os: ret i32 +; Oz-LABEL: @disabled( +; Oz-NOT: store <4 x i32> +; Oz: ret i32 +; O1VEC-LABEL: @disabled( +; O1VEC-NOT: store <4 x i32> +; O1VEC: ret i32 +; OzVEC-LABEL: @disabled( +; OzVEC-NOT: store <4 x i32> +; OzVEC: ret i32 +; O1VEC2-LABEL: @disabled( +; O1VEC2-NOT: store <4 x i32> +; O1VEC2: ret i32 +; OzVEC2-LABEL: @disabled( +; OzVEC2-NOT: store <4 x i32> +; OzVEC2: ret i32 + +define i32 @disabled(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %N) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32* %b, i64 
%indvars.iv + %0 = load i32* %arrayidx, align 4 + %add = add nsw i32 %0, %N + %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv + store i32 %add, i32* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 32 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !2 + +for.end: ; preds = %for.body + %1 = load i32* %a, align 4 + ret i32 %1 +} + +!0 = metadata !{metadata !0, metadata !1} +!1 = metadata !{metadata !"llvm.vectorizer.enable", i1 1} +!2 = metadata !{metadata !2, metadata !3} +!3 = metadata !{metadata !"llvm.vectorizer.enable", i1 0} diff --git a/test/Transforms/LoopVectorize/X86/small-size.ll b/test/Transforms/LoopVectorize/X86/small-size.ll index 14ac417..dfa4faa 100644 --- a/test/Transforms/LoopVectorize/X86/small-size.ll +++ b/test/Transforms/LoopVectorize/X86/small-size.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -loop-vectorize-with-block-frequency -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" @@ -115,6 +115,31 @@ define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture ret void } +; N is unknown, we need a tail. Can't vectorize because the loop is cold. 
+;CHECK-LABEL: @example4( +;CHECK-NOT: <4 x i32> +;CHECK: ret void +define void @example4(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture %q) { + %1 = icmp eq i32 %n, 0 + br i1 %1, label %._crit_edge, label %.lr.ph, !prof !0 + +.lr.ph: ; preds = %0, %.lr.ph + %.05 = phi i32 [ %2, %.lr.ph ], [ %n, %0 ] + %.014 = phi i32* [ %5, %.lr.ph ], [ %p, %0 ] + %.023 = phi i32* [ %3, %.lr.ph ], [ %q, %0 ] + %2 = add nsw i32 %.05, -1 + %3 = getelementptr inbounds i32* %.023, i64 1 + %4 = load i32* %.023, align 16 + %5 = getelementptr inbounds i32* %.014, i64 1 + store i32 %4, i32* %.014, align 16 + %6 = icmp eq i32 %2, 0 + br i1 %6, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph, %0 + ret void +} + +!0 = metadata !{metadata !"branch_weights", i32 64, i32 4} ; We can't vectorize this one because we need a runtime ptr check. ;CHECK-LABEL: @example23( diff --git a/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll b/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll new file mode 100644 index 0000000..86c32b2 --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll @@ -0,0 +1,26 @@ +; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -S -debug-only=loop-vectorize 2>&1 | FileCheck %s +; REQUIRES: asserts + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + + +; CHECK: cost of 20 for VF 2 For instruction: %conv = uitofp i64 %tmp to double +; CHECK: cost of 40 for VF 4 For instruction: %conv = uitofp i64 %tmp to double +define void @uint64_to_double_cost(i64* noalias nocapture %a, double* noalias nocapture readonly %b) nounwind { +entry: + br label %for.body +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i64* %a, i64 %indvars.iv + %tmp = 
load i64* %arrayidx, align 4 + %conv = uitofp i64 %tmp to double + %arrayidx2 = getelementptr inbounds double* %b, i64 %indvars.iv + store double %conv, double* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 256 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} diff --git a/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll b/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll index ea107dc..d5024bb 100644 --- a/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll +++ b/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll @@ -1,13 +1,26 @@ -; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -force-vector-unroll=0 -dce -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -force-vector-unroll=0 -dce -S \ +; RUN: | FileCheck %s --check-prefix=CHECK-VECTOR +; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=1 -force-vector-unroll=0 -dce -S \ +; RUN: | FileCheck %s --check-prefix=CHECK-SCALAR target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" -;CHECK-LABEL: @foo( -;CHECK: load <4 x i32> -;CHECK-NOT: load <4 x i32> -;CHECK: store <4 x i32> -;CHECK-NOT: store <4 x i32> -;CHECK: ret + +; We don't unroll this loop because it has a small constant trip count. 
+; +; CHECK-VECTOR-LABEL: @foo( +; CHECK-VECTOR: load <4 x i32> +; CHECK-VECTOR-NOT: load <4 x i32> +; CHECK-VECTOR: store <4 x i32> +; CHECK-VECTOR-NOT: store <4 x i32> +; CHECK-VECTOR: ret +; +; CHECK-SCALAR-LABEL: @foo( +; CHECK-SCALAR: load i32* +; CHECK-SCALAR-NOT: load i32* +; CHECK-SCALAR: store i32 +; CHECK-SCALAR-NOT: store i32 +; CHECK-SCALAR: ret define i32 @foo(i32* nocapture %A) nounwind uwtable ssp { br label %1 @@ -26,10 +39,18 @@ define i32 @foo(i32* nocapture %A) nounwind uwtable ssp { ret i32 undef } -;CHECK-LABEL: @bar( -;CHECK: store <4 x i32> -;CHECK: store <4 x i32> -;CHECK: ret +; But this is a good small loop to unroll as we don't know of a bound on its +; trip count. +; +; CHECK-VECTOR-LABEL: @bar( +; CHECK-VECTOR: store <4 x i32> +; CHECK-VECTOR: store <4 x i32> +; CHECK-VECTOR: ret +; +; CHECK-SCALAR-LABEL: @bar( +; CHECK-SCALAR: store i32 +; CHECK-SCALAR: store i32 +; CHECK-SCALAR: ret define i32 @bar(i32* nocapture %A, i32 %n) nounwind uwtable ssp { %1 = icmp sgt i32 %n, 0 br i1 %1, label %.lr.ph, label %._crit_edge @@ -48,3 +69,32 @@ define i32 @bar(i32* nocapture %A, i32 %n) nounwind uwtable ssp { ._crit_edge: ; preds = %.lr.ph, %0 ret i32 undef } + +; Also unroll if we need a runtime check but it was going to be added for +; vectorization anyways. +; CHECK-VECTOR-LABEL: @runtime_chk( +; CHECK-VECTOR: store <4 x float> +; CHECK-VECTOR: store <4 x float> +; +; But not if the unrolling would introduce the runtime check. 
+; CHECK-SCALAR-LABEL: @runtime_chk( +; CHECK-SCALAR: store float +; CHECK-SCALAR-NOT: store float +define void @runtime_chk(float* %A, float* %B, float %N) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds float* %B, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %mul = fmul float %0, %N + %arrayidx2 = getelementptr inbounds float* %A, i64 %indvars.iv + store float %mul, float* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 256 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} diff --git a/test/Transforms/LoopVectorize/flags.ll b/test/Transforms/LoopVectorize/flags.ll index a4ebb42..21d0937 100644 --- a/test/Transforms/LoopVectorize/flags.ll +++ b/test/Transforms/LoopVectorize/flags.ll @@ -51,3 +51,29 @@ define i32 @flags2(i32 %n, i32* nocapture %A) nounwind uwtable ssp { ._crit_edge: ; preds = %.lr.ph, %0 ret i32 undef } + +; Make sure we copy fast math flags and use them for the final reduction. 
+; CHECK-LABEL: fast_math +; CHECK: load <4 x float> +; CHECK: fadd fast <4 x float> +; CHECK: br +; CHECK: fadd fast <4 x float> +; CHECK: fadd fast <4 x float> +define float @fast_math(float* noalias %s) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %q.04 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds float* %s, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %add = fadd fast float %q.04, %0 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 256 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + %add.lcssa = phi float [ %add, %for.body ] + ret float %add.lcssa +} diff --git a/test/Transforms/LoopVectorize/float-reduction.ll b/test/Transforms/LoopVectorize/float-reduction.ll index c45098d..0dfbab0 100644 --- a/test/Transforms/LoopVectorize/float-reduction.ll +++ b/test/Transforms/LoopVectorize/float-reduction.ll @@ -3,7 +3,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" ;CHECK-LABEL: @foo( -;CHECK: fadd <4 x float> +;CHECK: fadd fast <4 x float> ;CHECK: ret define float @foo(float* nocapture %A, i32* nocapture %n) nounwind uwtable readonly ssp { entry: diff --git a/test/Transforms/LoopVectorize/global_alias.ll b/test/Transforms/LoopVectorize/global_alias.ll index 0118fb4..d64d67f 100644 --- a/test/Transforms/LoopVectorize/global_alias.ll +++ b/test/Transforms/LoopVectorize/global_alias.ll @@ -387,7 +387,7 @@ for.end: ; preds = %for.cond ; return Foo.A[a]; ; } ; CHECK-LABEL: define i32 @noAlias08( -; CHECK: sub nsw <4 x i32> +; CHECK: sub <4 x i32> ; CHECK: ret define i32 @noAlias08(i32 %a) #0 { @@ -439,7 +439,7 @@ for.end: ; preds = %for.cond ; return Foo.A[a]; ; } ; CHECK-LABEL: define i32 @noAlias09( -; CHECK: sub 
nsw <4 x i32> +; CHECK: sub <4 x i32> ; CHECK: ret define i32 @noAlias09(i32 %a) #0 { @@ -491,7 +491,7 @@ for.end: ; preds = %for.cond ; return *(PA+a); ; } ; CHECK-LABEL: define i32 @noAlias10( -; CHECK-NOT: sub nsw <4 x i32> +; CHECK-NOT: sub {{.*}} <4 x i32> ; CHECK: ret ; ; TODO: This test vectorizes (with run-time check) on real targets with -O3) @@ -721,7 +721,7 @@ for.end: ; preds = %for.cond ; return Foo.A[a]; ; } ; CHECK-LABEL: define i32 @noAlias14( -; CHECK: sub nsw <4 x i32> +; CHECK: sub <4 x i32> ; CHECK: ret define i32 @noAlias14(i32 %a) #0 { diff --git a/test/Transforms/LoopVectorize/if-pred-stores.ll b/test/Transforms/LoopVectorize/if-pred-stores.ll new file mode 100644 index 0000000..7b0e181 --- /dev/null +++ b/test/Transforms/LoopVectorize/if-pred-stores.ll @@ -0,0 +1,126 @@ +; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-unroll=2 -loop-vectorize < %s | FileCheck %s --check-prefix=UNROLL +; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=2 -force-vector-unroll=1 -loop-vectorize -enable-cond-stores-vec < %s | FileCheck %s --check-prefix=VEC +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.9.0" + +; Test predication of stores. 
+define i32 @test(i32* nocapture %f) #0 { +entry: + br label %for.body + +; VEC-LABEL: test +; VEC: %[[v8:.+]] = icmp sgt <2 x i32> %{{.*}}, +; VEC: %[[v9:.+]] = add nsw <2 x i32> %{{.*}}, +; VEC: %[[v10:.+]] = and <2 x i1> %[[v8]], +; VEC: %[[v11:.+]] = extractelement <2 x i1> %[[v10]], i32 0 +; VEC: %[[v12:.+]] = icmp eq i1 %[[v11]], true +; VEC: br i1 %[[v12]], label %[[cond:.+]], label %[[else:.+]] +; +; VEC: [[cond]]: +; VEC: %[[v13:.+]] = extractelement <2 x i32> %[[v9]], i32 0 +; VEC: %[[v14:.+]] = extractelement <2 x i32*> %{{.*}}, i32 0 +; VEC: store i32 %[[v13]], i32* %[[v14]], align 4 +; VEC: br label %[[else:.+]] +; +; VEC: [[else]]: +; VEC: %[[v15:.+]] = extractelement <2 x i1> %[[v10]], i32 1 +; VEC: %[[v16:.+]] = icmp eq i1 %[[v15]], true +; VEC: br i1 %[[v16]], label %[[cond2:.+]], label %[[else2:.+]] +; +; VEC: [[cond2]]: +; VEC: %[[v17:.+]] = extractelement <2 x i32> %[[v9]], i32 1 +; VEC: %[[v18:.+]] = extractelement <2 x i32*> %{{.+}} i32 1 +; VEC: store i32 %[[v17]], i32* %[[v18]], align 4 +; VEC: br label %[[else2:.+]] +; +; VEC: [[else2]]: + +; UNROLL-LABEL: test +; UNROLL: vector.body: +; UNROLL: %[[IND:[a-zA-Z0-9]+]] = add i64 %{{.*}}, 0 +; UNROLL: %[[IND1:[a-zA-Z0-9]+]] = add i64 %{{.*}}, 1 +; UNROLL: %[[v0:[a-zA-Z0-9]+]] = getelementptr inbounds i32* %f, i64 %[[IND]] +; UNROLL: %[[v1:[a-zA-Z0-9]+]] = getelementptr inbounds i32* %f, i64 %[[IND1]] +; UNROLL: %[[v2:[a-zA-Z0-9]+]] = load i32* %[[v0]], align 4 +; UNROLL: %[[v3:[a-zA-Z0-9]+]] = load i32* %[[v1]], align 4 +; UNROLL: %[[v4:[a-zA-Z0-9]+]] = icmp sgt i32 %[[v2]], 100 +; UNROLL: %[[v5:[a-zA-Z0-9]+]] = icmp sgt i32 %[[v3]], 100 +; UNROLL: %[[v6:[a-zA-Z0-9]+]] = add nsw i32 %[[v2]], 20 +; UNROLL: %[[v7:[a-zA-Z0-9]+]] = add nsw i32 %[[v3]], 20 +; UNROLL: %[[v8:[a-zA-Z0-9]+]] = icmp eq i1 %[[v4]], true +; UNROLL: br i1 %[[v8]], label %[[cond:[a-zA-Z0-9.]+]], label %[[else:[a-zA-Z0-9.]+]] +; +; UNROLL: [[cond]]: +; UNROLL: store i32 %[[v6]], i32* %[[v0]], align 4 +; UNROLL: br label 
%[[else]] +; +; UNROLL: [[else]]: +; UNROLL: %[[v9:[a-zA-Z0-9]+]] = icmp eq i1 %[[v5]], true +; UNROLL: br i1 %[[v9]], label %[[cond2:[a-zA-Z0-9.]+]], label %[[else2:[a-zA-Z0-9.]+]] +; +; UNROLL: [[cond2]]: +; UNROLL: store i32 %[[v7]], i32* %[[v1]], align 4 +; UNROLL: br label %[[else2]] +; +; UNROLL: [[else2]]: + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ] + %arrayidx = getelementptr inbounds i32* %f, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %cmp1 = icmp sgt i32 %0, 100 + br i1 %cmp1, label %if.then, label %for.inc + +if.then: + %add = add nsw i32 %0, 20 + store i32 %add, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 128 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i32 0 +} + +; Track basic blocks when unrolling conditional blocks. This code used to assert +; because we did not update the phi nodes with the proper predecessor in the +; vectorized loop body. 
+; PR18724 + +; UNROLL-LABEL: bug18724 +; UNROLL: store i32 +; UNROLL: store i32 + +define void @bug18724() { +entry: + br label %for.body9 + +for.body9: + br i1 undef, label %for.inc26, label %for.body14 + +for.body14: + %indvars.iv3 = phi i64 [ %indvars.iv.next4, %for.inc23 ], [ undef, %for.body9 ] + %iNewChunks.120 = phi i32 [ %iNewChunks.2, %for.inc23 ], [ undef, %for.body9 ] + %arrayidx16 = getelementptr inbounds [768 x i32]* undef, i64 0, i64 %indvars.iv3 + %tmp = load i32* %arrayidx16, align 4 + br i1 undef, label %if.then18, label %for.inc23 + +if.then18: + store i32 2, i32* %arrayidx16, align 4 + %inc21 = add nsw i32 %iNewChunks.120, 1 + br label %for.inc23 + +for.inc23: + %iNewChunks.2 = phi i32 [ %inc21, %if.then18 ], [ %iNewChunks.120, %for.body14 ] + %indvars.iv.next4 = add nsw i64 %indvars.iv3, 1 + %tmp1 = trunc i64 %indvars.iv3 to i32 + %cmp13 = icmp slt i32 %tmp1, 0 + br i1 %cmp13, label %for.body14, label %for.inc26 + +for.inc26: + %iNewChunks.1.lcssa = phi i32 [ undef, %for.body9 ], [ %iNewChunks.2, %for.inc23 ] + unreachable +} diff --git a/test/Transforms/LoopVectorize/increment.ll b/test/Transforms/LoopVectorize/increment.ll index d35bd58..71bedb7 100644 --- a/test/Transforms/LoopVectorize/increment.ll +++ b/test/Transforms/LoopVectorize/increment.ll @@ -34,7 +34,7 @@ define void @inc(i32 %n) nounwind uwtable noinline ssp { ret void } -; Can't vectorize this loop because the access to A[X] is non linear. +; Can't vectorize this loop because the access to A[X] is non-linear. 
; ; for (i = 0; i < n; ++i) { ; A[B[i]]++; diff --git a/test/Transforms/LoopVectorize/induction.ll b/test/Transforms/LoopVectorize/induction.ll index 50c3b6b..ad2c663 100644 --- a/test/Transforms/LoopVectorize/induction.ll +++ b/test/Transforms/LoopVectorize/induction.ll @@ -75,7 +75,7 @@ loopexit: ; PR17532 ; CHECK-LABEL: i8_loop -; CHECK; icmp eq i32 {{.*}}, 256 +; CHECK: icmp eq i32 {{.*}}, 256 define i32 @i8_loop() nounwind readnone ssp uwtable { br label %1 @@ -92,7 +92,7 @@ define i32 @i8_loop() nounwind readnone ssp uwtable { } ; CHECK-LABEL: i16_loop -; CHECK; icmp eq i32 {{.*}}, 65536 +; CHECK: icmp eq i32 {{.*}}, 65536 define i32 @i16_loop() nounwind readnone ssp uwtable { br label %1 diff --git a/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll b/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll new file mode 100644 index 0000000..88a29c5 --- /dev/null +++ b/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll @@ -0,0 +1,42 @@ +; RUN: opt -indvars -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S < %s | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.9.0" + +; We must not vectorize this loop. %add55 is not reduction. Its value is used +; multiple times. 
+ +; PR18526 + +; CHECK: multiple_use_of_value +; CHECK-NOT: <2 x i32> + +define void @multiple_use_of_value() { +entry: + %n = alloca i32, align 4 + %k7 = alloca i32, align 4 + %nf = alloca i32, align 4 + %0 = load i32* %k7, align 4 + %.neg1 = sub i32 0, %0 + %n.promoted = load i32* %n, align 4 + %nf.promoted = load i32* %nf, align 4 + br label %for.body + +for.body: + %inc107 = phi i32 [ undef, %entry ], [ %inc10, %for.body ] + %inc6 = phi i32 [ %nf.promoted, %entry ], [ undef, %for.body ] + %add55 = phi i32 [ %n.promoted, %entry ], [ %add5, %for.body ] + %.neg2 = sub i32 0, %inc6 + %add.neg = add i32 0, %add55 + %add4.neg = add i32 %add.neg, %.neg1 + %sub = add i32 %add4.neg, %.neg2 + %add5 = add i32 %sub, %add55 + %inc10 = add i32 %inc107, 1 + %cmp = icmp ult i32 %inc10, 61 + br i1 %cmp, label %for.body, label %for.end + +for.end: + %add5.lcssa = phi i32 [ %add5, %for.body ] + store i32 %add5.lcssa, i32* %n, align 4 + ret void +} diff --git a/test/Transforms/LoopVectorize/runtime-check-readonly.ll b/test/Transforms/LoopVectorize/runtime-check-readonly.ll index a2b9ad9..e7b1e2a 100644 --- a/test/Transforms/LoopVectorize/runtime-check-readonly.ll +++ b/test/Transforms/LoopVectorize/runtime-check-readonly.ll @@ -7,11 +7,13 @@ target triple = "x86_64-apple-macosx10.8.0" ;CHECK: br ;CHECK: getelementptr ;CHECK-NEXT: getelementptr -;CHECK-NEXT: icmp uge -;CHECK-NEXT: icmp uge -;CHECK-NEXT: icmp uge -;CHECK-NEXT: icmp uge -;CHECK-NEXT: and +;CHECK-DAG: icmp uge +;CHECK-DAG: icmp uge +;CHECK-DAG: icmp uge +;CHECK-DAG: icmp uge +;CHECK-DAG: and +;CHECK-DAG: and +;CHECK: br ;CHECK: ret define void @add_ints(i32* nocapture %A, i32* nocapture %B, i32* nocapture %C) { entry: diff --git a/test/Transforms/LoopVectorize/unroll_novec.ll b/test/Transforms/LoopVectorize/unroll_novec.ll index 33f128d..89f4678 100644 --- a/test/Transforms/LoopVectorize/unroll_novec.ll +++ b/test/Transforms/LoopVectorize/unroll_novec.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize 
-force-vector-width=1 -force-vector-unroll=2 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-target-num-scalar-regs=16 -force-target-max-scalar-unroll=8 -force-target-instruction-cost=1 -small-loop-cost=40 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" @@ -12,10 +12,20 @@ target triple = "x86_64-apple-macosx10.8.0" ;CHECK-LABEL: @inc( ;CHECK: load i32* ;CHECK: load i32* +;CHECK: load i32* +;CHECK: load i32* +;CHECK-NOT: load i32* +;CHECK: add nsw i32 ;CHECK: add nsw i32 ;CHECK: add nsw i32 +;CHECK: add nsw i32 +;CHECK-NOT: add nsw i32 +;CHECK: store i32 +;CHECK: store i32 ;CHECK: store i32 ;CHECK: store i32 +;CHECK-NOT: store i32 +;CHECK: add i64 %{{.*}}, 4 ;CHECK: ret void define void @inc(i32 %n) nounwind uwtable noinline ssp { %1 = icmp sgt i32 %n, 0 diff --git a/test/Transforms/LoopVectorize/value-ptr-bug.ll b/test/Transforms/LoopVectorize/value-ptr-bug.ll index e8d3728..6b06afa 100644 --- a/test/Transforms/LoopVectorize/value-ptr-bug.ll +++ b/test/Transforms/LoopVectorize/value-ptr-bug.ll @@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; PR16073 -; Because we were caching value pointers accross a function call that could RAUW +; Because we were caching value pointers across a function call that could RAUW ; we would generate an undefined value store below: ; SCEVExpander::expandCodeFor would change a value (the start value of an ; induction) that we cached in the induction variable list. 
diff --git a/test/Transforms/LoopVectorize/version-mem-access.ll b/test/Transforms/LoopVectorize/version-mem-access.ll new file mode 100644 index 0000000..51d20e2 --- /dev/null +++ b/test/Transforms/LoopVectorize/version-mem-access.ll @@ -0,0 +1,87 @@ +; RUN: opt -basicaa -loop-vectorize -enable-mem-access-versioning -force-vector-width=2 -force-vector-unroll=1 < %s -S | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +; CHECK-LABEL: test +define void @test(i32* noalias %A, i64 %AStride, + i32* noalias %B, i32 %BStride, + i32* noalias %C, i64 %CStride, i32 %N) { +entry: + %cmp13 = icmp eq i32 %N, 0 + br i1 %cmp13, label %for.end, label %for.body.preheader + +; CHECK-DAG: icmp ne i64 %AStride, 1 +; CHECK-DAG: icmp ne i32 %BStride, 1 +; CHECK-DAG: icmp ne i64 %CStride, 1 +; CHECK: or +; CHECK: or +; CHECK: br + +; CHECK: vector.body +; CHECK: load <2 x i32> + +for.body.preheader: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] + %iv.trunc = trunc i64 %indvars.iv to i32 + %mul = mul i32 %iv.trunc, %BStride + %mul64 = zext i32 %mul to i64 + %arrayidx = getelementptr inbounds i32* %B, i64 %mul64 + %0 = load i32* %arrayidx, align 4 + %mul2 = mul nsw i64 %indvars.iv, %CStride + %arrayidx3 = getelementptr inbounds i32* %C, i64 %mul2 + %1 = load i32* %arrayidx3, align 4 + %mul4 = mul nsw i32 %1, %0 + %mul3 = mul nsw i64 %indvars.iv, %AStride + %arrayidx7 = getelementptr inbounds i32* %A, i64 %mul3 + store i32 %mul4, i32* %arrayidx7, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %N + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: + br label %for.end + +for.end: + ret void +} + +; We used to crash on this function because we removed the fptosi cast when +; replacing the symbolic stride '%conv'. 
+; PR18480 + +; CHECK-LABEL: fn1 +; CHECK: load <2 x double> + +define void @fn1(double* noalias %x, double* noalias %c, double %a) { +entry: + %conv = fptosi double %a to i32 + %cmp8 = icmp sgt i32 %conv, 0 + br i1 %cmp8, label %for.body.preheader, label %for.end + +for.body.preheader: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] + %0 = trunc i64 %indvars.iv to i32 + %mul = mul nsw i32 %0, %conv + %idxprom = sext i32 %mul to i64 + %arrayidx = getelementptr inbounds double* %x, i64 %idxprom + %1 = load double* %arrayidx, align 8 + %arrayidx3 = getelementptr inbounds double* %c, i64 %indvars.iv + store double %1, double* %arrayidx3, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %conv + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: + br label %for.end + +for.end: + ret void +} diff --git a/test/Transforms/LowerAtomic/atomic-swap.ll b/test/Transforms/LowerAtomic/atomic-swap.ll index 4331677..c319834 100644 --- a/test/Transforms/LowerAtomic/atomic-swap.ll +++ b/test/Transforms/LowerAtomic/atomic-swap.ll @@ -3,7 +3,7 @@ define i8 @cmpswap() { ; CHECK-LABEL: @cmpswap( %i = alloca i8 - %j = cmpxchg i8* %i, i8 0, i8 42 monotonic + %j = cmpxchg i8* %i, i8 0, i8 42 monotonic monotonic ; CHECK: [[INST:%[a-z0-9]+]] = load ; CHECK-NEXT: icmp ; CHECK-NEXT: select diff --git a/test/Transforms/LowerExpectIntrinsic/basic.ll b/test/Transforms/LowerExpectIntrinsic/basic.ll index 955209a..e184cb0 100644 --- a/test/Transforms/LowerExpectIntrinsic/basic.ll +++ b/test/Transforms/LowerExpectIntrinsic/basic.ll @@ -245,6 +245,35 @@ return: ; preds = %if.end, %if.then declare i32 @llvm.expect.i32(i32, i32) nounwind readnone +; CHECK-LABEL: @test9( +define i32 @test9(i32 %x) nounwind uwtable ssp { +entry: + %retval = alloca i32, align 4 + %x.addr = alloca i32, align 4 + store i32 %x, 
i32* %x.addr, align 4 + %tmp = load i32* %x.addr, align 4 + %cmp = icmp sgt i32 %tmp, 1 + %expval = call i1 @llvm.expect.i1(i1 %cmp, i1 1) +; CHECK: !prof !0 +; CHECK-NOT: @llvm.expect + br i1 %expval, label %if.then, label %if.end + +if.then: ; preds = %entry + %call = call i32 (...)* @f() + store i32 %call, i32* %retval + br label %return + +if.end: ; preds = %entry + store i32 1, i32* %retval + br label %return + +return: ; preds = %if.end, %if.then + %0 = load i32* %retval + ret i32 %0 +} + +declare i1 @llvm.expect.i1(i1, i1) nounwind readnone + ; CHECK: !0 = metadata !{metadata !"branch_weights", i32 64, i32 4} ; CHECK: !1 = metadata !{metadata !"branch_weights", i32 4, i32 64} ; CHECK: !2 = metadata !{metadata !"branch_weights", i32 4, i32 64, i32 4} diff --git a/test/Transforms/LowerInvoke/2004-02-29-PHICrash.ll b/test/Transforms/LowerInvoke/2004-02-29-PHICrash.ll deleted file mode 100644 index bddb702..0000000 --- a/test/Transforms/LowerInvoke/2004-02-29-PHICrash.ll +++ /dev/null @@ -1,15 +0,0 @@ -; RUN: opt < %s -lowerinvoke -enable-correct-eh-support -disable-output - -define void @_ZNKSt11__use_cacheISt16__numpunct_cacheIcEEclERKSt6locale() { -entry: - br i1 false, label %then, label %UnifiedReturnBlock -then: ; preds = %entry - invoke void @_Znwj( ) - to label %UnifiedReturnBlock unwind label %UnifiedReturnBlock -UnifiedReturnBlock: ; preds = %then, %then, %entry - %UnifiedRetVal = phi i32* [ null, %entry ], [ null, %then ], [ null, %then ] ; [#uses=0] - ret void -} - -declare void @_Znwj() - diff --git a/test/Transforms/LowerInvoke/2005-08-03-InvokeWithPHI.ll b/test/Transforms/LowerInvoke/2005-08-03-InvokeWithPHI.ll deleted file mode 100644 index 1057ad7..0000000 --- a/test/Transforms/LowerInvoke/2005-08-03-InvokeWithPHI.ll +++ /dev/null @@ -1,17 +0,0 @@ -; RUN: opt < %s -lowerinvoke -enable-correct-eh-support -disable-output - -declare void @ll_listnext__listiterPtr() - -define void @WorkTask.fn() { -block0: - invoke void @ll_listnext__listiterPtr( ) 
- to label %block9 unwind label %block8_exception_handling -block8_exception_handling: ; preds = %block0 - ret void -block9: ; preds = %block0 - %w_2690 = phi { i32, i32 }* [ null, %block0 ] ; <{ i32, i32 }*> [#uses=1] - %tmp.129 = getelementptr { i32, i32 }* %w_2690, i32 0, i32 1 ; [#uses=1] - %v2769 = load i32* %tmp.129 ; [#uses=0] - ret void -} - diff --git a/test/Transforms/LowerInvoke/2005-08-03-InvokeWithPHIUse.ll b/test/Transforms/LowerInvoke/2005-08-03-InvokeWithPHIUse.ll deleted file mode 100644 index 9402046..0000000 --- a/test/Transforms/LowerInvoke/2005-08-03-InvokeWithPHIUse.ll +++ /dev/null @@ -1,15 +0,0 @@ -; RUN: opt < %s -lowerinvoke -enable-correct-eh-support -disable-output - -declare fastcc i32 @ll_listnext__listiterPtr() - -define fastcc i32 @WorkTask.fn() { -block0: - %v2679 = invoke fastcc i32 @ll_listnext__listiterPtr( ) - to label %block9 unwind label %block8_exception_handling ; [#uses=1] -block8_exception_handling: ; preds = %block0 - ret i32 0 -block9: ; preds = %block0 - %i_2689 = phi i32 [ %v2679, %block0 ] ; [#uses=1] - ret i32 %i_2689 -} - diff --git a/test/Transforms/LowerInvoke/2008-02-14-CritEdgePhiCrash.ll b/test/Transforms/LowerInvoke/2008-02-14-CritEdgePhiCrash.ll deleted file mode 100644 index b46ccfb..0000000 --- a/test/Transforms/LowerInvoke/2008-02-14-CritEdgePhiCrash.ll +++ /dev/null @@ -1,14 +0,0 @@ -; RUN: opt < %s -lowerinvoke -enable-correct-eh-support -disable-output -; PR2029 -define i32 @main(i32 %argc, i8** %argv) { -bb470: - invoke i32 @main(i32 0, i8** null) to label %invcont474 unwind label -%lpad902 - -invcont474: ; preds = %bb470 - ret i32 0 - -lpad902: ; preds = %bb470 - %tmp471.lcssa = phi i8* [ null, %bb470 ] ; - ret i32 0 -} diff --git a/test/Transforms/LowerInvoke/basictest.ll b/test/Transforms/LowerInvoke/basictest.ll deleted file mode 100644 index f0ca5f4..0000000 --- a/test/Transforms/LowerInvoke/basictest.ll +++ /dev/null @@ -1,30 +0,0 @@ -; RUN: opt < %s -lowerinvoke -disable-output 
-enable-correct-eh-support - - -define i32 @foo() { - invoke i32 @foo( ) - to label %Ok unwind label %Crap ; :1 [#uses=0] -Ok: ; preds = %0 - invoke i32 @foo( ) - to label %Ok2 unwind label %Crap ; :2 [#uses=0] -Ok2: ; preds = %Ok - ret i32 2 -Crap: ; preds = %Ok, %0 - ret i32 1 -} - -define i32 @bar(i32 %blah) { - br label %doit -doit: ; preds = %0 - ;; Value live across an unwind edge. - %B2 = add i32 %blah, 1 ; [#uses=1] - invoke i32 @foo( ) - to label %Ok unwind label %Crap ; :1 [#uses=0] -Ok: ; preds = %doit - invoke i32 @foo( ) - to label %Ok2 unwind label %Crap ; :2 [#uses=0] -Ok2: ; preds = %Ok - ret i32 2 -Crap: ; preds = %Ok, %doit - ret i32 %B2 -} diff --git a/test/Transforms/LowerInvoke/lowerinvoke.ll b/test/Transforms/LowerInvoke/lowerinvoke.ll new file mode 100644 index 0000000..05c19be --- /dev/null +++ b/test/Transforms/LowerInvoke/lowerinvoke.ll @@ -0,0 +1,25 @@ +; RUN: opt < %s -lowerinvoke -S | FileCheck %s + +declare i32 @external_func(i64 %arg) + +define i32 @invoke_test(i64 %arg) { +entry: + %result = invoke fastcc i32 @external_func(i64 inreg %arg) + to label %cont unwind label %lpad +cont: + ret i32 %result +lpad: + %phi = phi i32 [ 99, %entry ] + %lp = landingpad { i8*, i32 } personality i8* null cleanup + ret i32 %phi +} + +; The "invoke" should be converted to a "call". +; CHECK-LABEL: define i32 @invoke_test +; CHECK: %result = call fastcc i32 @external_func(i64 inreg %arg) +; CHECK-NEXT: br label %cont + +; Note that this pass does not remove dead landingpad blocks. 
+; CHECK: lpad: +; CHECK-NOT: phi +; CHECK: landingpad diff --git a/test/Transforms/MemCpyOpt/form-memset.ll b/test/Transforms/MemCpyOpt/form-memset.ll index 7c7b4fc..d980b7f 100644 --- a/test/Transforms/MemCpyOpt/form-memset.ll +++ b/test/Transforms/MemCpyOpt/form-memset.ll @@ -272,3 +272,15 @@ define void @test9() nounwind { ; CHECK-LABEL: @test9( ; CHECK: call void @llvm.memset.p0i8.i64(i8* bitcast ([16 x i64]* @test9buf to i8*), i8 -1, i64 16, i32 16, i1 false) } + +; PR19092 +define void @test10(i8* nocapture %P) nounwind { + tail call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 42, i32 1, i1 false) + tail call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 23, i32 1, i1 false) + ret void +; CHECK-LABEL: @test10( +; CHECK-NOT: memset +; CHECK: call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 42, i32 1, i1 false) +; CHECK-NOT: memset +; CHECK: ret void +} diff --git a/test/Transforms/MemCpyOpt/memcpy-undef.ll b/test/Transforms/MemCpyOpt/memcpy-undef.ll new file mode 100644 index 0000000..663b8dc --- /dev/null +++ b/test/Transforms/MemCpyOpt/memcpy-undef.ll @@ -0,0 +1,46 @@ +; RUN: opt < %s -basicaa -memcpyopt -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +%struct.foo = type { i8, [7 x i8], i32 } + +define i32 @test1(%struct.foo* nocapture %foobie) nounwind noinline ssp uwtable { + %bletch.sroa.1 = alloca [7 x i8], align 1 + %1 = getelementptr inbounds %struct.foo* %foobie, i64 0, i32 0 + store i8 98, i8* %1, align 4 + %2 = getelementptr inbounds %struct.foo* %foobie, i64 0, i32 1, i64 0 + %3 = getelementptr inbounds [7 x i8]* %bletch.sroa.1, i64 0, i64 0 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 7, i32 1, i1 false) + %4 = getelementptr inbounds %struct.foo* %foobie, i64 0, i32 2 + store i32 20, i32* %4, align 4 + ret i32 undef + +; Check that the memcpy is 
removed. +; CHECK-LABEL: @test1( +; CHECK-NOT: call void @llvm.memcpy +} + +define void @test2(i8* sret noalias nocapture %out, i8* %in) nounwind noinline ssp uwtable { + call void @llvm.lifetime.start(i64 8, i8* %in) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 8, i32 1, i1 false) + ret void + +; Check that the memcpy is removed. +; CHECK-LABEL: @test2( +; CHECK-NOT: call void @llvm.memcpy +} + +define void @test3(i8* sret noalias nocapture %out, i8* %in) nounwind noinline ssp uwtable { + call void @llvm.lifetime.start(i64 4, i8* %in) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 8, i32 1, i1 false) + ret void + +; Check that the memcpy is not removed. +; CHECK-LABEL: @test3( +; CHECK: call void @llvm.memcpy +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind + +declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind diff --git a/test/Transforms/MemCpyOpt/memcpy.ll b/test/Transforms/MemCpyOpt/memcpy.ll index 2417cd1..492c453 100644 --- a/test/Transforms/MemCpyOpt/memcpy.ll +++ b/test/Transforms/MemCpyOpt/memcpy.ll @@ -78,6 +78,7 @@ define void @test4(i8 *%P) { declare void @test4a(i8* align 1 byval) declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind +declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind %struct.S = type { i128, [4 x i8]} @@ -152,6 +153,22 @@ declare noalias i8* @malloc(i32) ; rdar://11341081 %struct.big = type { [50 x i32] } +define void @test9_addrspacecast() nounwind ssp uwtable { +entry: +; CHECK-LABEL: @test9_addrspacecast( +; CHECK: f1 +; CHECK-NOT: memcpy +; CHECK: f2 + %b = alloca %struct.big, align 4 + %tmp = alloca %struct.big, align 4 + call void @f1(%struct.big* sret %tmp) + %0 = addrspacecast %struct.big* %b to i8 addrspace(1)* + %1 = addrspacecast %struct.big* %tmp to i8 addrspace(1)* + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %0, 
i8 addrspace(1)* %1, i64 200, i32 4, i1 false) + call void @f2(%struct.big* %b) + ret void +} + define void @test9() nounwind ssp uwtable { entry: ; CHECK: test9 diff --git a/test/Transforms/MetaRenamer/metarenamer.ll b/test/Transforms/MetaRenamer/metarenamer.ll index 4020e10..6297af6 100644 --- a/test/Transforms/MetaRenamer/metarenamer.ll +++ b/test/Transforms/MetaRenamer/metarenamer.ll @@ -14,7 +14,9 @@ target triple = "x86_64-pc-linux-gnu" @func_7_xxx = alias weak i32 (...)* @aliased_func_7_xxx -declare i32 @aliased_func_7_xxx(...) +define i32 @aliased_func_7_xxx(...) { + ret i32 0 +} define i32 @func_3_xxx() nounwind uwtable ssp { ret i32 3 diff --git a/test/Transforms/ObjCARC/allocas.ll b/test/Transforms/ObjCARC/allocas.ll index 5065673..7347a8f 100644 --- a/test/Transforms/ObjCARC/allocas.ll +++ b/test/Transforms/ObjCARC/allocas.ll @@ -28,7 +28,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) declare i8* @objc_msgSend(i8*, i8*, ...) -; In the presense of allocas, unconditionally remove retain/release pairs only +; In the presence of allocas, unconditionally remove retain/release pairs only ; if they are known safe in both directions. This prevents matching up an inner ; retain with the boundary guarding release in the following situation: ; @@ -336,7 +336,7 @@ bb3: ret void } -; Make sure in the presense of allocas, if we find a cfghazard we do not perform +; Make sure in the presence of allocas, if we find a cfghazard we do not perform ; code motion even if we are known safe. These two concepts are separate and ; should be treated as such. ; diff --git a/test/Transforms/ObjCARC/contract-end-of-use-list.ll b/test/Transforms/ObjCARC/contract-end-of-use-list.ll new file mode 100644 index 0000000..a38cd8a --- /dev/null +++ b/test/Transforms/ObjCARC/contract-end-of-use-list.ll @@ -0,0 +1,30 @@ +; RUN: opt -S < %s -objc-arc-expand -objc-arc-contract | FileCheck %s +; Don't crash. Reproducer for a use_iterator bug from r203364. 
+; rdar://problem/16333235 +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-darwin13.2.0" + +%struct = type { i8*, i8* } + +; CHECK-LABEL: @foo() { +define internal i8* @foo() { +entry: + %call = call i8* @bar() +; CHECK: %retained1 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call) + %retained1 = call i8* @objc_retain(i8* %call) + %isnull = icmp eq i8* %retained1, null + br i1 %isnull, label %cleanup, label %if.end + +if.end: +; CHECK: %retained2 = call i8* @objc_retain(i8* %retained1) + %retained2 = call i8* @objc_retain(i8* %retained1) + br label %cleanup + +cleanup: + %retval = phi i8* [ %retained2, %if.end ], [ null, %entry ] + ret i8* %retval +} + +declare i8* @bar() + +declare extern_weak i8* @objc_retain(i8*) diff --git a/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll b/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll index 0728617..79e300c 100644 --- a/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll +++ b/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll @@ -24,11 +24,11 @@ target triple = "x86_64-apple-macosx10.9.0" @"\01L_OBJC_METH_VAR_NAME_" = internal global [4 x i8] c"new\00", section "__TEXT,__objc_methname,cstring_literals", align 1 @"\01L_OBJC_SELECTOR_REFERENCES_" = internal global i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i64 0, i64 0), section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip" @__CFConstantStringClassReference = external global [0 x i32] -@.str = linker_private unnamed_addr constant [11 x i8] c"Failed: %@\00", align 1 +@.str = private unnamed_addr constant [11 x i8] c"Failed: %@\00", align 1 @_unnamed_cfstring_ = private constant %struct.NSConstantString { i32* getelementptr inbounds ([0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 1992, i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i64 10 }, section "__DATA,__cfstring" 
@"OBJC_CLASS_$_NSException" = external global %struct._class_t @"\01L_OBJC_CLASSLIST_REFERENCES_$_1" = internal global %struct._class_t* @"OBJC_CLASS_$_NSException", section "__DATA, __objc_classrefs, regular, no_dead_strip", align 8 -@.str2 = linker_private unnamed_addr constant [4 x i8] c"Foo\00", align 1 +@.str2 = private unnamed_addr constant [4 x i8] c"Foo\00", align 1 @_unnamed_cfstring_3 = private constant %struct.NSConstantString { i32* getelementptr inbounds ([0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 1992, i8* getelementptr inbounds ([4 x i8]* @.str2, i32 0, i32 0), i64 3 }, section "__DATA,__cfstring" @"\01L_OBJC_METH_VAR_NAME_4" = internal global [14 x i8] c"raise:format:\00", section "__TEXT,__objc_methname,cstring_literals", align 1 @"\01L_OBJC_SELECTOR_REFERENCES_5" = internal global i8* getelementptr inbounds ([14 x i8]* @"\01L_OBJC_METH_VAR_NAME_4", i64 0, i64 0), section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip" diff --git a/test/Transforms/SLPVectorizer/ARM64/lit.local.cfg b/test/Transforms/SLPVectorizer/ARM64/lit.local.cfg new file mode 100644 index 0000000..84ac981 --- /dev/null +++ b/test/Transforms/SLPVectorizer/ARM64/lit.local.cfg @@ -0,0 +1,3 @@ +targets = set(config.root.targets_to_build.split()) +if not 'ARM64' in targets: + config.unsupported = True diff --git a/test/Transforms/SLPVectorizer/ARM64/mismatched-intrinsics.ll b/test/Transforms/SLPVectorizer/ARM64/mismatched-intrinsics.ll new file mode 100644 index 0000000..3d6da12 --- /dev/null +++ b/test/Transforms/SLPVectorizer/ARM64/mismatched-intrinsics.ll @@ -0,0 +1,18 @@ +; RUN: opt -S -slp-vectorizer %s | FileCheck %s +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-ios5.0.0" + +define i64 @mismatched_intrinsics(<4 x i32> %in1, <2 x i32> %in2) nounwind { +; CHECK-LABEL: @mismatched_intrinsics +; CHECK: call i64 @llvm.arm64.neon.saddlv.i64.v4i32 +; CHECK: call i64 @llvm.arm64.neon.saddlv.i64.v2i32 + + 
%vaddlvq_s32.i = tail call i64 @llvm.arm64.neon.saddlv.i64.v4i32(<4 x i32> %in1) #2 + %vaddlv_s32.i = tail call i64 @llvm.arm64.neon.saddlv.i64.v2i32(<2 x i32> %in2) #2 + %tst = icmp sgt i64 %vaddlvq_s32.i, %vaddlv_s32.i + %equal = sext i1 %tst to i64 + ret i64 %equal +} + +declare i64 @llvm.arm64.neon.saddlv.i64.v4i32(<4 x i32> %in1) +declare i64 @llvm.arm64.neon.saddlv.i64.v2i32(<2 x i32> %in1) diff --git a/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll b/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll new file mode 100644 index 0000000..c7ec98a --- /dev/null +++ b/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll @@ -0,0 +1,65 @@ +; RUN: opt -slp-vectorizer -mtriple=x86_64-apple-macosx10.9.0 -mcpu=corei7-avx -S < %s | FileCheck %s +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.9.0" + + +; This test used to crash because we were following phi chains incorrectly. +; We used indices to get the incoming value of two phi nodes rather than +; incoming block lookup. +; This can give wrong results when the ordering of incoming +; edges in the two phi nodes don't match. +;CHECK-LABEL: bar + +%0 = type { %1, %2 } +%1 = type { double, double } +%2 = type { double, double } + + +;define fastcc void @bar() { +define void @bar() { + %1 = getelementptr inbounds %0* undef, i64 0, i32 1, i32 0 + %2 = getelementptr inbounds %0* undef, i64 0, i32 1, i32 1 + %3 = getelementptr inbounds %0* undef, i64 0, i32 1, i32 0 + %4 = getelementptr inbounds %0* undef, i64 0, i32 1, i32 1 + %5 = getelementptr inbounds %0* undef, i64 0, i32 1, i32 0 + %6 = getelementptr inbounds %0* undef, i64 0, i32 1, i32 1 + br label %7 + +;