From 36b56886974eae4f9c5ebc96befd3e7bfe5de338 Mon Sep 17 00:00:00 2001 From: Stephen Hines Date: Wed, 23 Apr 2014 16:57:46 -0700 Subject: Update to LLVM 3.5a. Change-Id: Ifadecab779f128e62e430c2b4f6ddd84953ed617 --- test/Analysis/BasicAA/2007-11-05-SizeCrash.ll | 2 +- .../BasicAA/2007-12-08-OutOfBoundsCrash.ll | 2 +- .../BasicAA/2014-03-18-Maxlookup-reached.ll | 36 +++++ test/Analysis/BasicAA/noalias-bugs.ll | 33 ++++ test/Analysis/BasicAA/phi-aa.ll | 47 ++++++ test/Analysis/BasicAA/pr18573.ll | 53 +++++++ test/Analysis/CostModel/ARM/cast.ll | 34 ++--- test/Analysis/CostModel/ARM64/lit.local.cfg | 3 + test/Analysis/CostModel/ARM64/select.ll | 38 +++++ test/Analysis/CostModel/ARM64/store.ll | 22 +++ test/Analysis/CostModel/PowerPC/ext.ll | 21 +++ test/Analysis/CostModel/PowerPC/load_store.ll | 5 + test/Analysis/CostModel/X86/cast.ll | 97 +++++++++--- test/Analysis/CostModel/X86/cmp.ll | 4 +- test/Analysis/CostModel/X86/scalarize.ll | 41 +++++ test/Analysis/CostModel/X86/vshift-cost.ll | 167 +++++++++++++++++++++ test/Analysis/DependenceAnalysis/Banerjee.ll | 107 ++++++++++++- test/Analysis/DependenceAnalysis/GCD.ll | 83 +++++++++- test/Analysis/LazyCallGraph/basic.ll | 126 ++++++++++++++++ test/Analysis/Lint/address-spaces.ll | 25 +++ .../ScalarEvolution/2009-04-22-TruncCast.ll | 2 +- .../ScalarEvolution/2012-03-26-LoadConstant.ll | 2 +- .../ScalarEvolution/2012-05-18-LoopPredRecurse.ll | 2 +- test/Analysis/ScalarEvolution/and-xor.ll | 18 ++- test/Analysis/ScalarEvolution/fold.ll | 26 ++++ test/Analysis/ScalarEvolution/nsw-offset.ll | 2 +- test/Analysis/ScalarEvolution/trip-count-pow2.ll | 53 +++++++ test/Analysis/ScalarEvolution/trip-count-switch.ll | 30 ++++ test/Analysis/ScalarEvolution/xor-and.ll | 13 -- .../Analysis/ScalarEvolution/zext-signed-addrec.ll | 81 ++++++++++ test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll | 4 +- test/Analysis/TypeBasedAliasAnalysis/tbaa-path.ll | 18 +-- 32 files changed, 1123 insertions(+), 74 deletions(-) create mode 100644 
test/Analysis/BasicAA/2014-03-18-Maxlookup-reached.ll create mode 100644 test/Analysis/BasicAA/noalias-bugs.ll create mode 100644 test/Analysis/BasicAA/pr18573.ll create mode 100644 test/Analysis/CostModel/ARM64/lit.local.cfg create mode 100644 test/Analysis/CostModel/ARM64/select.ll create mode 100644 test/Analysis/CostModel/ARM64/store.ll create mode 100644 test/Analysis/CostModel/PowerPC/ext.ll create mode 100644 test/Analysis/CostModel/X86/scalarize.ll create mode 100644 test/Analysis/CostModel/X86/vshift-cost.ll create mode 100644 test/Analysis/LazyCallGraph/basic.ll create mode 100644 test/Analysis/Lint/address-spaces.ll create mode 100644 test/Analysis/ScalarEvolution/trip-count-pow2.ll create mode 100644 test/Analysis/ScalarEvolution/trip-count-switch.ll delete mode 100644 test/Analysis/ScalarEvolution/xor-and.ll create mode 100644 test/Analysis/ScalarEvolution/zext-signed-addrec.ll (limited to 'test/Analysis') diff --git a/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll b/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll index 563d332..32d9930 100644 --- a/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll +++ b/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll @@ -7,7 +7,7 @@ target triple = "x86_64-unknown-linux-gnu" %struct.pci_device_id = type { i32, i32, i32, i32, i32, i32, i64 } %struct.usb_bus = type { %struct.device* } %struct.usb_hcd = type { %struct.usb_bus, i64, [0 x i64] } -@uhci_pci_ids = external constant [1 x %struct.pci_device_id] ; <[1 x %struct.pci_device_id]*> [#uses=1] +@uhci_pci_ids = constant [1 x %struct.pci_device_id] zeroinitializer @__mod_pci_device_table = alias [1 x %struct.pci_device_id]* @uhci_pci_ids ; <[1 x %struct.pci_device_id]*> [#uses=0] diff --git a/test/Analysis/BasicAA/2007-12-08-OutOfBoundsCrash.ll b/test/Analysis/BasicAA/2007-12-08-OutOfBoundsCrash.ll index 52d0af1..cd997ea 100644 --- a/test/Analysis/BasicAA/2007-12-08-OutOfBoundsCrash.ll +++ b/test/Analysis/BasicAA/2007-12-08-OutOfBoundsCrash.ll @@ -7,7 +7,7 @@ target triple = 
"x86_64-unknown-linux-gnu" %struct.pci_device_id = type { i32, i32, i32, i32, i32, i32, i64 } %struct.usb_bus = type { %struct.device* } %struct.usb_hcd = type { %struct.usb_bus, [0 x i64] } -@pci_ids = external constant [1 x %struct.pci_device_id] ; <[1 x %struct.pci_device_id]*> [#uses=1] +@pci_ids = constant [1 x %struct.pci_device_id] zeroinitializer @__mod_pci_device_table = alias [1 x %struct.pci_device_id]* @pci_ids ; <[1 x %struct.pci_device_id]*> [#uses=0] diff --git a/test/Analysis/BasicAA/2014-03-18-Maxlookup-reached.ll b/test/Analysis/BasicAA/2014-03-18-Maxlookup-reached.ll new file mode 100644 index 0000000..bc2512e --- /dev/null +++ b/test/Analysis/BasicAA/2014-03-18-Maxlookup-reached.ll @@ -0,0 +1,36 @@ +; RUN: opt < %s -basicaa -gvn -S | FileCheck %s + +; PR15967 +; BasicAA claims no alias when there is (due to a problem when the MaxLookup +; limit was reached). + +target datalayout = "e" + +%struct.foo = type { i32, i32 } + +define i32 @main() { + %t = alloca %struct.foo, align 4 + %1 = getelementptr inbounds %struct.foo* %t, i32 0, i32 0 + store i32 1, i32* %1, align 4 + %2 = getelementptr inbounds %struct.foo* %t, i64 1 + %3 = bitcast %struct.foo* %2 to i8* + %4 = getelementptr inbounds i8* %3, i32 -1 + store i8 0, i8* %4 + %5 = getelementptr inbounds i8* %4, i32 -1 + store i8 0, i8* %5 + %6 = getelementptr inbounds i8* %5, i32 -1 + store i8 0, i8* %6 + %7 = getelementptr inbounds i8* %6, i32 -1 + store i8 0, i8* %7 + %8 = getelementptr inbounds i8* %7, i32 -1 + store i8 0, i8* %8 + %9 = getelementptr inbounds i8* %8, i32 -1 + store i8 0, i8* %9 + %10 = getelementptr inbounds i8* %9, i32 -1 + store i8 0, i8* %10 + %11 = getelementptr inbounds i8* %10, i32 -1 + store i8 0, i8* %11 + %12 = load i32* %1, align 4 + ret i32 %12 +; CHECK: ret i32 %12 +} diff --git a/test/Analysis/BasicAA/noalias-bugs.ll b/test/Analysis/BasicAA/noalias-bugs.ll new file mode 100644 index 0000000..2bcc14f --- /dev/null +++ b/test/Analysis/BasicAA/noalias-bugs.ll @@ -0,0 
+1,33 @@ +; RUN: opt -S -basicaa -dse < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; We incorrectly returned noalias in the example below for "ptr.64" and +; "either_ptr.64". +; PR18460 + +%nested = type { %nested.i64 } +%nested.i64 = type { i64 } + +define i64 @testcase(%nested * noalias %p1, %nested * noalias %p2, + i32 %a, i32 %b) { + %ptr = getelementptr inbounds %nested* %p1, i64 -1, i32 0 + %ptr.64 = getelementptr inbounds %nested.i64* %ptr, i64 0, i32 0 + %ptr2= getelementptr inbounds %nested* %p2, i64 0, i32 0 + %cmp = icmp ult i32 %a, %b + %either_ptr = select i1 %cmp, %nested.i64* %ptr2, %nested.i64* %ptr + %either_ptr.64 = getelementptr inbounds %nested.i64* %either_ptr, i64 0, i32 0 + +; Because either_ptr.64 and ptr.64 can alias (we used to return noalias) +; elimination of the first store is not valid. + +; CHECK: store i64 2 +; CHECK: load +; CHECK: store i64 1 + + store i64 2, i64* %ptr.64, align 8 + %r = load i64* %either_ptr.64, align 8 + store i64 1, i64* %ptr.64, align 8 + ret i64 %r +} diff --git a/test/Analysis/BasicAA/phi-aa.ll b/test/Analysis/BasicAA/phi-aa.ll index 6aa26c1..74279e1 100644 --- a/test/Analysis/BasicAA/phi-aa.ll +++ b/test/Analysis/BasicAA/phi-aa.ll @@ -1,10 +1,14 @@ ; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + ; rdar://7282591 @X = common global i32 0 @Y = common global i32 0 @Z = common global i32 0 +; CHECK-LABEL: foo ; CHECK: NoAlias: i32* %P, i32* @Z define void @foo(i32 %cond) nounwind { @@ -29,3 +33,46 @@ bb2: return: ret void } + +; Pointers can vary in between iterations of loops. 
+; PR18068 + +; CHECK-LABEL: pr18068 +; CHECK: MayAlias: i32* %0, i32* %arrayidx5 + +define i32 @pr18068(i32* %jj7, i32* %j) { +entry: + %oa5 = alloca [100 x i32], align 16 + br label %codeRepl + +codeRepl: + %0 = phi i32* [ %arrayidx13, %for.body ], [ %j, %entry ] + %targetBlock = call i1 @cond(i32* %jj7) + br i1 %targetBlock, label %for.body, label %bye + +for.body: + %1 = load i32* %jj7, align 4 + %idxprom4 = zext i32 %1 to i64 + %arrayidx5 = getelementptr inbounds [100 x i32]* %oa5, i64 0, i64 %idxprom4 + %2 = load i32* %arrayidx5, align 4 + %sub6 = sub i32 %2, 6 + store i32 %sub6, i32* %arrayidx5, align 4 + ; %0 and %arrayidx5 can alias! It is not safe to DSE the above store. + %3 = load i32* %0, align 4 + store i32 %3, i32* %arrayidx5, align 4 + %sub11 = add i32 %1, -1 + %idxprom12 = zext i32 %sub11 to i64 + %arrayidx13 = getelementptr inbounds [100 x i32]* %oa5, i64 0, i64 %idxprom12 + call void @inc(i32* %jj7) + br label %codeRepl + +bye: + %.reload = load i32* %jj7, align 4 + ret i32 %.reload +} + +declare i1 @cond(i32*) + +declare void @inc(i32*) + + diff --git a/test/Analysis/BasicAA/pr18573.ll b/test/Analysis/BasicAA/pr18573.ll new file mode 100644 index 0000000..1d2a316 --- /dev/null +++ b/test/Analysis/BasicAA/pr18573.ll @@ -0,0 +1,53 @@ +; RUN: opt %s -O2 -S | FileCheck %s + +; Check that llvm.x86.avx2.gather.d.ps.256 intrinsic is not eliminated as gather and store memory accesses are based on arr.ptr +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind readonly +declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*, <8 x i32>, <8 x float>, i8) #0 + +; Function Attrs: nounwind +define <8 x float> @foo1(i8* noalias readonly %arr.ptr, <8 x i32>* noalias readonly %vix.ptr, i8* noalias %t2.ptr) #1 { +allocas: + %vix = load <8 x i32>* %vix.ptr, align 4 + %t1.ptr = getelementptr i8* %arr.ptr, i8 4 + + %v1 = tail call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 
x float> undef, i8* %arr.ptr, <8 x i32> %vix, <8 x float> , i8 1) #2 + store i8 1, i8* %t1.ptr, align 4 + + %v2 = tail call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> undef, i8* %arr.ptr, <8 x i32> %vix, <8 x float> , i8 1) #2 + %res = fadd <8 x float> %v1, %v2 + + ret <8 x float> %res +} +; CHECK: foo1 +; CHECK: llvm.x86.avx2.gather.d.ps.256 +; CHECK: store +; CHECK: llvm.x86.avx2.gather.d.ps.256 + +; Check that second gather is eliminated as gather and store memory accesses are based on different no-aliasing pointers + +; Function Attrs: nounwind +define <8 x float> @foo2(i8* noalias readonly %arr.ptr, <8 x i32>* noalias readonly %vix.ptr, i8* noalias %t2.ptr) #1 { +allocas: + %vix = load <8 x i32>* %vix.ptr, align 4 + %t1.ptr = getelementptr i8* %arr.ptr, i8 4 + + %v1 = tail call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> undef, i8* %arr.ptr, <8 x i32> %vix, <8 x float> , i8 1) #2 + store i8 1, i8* %t2.ptr, align 4 + + %v2 = tail call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> undef, i8* %arr.ptr, <8 x i32> %vix, <8 x float> , i8 1) #2 + %res = fadd <8 x float> %v1, %v2 + + ret <8 x float> %res +} +; CHECK: foo2 +; CHECK: llvm.x86.avx2.gather.d.ps.256 +; CHECK: store +; CHECK-NOT: llvm.x86.avx2.gather.d.ps.256 + +attributes #0 = { nounwind readonly } +attributes #1 = { nounwind "target-cpu"="corei7-avx" "target-features"="+avx2,+popcnt,+cmov,+f16c,+rdrnd,+fma" } +attributes #2 = { nounwind } + diff --git a/test/Analysis/CostModel/ARM/cast.ll b/test/Analysis/CostModel/ARM/cast.ll index 0cdd61c..662110f 100644 --- a/test/Analysis/CostModel/ARM/cast.ll +++ b/test/Analysis/CostModel/ARM/cast.ll @@ -221,9 +221,9 @@ define i32 @casts() { %r96 = fptoui <2 x float> undef to <2 x i32> ; CHECK: cost of 1 {{.*}} fptosi %r97 = fptosi <2 x float> undef to <2 x i32> - ; CHECK: cost of 24 {{.*}} fptoui + ; CHECK: cost of 28 {{.*}} fptoui %r98 = fptoui <2 x float> undef to <2 x i64> - ; CHECK: cost of 24 {{.*}} fptosi + ; CHECK: cost of 28 
{{.*}} fptosi %r99 = fptosi <2 x float> undef to <2 x i64> ; CHECK: cost of 8 {{.*}} fptoui @@ -242,9 +242,9 @@ define i32 @casts() { %r106 = fptoui <2 x double> undef to <2 x i32> ; CHECK: cost of 2 {{.*}} fptosi %r107 = fptosi <2 x double> undef to <2 x i32> - ; CHECK: cost of 24 {{.*}} fptoui + ; CHECK: cost of 28 {{.*}} fptoui %r108 = fptoui <2 x double> undef to <2 x i64> - ; CHECK: cost of 24 {{.*}} fptosi + ; CHECK: cost of 28 {{.*}} fptosi %r109 = fptosi <2 x double> undef to <2 x i64> ; CHECK: cost of 16 {{.*}} fptoui @@ -263,9 +263,9 @@ define i32 @casts() { %r116 = fptoui <4 x float> undef to <4 x i32> ; CHECK: cost of 1 {{.*}} fptosi %r117 = fptosi <4 x float> undef to <4 x i32> - ; CHECK: cost of 48 {{.*}} fptoui + ; CHECK: cost of 56 {{.*}} fptoui %r118 = fptoui <4 x float> undef to <4 x i64> - ; CHECK: cost of 48 {{.*}} fptosi + ; CHECK: cost of 56 {{.*}} fptosi %r119 = fptosi <4 x float> undef to <4 x i64> ; CHECK: cost of 16 {{.*}} fptoui @@ -284,9 +284,9 @@ define i32 @casts() { %r126 = fptoui <4 x double> undef to <4 x i32> ; CHECK: cost of 16 {{.*}} fptosi %r127 = fptosi <4 x double> undef to <4 x i32> - ; CHECK: cost of 48 {{.*}} fptoui + ; CHECK: cost of 56 {{.*}} fptoui %r128 = fptoui <4 x double> undef to <4 x i64> - ; CHECK: cost of 48 {{.*}} fptosi + ; CHECK: cost of 56 {{.*}} fptosi %r129 = fptosi <4 x double> undef to <4 x i64> ; CHECK: cost of 32 {{.*}} fptoui @@ -305,9 +305,9 @@ define i32 @casts() { %r136 = fptoui <8 x float> undef to <8 x i32> ; CHECK: cost of 2 {{.*}} fptosi %r137 = fptosi <8 x float> undef to <8 x i32> - ; CHECK: cost of 96 {{.*}} fptoui + ; CHECK: cost of 112 {{.*}} fptoui %r138 = fptoui <8 x float> undef to <8 x i64> - ; CHECK: cost of 96 {{.*}} fptosi + ; CHECK: cost of 112 {{.*}} fptosi %r139 = fptosi <8 x float> undef to <8 x i64> ; CHECK: cost of 32 {{.*}} fptoui @@ -326,9 +326,9 @@ define i32 @casts() { %r146 = fptoui <8 x double> undef to <8 x i32> ; CHECK: cost of 32 {{.*}} fptosi %r147 = fptosi <8 x 
double> undef to <8 x i32> - ; CHECK: cost of 96 {{.*}} fptoui + ; CHECK: cost of 112 {{.*}} fptoui %r148 = fptoui <8 x double> undef to <8 x i64> - ; CHECK: cost of 96 {{.*}} fptosi + ; CHECK: cost of 112 {{.*}} fptosi %r149 = fptosi <8 x double> undef to <8 x i64> ; CHECK: cost of 64 {{.*}} fptoui @@ -347,9 +347,9 @@ define i32 @casts() { %r156 = fptoui <16 x float> undef to <16 x i32> ; CHECK: cost of 4 {{.*}} fptosi %r157 = fptosi <16 x float> undef to <16 x i32> - ; CHECK: cost of 192 {{.*}} fptoui + ; CHECK: cost of 224 {{.*}} fptoui %r158 = fptoui <16 x float> undef to <16 x i64> - ; CHECK: cost of 192 {{.*}} fptosi + ; CHECK: cost of 224 {{.*}} fptosi %r159 = fptosi <16 x float> undef to <16 x i64> ; CHECK: cost of 64 {{.*}} fptoui @@ -368,9 +368,9 @@ define i32 @casts() { %r166 = fptoui <16 x double> undef to <16 x i32> ; CHECK: cost of 64 {{.*}} fptosi %r167 = fptosi <16 x double> undef to <16 x i32> - ; CHECK: cost of 192 {{.*}} fptoui + ; CHECK: cost of 224 {{.*}} fptoui %r168 = fptoui <16 x double> undef to <16 x i64> - ; CHECK: cost of 192 {{.*}} fptosi + ; CHECK: cost of 224 {{.*}} fptosi %r169 = fptosi <16 x double> undef to <16 x i64> ; CHECK: cost of 8 {{.*}} uitofp @@ -528,7 +528,7 @@ define i32 @casts() { %r242 = uitofp <16 x i8> undef to <16 x double> ; CHECK: cost of 64 {{.*}} sitofp %r243 = sitofp <16 x i8> undef to <16 x double> - ; C4ECK: cost of 64 {{.*}} uitofp + ; CHECK: cost of 64 {{.*}} uitofp %r244 = uitofp <16 x i16> undef to <16 x double> ; CHECK: cost of 64 {{.*}} sitofp %r245 = sitofp <16 x i16> undef to <16 x double> diff --git a/test/Analysis/CostModel/ARM64/lit.local.cfg b/test/Analysis/CostModel/ARM64/lit.local.cfg new file mode 100644 index 0000000..84ac981 --- /dev/null +++ b/test/Analysis/CostModel/ARM64/lit.local.cfg @@ -0,0 +1,3 @@ +targets = set(config.root.targets_to_build.split()) +if not 'ARM64' in targets: + config.unsupported = True diff --git a/test/Analysis/CostModel/ARM64/select.ll 
b/test/Analysis/CostModel/ARM64/select.ll new file mode 100644 index 0000000..216dc5d --- /dev/null +++ b/test/Analysis/CostModel/ARM64/select.ll @@ -0,0 +1,38 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=arm64-apple-ios -mcpu=cyclone | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" + +; CHECK-LABEL: select +define void @select() { + ; Scalar values + ; CHECK: cost of 1 {{.*}} select + %v1 = select i1 undef, i8 undef, i8 undef + ; CHECK: cost of 1 {{.*}} select + %v2 = select i1 undef, i16 undef, i16 undef + ; CHECK: cost of 1 {{.*}} select + %v3 = select i1 undef, i32 undef, i32 undef + ; CHECK: cost of 1 {{.*}} select + %v4 = select i1 undef, i64 undef, i64 undef + ; CHECK: cost of 1 {{.*}} select + %v5 = select i1 undef, float undef, float undef + ; CHECK: cost of 1 {{.*}} select + %v6 = select i1 undef, double undef, double undef + + ; Vector values - check for vectors that have a high cost because they end up + ; scalarized. 
+ ; CHECK: cost of 320 {{.*}} select + %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef + + ; CHECK: cost of 160 {{.*}} select + %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef + ; CHECK: cost of 320 {{.*}} select + %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef + + ; CHECK: cost of 80 {{.*}} select + %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef + ; CHECK: cost of 160 {{.*}} select + %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef + ; CHECK: cost of 320 {{.*}} select + %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef + + ret void +} diff --git a/test/Analysis/CostModel/ARM64/store.ll b/test/Analysis/CostModel/ARM64/store.ll new file mode 100644 index 0000000..0c9883c --- /dev/null +++ b/test/Analysis/CostModel/ARM64/store.ll @@ -0,0 +1,22 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=arm64-apple-ios -mcpu=cyclone | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" +; CHECK-LABEL: store +define void @store() { + ; Stores of <2 x i64> should be expensive because we don't split them + ; and unaligned 16b stores have bad performance. + ; CHECK: cost of 12 {{.*}} store + store <2 x i64> undef, <2 x i64> * undef + + ; We scalarize the loads/stores because there is no vector register name for + ; these types (they get extended to v.4h/v.2s). 
+ ; CHECK: cost of 16 {{.*}} store + store <2 x i8> undef, <2 x i8> * undef + ; CHECK: cost of 64 {{.*}} store + store <4 x i8> undef, <4 x i8> * undef + ; CHECK: cost of 16 {{.*}} load + load <2 x i8> * undef + ; CHECK: cost of 64 {{.*}} load + load <4 x i8> * undef + + ret void +} diff --git a/test/Analysis/CostModel/PowerPC/ext.ll b/test/Analysis/CostModel/PowerPC/ext.ll new file mode 100644 index 0000000..daaa8f5 --- /dev/null +++ b/test/Analysis/CostModel/PowerPC/ext.ll @@ -0,0 +1,21 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx | FileCheck %s +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define void @exts() { + + ; CHECK: cost of 1 {{.*}} sext + %v1 = sext i16 undef to i32 + + ; CHECK: cost of 1 {{.*}} sext + %v2 = sext <2 x i16> undef to <2 x i32> + + ; CHECK: cost of 1 {{.*}} sext + %v3 = sext <4 x i16> undef to <4 x i32> + + ; CHECK: cost of 216 {{.*}} sext + %v4 = sext <8 x i16> undef to <8 x i32> + + ret void +} + diff --git a/test/Analysis/CostModel/PowerPC/load_store.ll b/test/Analysis/CostModel/PowerPC/load_store.ll index c77cce9..8145a1d 100644 --- a/test/Analysis/CostModel/PowerPC/load_store.ll +++ b/test/Analysis/CostModel/PowerPC/load_store.ll @@ -29,6 +29,11 @@ define i32 @loads(i32 %arg) { ; CHECK: cost of 4 {{.*}} load load i128* undef, align 4 + ; FIXME: There actually are sub-vector Altivec loads, and so we could handle + ; this with a small expense, but we don't currently. 
+ ; CHECK: cost of 60 {{.*}} load + load <4 x i16>* undef, align 2 + ret i32 undef } diff --git a/test/Analysis/CostModel/X86/cast.ll b/test/Analysis/CostModel/X86/cast.ll index f3c1283..7f97b17 100644 --- a/test/Analysis/CostModel/X86/cast.ll +++ b/test/Analysis/CostModel/X86/cast.ll @@ -1,10 +1,11 @@ -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX2 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" define i32 @add(i32 %arg) { - +; CHECK-LABEL: for function 'add' ; -- Same size registeres -- ;CHECK: cost of 1 {{.*}} zext %A = zext <4 x i1> undef to <4 x i32> @@ -33,57 +34,106 @@ define i32 @add(i32 %arg) { } define i32 @zext_sext(<8 x i1> %in) { - ;CHECK: cost of 6 {{.*}} zext +; CHECK-AVX2-LABEL: for function 'zext_sext' +; CHECK-AVX-LABEL: for function 'zext_sext' + ;CHECK-AVX2: cost of 3 {{.*}} zext + ;CHECK-AVX: cost of 4 {{.*}} zext %Z = zext <8 x i1> %in to <8 x i32> - ;CHECK: cost of 9 {{.*}} sext + ;CHECK-AVX2: cost of 3 {{.*}} sext + ;CHECK-AVX: cost of 7 {{.*}} sext %S = sext <8 x i1> %in to <8 x i32> - ;CHECK: cost of 1 {{.*}} zext + ;CHECK-AVX2: cost of 1 {{.*}} zext + ;CHECK-AVX: cost of 4 {{.*}} zext %A1 = zext <16 x i8> undef to <16 x i16> - ;CHECK: cost of 1 {{.*}} sext + ;CHECK-AVX2: cost of 1 {{.*}} sext + ;CHECK-AVX: cost of 4 {{.*}} sext %A2 = sext <16 x i8> undef to <16 x i16> - ;CHECK: cost of 1 {{.*}} sext + ;CHECK-AVX2: cost of 1 {{.*}} sext + ;CHECK-AVX: cost of 4 {{.*}} sext %A = sext <8 x i16> undef to <8 x i32> - ;CHECK: cost 
of 1 {{.*}} zext + ;CHECK-AVX2: cost of 1 {{.*}} zext + ;CHECK-AVX: cost of 4 {{.*}} zext %B = zext <8 x i16> undef to <8 x i32> - ;CHECK: cost of 1 {{.*}} sext + ;CHECK-AVX2: cost of 1 {{.*}} sext + ;CHECK-AVX: cost of 4 {{.*}} sext %C = sext <4 x i32> undef to <4 x i64> - ;CHECK: cost of 6 {{.*}} sext - %C1 = sext <4 x i8> undef to <4 x i64> - ;CHECK: cost of 6 {{.*}} sext - %C2 = sext <4 x i16> undef to <4 x i64> - ;CHECK: cost of 1 {{.*}} zext + ;CHECK-AVX2: cost of 3 {{.*}} zext + ;CHECK-AVX: cost of 4 {{.*}} zext + %C.v8i8.z = zext <8 x i8> undef to <8 x i32> + ;CHECK-AVX2: cost of 3 {{.*}} sext + ;CHECK-AVX: cost of 7 {{.*}} sext + %C.v8i8.s = sext <8 x i8> undef to <8 x i32> + ;CHECK-AVX2: cost of 3 {{.*}} zext + ;CHECK-AVX: cost of 3 {{.*}} zext + %C.v4i16.z = zext <4 x i16> undef to <4 x i64> + ;CHECK-AVX2: cost of 3 {{.*}} sext + ;CHECK-AVX: cost of 6 {{.*}} sext + %C.v4i16.s = sext <4 x i16> undef to <4 x i64> + + ;CHECK-AVX2: cost of 3 {{.*}} zext + ;CHECK-AVX: cost of 4 {{.*}} zext + %C.v4i8.z = zext <4 x i8> undef to <4 x i64> + ;CHECK-AVX2: cost of 3 {{.*}} sext + ;CHECK-AVX: cost of 6 {{.*}} sext + %C.v4i8.s = sext <4 x i8> undef to <4 x i64> + + ;CHECK-AVX2: cost of 1 {{.*}} zext + ;CHECK-AVX: cost of 4 {{.*}} zext %D = zext <4 x i32> undef to <4 x i64> - ;CHECK: cost of 1 {{.*}} trunc + ;CHECK-AVX2: cost of 2 {{.*}} trunc + ;CHECK-AVX: cost of 4 {{.*}} trunc %E = trunc <4 x i64> undef to <4 x i32> - ;CHECK: cost of 1 {{.*}} trunc + ;CHECK-AVX2: cost of 2 {{.*}} trunc + ;CHECK-AVX: cost of 5 {{.*}} trunc %F = trunc <8 x i32> undef to <8 x i16> - ;CHECK: cost of 2 {{.*}} trunc + ;CHECK-AVX2: cost of 4 {{.*}} trunc + ;CHECK-AVX: cost of 4 {{.*}} trunc %F1 = trunc <16 x i16> undef to <16 x i8> - - ;CHECK: cost of 3 {{.*}} trunc + ;CHECK-AVX2: cost of 2 {{.*}} trunc + ;CHECK-AVX: cost of 4 {{.*}} trunc + %F2 = trunc <8 x i32> undef to <8 x i8> + ;CHECK-AVX2: cost of 2 {{.*}} trunc + ;CHECK-AVX: cost of 4 {{.*}} trunc + %F3 = trunc <4 x i64> undef to 
<4 x i8> + + ;CHECK-AVX2: cost of 4 {{.*}} trunc + ;CHECK-AVX: cost of 9 {{.*}} trunc %G = trunc <8 x i64> undef to <8 x i32> ret i32 undef } define i32 @masks8(<8 x i1> %in) { - ;CHECK: cost of 6 {{.*}} zext +; CHECK-AVX2-LABEL: for function 'masks8' +; CHECK-AVX-LABEL: for function 'masks8' + + ;CHECK-AVX2: cost of 3 {{.*}} zext + ;CHECK-AVX: cost of 4 {{.*}} zext %Z = zext <8 x i1> %in to <8 x i32> - ;CHECK: cost of 9 {{.*}} sext + ;CHECK-AVX2: cost of 3 {{.*}} sext + ;CHECK-AVX: cost of 7 {{.*}} sext %S = sext <8 x i1> %in to <8 x i32> ret i32 undef } define i32 @masks4(<4 x i1> %in) { - ;CHECK: cost of 8 {{.*}} sext +; CHECK-AVX2-LABEL: for function 'masks4' +; CHECK-AVX-LABEL: for function 'masks4' + + ;CHECK-AVX2: cost of 3 {{.*}} zext + ;CHECK-AVX: cost of 4 {{.*}} zext + %Z = zext <4 x i1> %in to <4 x i64> + ;CHECK-AVX2: cost of 3 {{.*}} sext + ;CHECK-AVX: cost of 6 {{.*}} sext %S = sext <4 x i1> %in to <4 x i64> ret i32 undef } define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) { +; CHECK-LABEL: for function 'sitofp4' ; CHECK: cost of 3 {{.*}} sitofp %A1 = sitofp <4 x i1> %a to <4 x float> ; CHECK: cost of 3 {{.*}} sitofp @@ -107,6 +157,7 @@ define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) { } define void @sitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) { +; CHECK-LABEL: for function 'sitofp8' ; CHECK: cost of 8 {{.*}} sitofp %A1 = sitofp <8 x i1> %a to <8 x float> @@ -122,6 +173,7 @@ define void @sitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) { } define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) { +; CHECK-LABEL: for function 'uitofp4' ; CHECK: cost of 7 {{.*}} uitofp %A1 = uitofp <4 x i1> %a to <4 x float> ; CHECK: cost of 7 {{.*}} uitofp @@ -145,6 +197,7 @@ define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) { } define void @uitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) { +; CHECK-LABEL: for function 'uitofp8' ; 
CHECK: cost of 6 {{.*}} uitofp %A1 = uitofp <8 x i1> %a to <8 x float> diff --git a/test/Analysis/CostModel/X86/cmp.ll b/test/Analysis/CostModel/X86/cmp.ll index 713b374..9f2bdb3 100644 --- a/test/Analysis/CostModel/X86/cmp.ll +++ b/test/Analysis/CostModel/X86/cmp.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck --check-prefix=AVX1 %s -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck --check-prefix=AVX2 %s +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck -check-prefix=CHECK -check-prefix=AVX1 %s +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck -check-prefix=CHECK -check-prefix=AVX2 %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Analysis/CostModel/X86/scalarize.ll b/test/Analysis/CostModel/X86/scalarize.ll new file mode 100644 index 0000000..fc25fcb --- /dev/null +++ b/test/Analysis/CostModel/X86/scalarize.ll @@ -0,0 +1,41 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=i386 -mcpu=corei7-avx | FileCheck %s -check-prefix=CHECK32 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s -check-prefix=CHECK64 + +; Test vector scalarization costs. 
+; RUN: llc < %s -march=x86 -mcpu=i386 +; RUN: llc < %s -march=x86 -mcpu=yonah + +%i4 = type <4 x i32> +%i8 = type <2 x i64> + +;;; TEST HANDLING OF VARIOUS VECTOR SIZES + +declare %i4 @llvm.bswap.v4i32(%i4) +declare %i8 @llvm.bswap.v2i64(%i8) + +declare %i4 @llvm.ctpop.v4i32(%i4) +declare %i8 @llvm.ctpop.v2i64(%i8) + +; CHECK32-LABEL: test_scalarized_intrinsics +; CHECK64-LABEL: test_scalarized_intrinsics +define void @test_scalarized_intrinsics() { + %r1 = add %i8 undef, undef + +; CHECK32: cost of 12 {{.*}}bswap.v4i32 +; CHECK64: cost of 12 {{.*}}bswap.v4i32 + %r2 = call %i4 @llvm.bswap.v4i32(%i4 undef) +; CHECK32: cost of 10 {{.*}}bswap.v2i64 +; CHECK64: cost of 6 {{.*}}bswap.v2i64 + %r3 = call %i8 @llvm.bswap.v2i64(%i8 undef) + +; CHECK32: cost of 12 {{.*}}ctpop.v4i32 +; CHECK64: cost of 12 {{.*}}ctpop.v4i32 + %r4 = call %i4 @llvm.ctpop.v4i32(%i4 undef) +; CHECK32: cost of 10 {{.*}}ctpop.v2i64 +; CHECK64: cost of 6 {{.*}}ctpop.v2i64 + %r5 = call %i8 @llvm.ctpop.v2i64(%i8 undef) + +; CHECK32: ret +; CHECK64: ret + ret void +} diff --git a/test/Analysis/CostModel/X86/vshift-cost.ll b/test/Analysis/CostModel/X86/vshift-cost.ll new file mode 100644 index 0000000..84d7246 --- /dev/null +++ b/test/Analysis/CostModel/X86/vshift-cost.ll @@ -0,0 +1,167 @@ +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2,-sse4.1 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE41 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2 + + +; Verify the cost of vector shift left instructions. 
+ +; We always emit a single pmullw in the case of v8i16 vector shifts by +; non-uniform constant. + +define <8 x i16> @test1(<8 x i16> %a) { + %shl = shl <8 x i16> %a, + ret <8 x i16> %shl +} +; CHECK: 'Cost Model Analysis' for function 'test1': +; CHECK: Found an estimated cost of 1 for instruction: %shl + + +define <8 x i16> @test2(<8 x i16> %a) { + %shl = shl <8 x i16> %a, + ret <8 x i16> %shl +} +; CHECK: 'Cost Model Analysis' for function 'test2': +; CHECK: Found an estimated cost of 1 for instruction: %shl + + +; With SSE4.1, v4i32 shifts can be lowered into a single pmulld instruction. +; Make sure that the estimated cost is always 1 except for the case where +; we only have SSE2 support. With SSE2, we are forced to special lower the +; v4i32 mul as a 2x shuffle, 2x pmuludq, 2x shuffle. + +define <4 x i32> @test3(<4 x i32> %a) { + %shl = shl <4 x i32> %a, + ret <4 x i32> %shl +} +; CHECK: 'Cost Model Analysis' for function 'test3': +; SSE2: Found an estimated cost of 6 for instruction: %shl +; SSE41: Found an estimated cost of 1 for instruction: %shl +; AVX: Found an estimated cost of 1 for instruction: %shl +; AVX2: Found an estimated cost of 1 for instruction: %shl + + +define <4 x i32> @test4(<4 x i32> %a) { + %shl = shl <4 x i32> %a, + ret <4 x i32> %shl +} +; CHECK: 'Cost Model Analysis' for function 'test4': +; SSE2: Found an estimated cost of 6 for instruction: %shl +; SSE41: Found an estimated cost of 1 for instruction: %shl +; AVX: Found an estimated cost of 1 for instruction: %shl +; AVX2: Found an estimated cost of 1 for instruction: %shl + + +; On AVX2 we are able to lower the following shift into a single +; vpsllvq. Therefore, the expected cost is only 1. +; In all other cases, this shift is scalarized as the target does not support +; vpsllv instructions. 
+ +define <2 x i64> @test5(<2 x i64> %a) { + %shl = shl <2 x i64> %a, + ret <2 x i64> %shl +} +; CHECK: 'Cost Model Analysis' for function 'test5': +; SSE2: Found an estimated cost of 20 for instruction: %shl +; SSE41: Found an estimated cost of 20 for instruction: %shl +; AVX: Found an estimated cost of 20 for instruction: %shl +; AVX2: Found an estimated cost of 1 for instruction: %shl + + +; v16i16 and v8i32 shift left by non-uniform constant are lowered into +; vector multiply instructions. With AVX (but not AVX2), the vector multiply +; is lowered into a sequence of: 1 extract + 2 vpmullw + 1 insert. +; +; With AVX2, instruction vpmullw works with 256bit quantities and +; therefore there is no need to split the resulting vector multiply into +; a sequence of two multiply. +; +; With SSE2 and SSE4.1, the vector shift cost for 'test6' is twice +; the cost computed in the case of 'test1'. That is because the backend +; simply emits 2 pmullw with no extract/insert. + + +define <16 x i16> @test6(<16 x i16> %a) { + %shl = shl <16 x i16> %a, + ret <16 x i16> %shl +} +; CHECK: 'Cost Model Analysis' for function 'test6': +; SSE2: Found an estimated cost of 2 for instruction: %shl +; SSE41: Found an estimated cost of 2 for instruction: %shl +; AVX: Found an estimated cost of 4 for instruction: %shl +; AVX2: Found an estimated cost of 1 for instruction: %shl + + +; With SSE2 and SSE4.1, the vector shift cost for 'test7' is twice +; the cost computed in the case of 'test3'. That is because the multiply +; is type-legalized into two 4i32 vector multiply. 
+ +define <8 x i32> @test7(<8 x i32> %a) { + %shl = shl <8 x i32> %a, + ret <8 x i32> %shl +} +; CHECK: 'Cost Model Analysis' for function 'test7': +; SSE2: Found an estimated cost of 12 for instruction: %shl +; SSE41: Found an estimated cost of 2 for instruction: %shl +; AVX: Found an estimated cost of 4 for instruction: %shl +; AVX2: Found an estimated cost of 1 for instruction: %shl + + +; On AVX2 we are able to lower the following shift into a single +; vpsllvq. Therefore, the expected cost is only 1. +; In all other cases, this shift is scalarized as the target does not support +; vpsllv instructions. + +define <4 x i64> @test8(<4 x i64> %a) { + %shl = shl <4 x i64> %a, + ret <4 x i64> %shl +} +; CHECK: 'Cost Model Analysis' for function 'test8': +; SSE2: Found an estimated cost of 40 for instruction: %shl +; SSE41: Found an estimated cost of 40 for instruction: %shl +; AVX: Found an estimated cost of 40 for instruction: %shl +; AVX2: Found an estimated cost of 1 for instruction: %shl + + +; Same as 'test6', with the difference that the cost is double. + +define <32 x i16> @test9(<32 x i16> %a) { + %shl = shl <32 x i16> %a, + ret <32 x i16> %shl +} +; CHECK: 'Cost Model Analysis' for function 'test9': +; SSE2: Found an estimated cost of 4 for instruction: %shl +; SSE41: Found an estimated cost of 4 for instruction: %shl +; AVX: Found an estimated cost of 8 for instruction: %shl +; AVX2: Found an estimated cost of 2 for instruction: %shl + + +; Same as 'test7', except that now the cost is double. 
+ +define <16 x i32> @test10(<16 x i32> %a) { + %shl = shl <16 x i32> %a, + ret <16 x i32> %shl +} +; CHECK: 'Cost Model Analysis' for function 'test10': +; SSE2: Found an estimated cost of 24 for instruction: %shl +; SSE41: Found an estimated cost of 4 for instruction: %shl +; AVX: Found an estimated cost of 8 for instruction: %shl +; AVX2: Found an estimated cost of 2 for instruction: %shl + + +; On AVX2 we are able to lower the following shift into a sequence of +; two vpsllvq instructions. Therefore, the expected cost is only 2. +; In all other cases, this shift is scalarized as we don't have vpsllv +; instructions. + +define <8 x i64> @test11(<8 x i64> %a) { + %shl = shl <8 x i64> %a, + ret <8 x i64> %shl +} +; CHECK: 'Cost Model Analysis' for function 'test11': +; SSE2: Found an estimated cost of 80 for instruction: %shl +; SSE41: Found an estimated cost of 80 for instruction: %shl +; AVX: Found an estimated cost of 80 for instruction: %shl +; AVX2: Found an estimated cost of 2 for instruction: %shl + + diff --git a/test/Analysis/DependenceAnalysis/Banerjee.ll b/test/Analysis/DependenceAnalysis/Banerjee.ll index 09e8fd2..5c17064 100644 --- a/test/Analysis/DependenceAnalysis/Banerjee.ll +++ b/test/Analysis/DependenceAnalysis/Banerjee.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -basicaa -da | FileCheck %s +; RUN: opt < %s -analyze -basicaa -da -da-delinearize=false | FileCheck %s +; RUN: opt < %s -analyze -basicaa -da -da-delinearize | FileCheck %s -check-prefix=DELIN ; ModuleID = 'Banerjee.bc' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -21,6 +22,14 @@ entry: ; CHECK: da analyze - confused! ; CHECK: da analyze - none! +; DELIN: 'Dependence Analysis' for function 'banerjee0': +; DELIN: da analyze - none! +; DELIN: da analyze - consistent flow [0 1]! +; DELIN: da analyze - confused! +; DELIN: da analyze - none! 
+; DELIN: da analyze - confused! +; DELIN: da analyze - none! + for.cond1.preheader: ; preds = %entry, %for.inc7 %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ] %i.03 = phi i64 [ 1, %entry ], [ %inc8, %for.inc7 ] @@ -73,6 +82,14 @@ entry: ; CHECK: da analyze - confused! ; CHECK: da analyze - output [* *]! +; DELIN: 'Dependence Analysis' for function 'banerjee1': +; DELIN: da analyze - none +; DELIN: da analyze - consistent flow [0 1]! +; DELIN: da analyze - confused! +; DELIN: da analyze - none +; DELIN: da analyze - confused! +; DELIN: da analyze - output [* *]! + for.cond1.preheader.preheader: ; preds = %entry %0 = add i64 %n, 1 br label %for.cond1.preheader @@ -140,6 +157,14 @@ entry: ; CHECK: da analyze - confused! ; CHECK: da analyze - none! +; DELIN: 'Dependence Analysis' for function 'banerjee2': +; DELIN: da analyze - none! +; DELIN: da analyze - none! +; DELIN: da analyze - confused! +; DELIN: da analyze - none! +; DELIN: da analyze - confused! +; DELIN: da analyze - none! + for.cond1.preheader: ; preds = %entry, %for.inc8 %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ] %i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ] @@ -191,6 +216,14 @@ entry: ; CHECK: da analyze - confused! ; CHECK: da analyze - none! +; DELIN: 'Dependence Analysis' for function 'banerjee3': +; DELIN: da analyze - none! +; DELIN: da analyze - consistent flow [-9 -9]! +; DELIN: da analyze - confused! +; DELIN: da analyze - none! +; DELIN: da analyze - confused! +; DELIN: da analyze - none! + for.cond1.preheader: ; preds = %entry, %for.inc8 %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ] %i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ] @@ -242,6 +275,14 @@ entry: ; CHECK: da analyze - confused! ; CHECK: da analyze - none! +; DELIN: 'Dependence Analysis' for function 'banerjee4': +; DELIN: da analyze - none! +; DELIN: da analyze - none! +; DELIN: da analyze - confused! +; DELIN: da analyze - none! +; DELIN: da analyze - confused! 
+; DELIN: da analyze - none! + for.cond1.preheader: ; preds = %entry, %for.inc7 %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ] %i.03 = phi i64 [ 0, %entry ], [ %inc8, %for.inc7 ] @@ -293,6 +334,14 @@ entry: ; CHECK: da analyze - confused! ; CHECK: da analyze - none! +; DELIN: 'Dependence Analysis' for function 'banerjee5': +; DELIN: da analyze - none! +; DELIN: da analyze - consistent flow [9 9]! +; DELIN: da analyze - confused! +; DELIN: da analyze - none! +; DELIN: da analyze - confused! +; DELIN: da analyze - none! + for.cond1.preheader: ; preds = %entry, %for.inc7 %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ] %i.03 = phi i64 [ 0, %entry ], [ %inc8, %for.inc7 ] @@ -344,6 +393,14 @@ entry: ; CHECK: da analyze - confused! ; CHECK: da analyze - none! +; DELIN: 'Dependence Analysis' for function 'banerjee6': +; DELIN: da analyze - none! +; DELIN: da analyze - consistent flow [0 -9]! +; DELIN: da analyze - confused! +; DELIN: da analyze - none! +; DELIN: da analyze - confused! +; DELIN: da analyze - none! + for.cond1.preheader: ; preds = %entry, %for.inc8 %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ] %i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ] @@ -395,6 +452,14 @@ entry: ; CHECK: da analyze - confused! ; CHECK: da analyze - none! +; DELIN: 'Dependence Analysis' for function 'banerjee7': +; DELIN: da analyze - none! +; DELIN: da analyze - consistent flow [-1 0]! +; DELIN: da analyze - confused! +; DELIN: da analyze - none! +; DELIN: da analyze - confused! +; DELIN: da analyze - none! + for.cond1.preheader: ; preds = %entry, %for.inc8 %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ] %i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ] @@ -446,6 +511,14 @@ entry: ; CHECK: da analyze - confused! ; CHECK: da analyze - none! +; DELIN: 'Dependence Analysis' for function 'banerjee8': +; DELIN: da analyze - none! +; DELIN: da analyze - consistent flow [-1 -1]! +; DELIN: da analyze - confused! 
+; DELIN: da analyze - none! +; DELIN: da analyze - confused! +; DELIN: da analyze - none! + for.cond1.preheader: ; preds = %entry, %for.inc8 %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ] %i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ] @@ -497,6 +570,14 @@ entry: ; CHECK: da analyze - confused! ; CHECK: da analyze - none! +; DELIN: 'Dependence Analysis' for function 'banerjee9': +; DELIN: da analyze - none! +; DELIN: da analyze - flow [<= =|<]! +; DELIN: da analyze - confused! +; DELIN: da analyze - none! +; DELIN: da analyze - confused! +; DELIN: da analyze - none! + for.cond1.preheader: ; preds = %entry, %for.inc8 %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ] %i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ] @@ -549,6 +630,14 @@ entry: ; CHECK: da analyze - confused! ; CHECK: da analyze - none! +; DELIN: 'Dependence Analysis' for function 'banerjee10': +; DELIN: da analyze - none! +; DELIN: da analyze - flow [<> =]! +; DELIN: da analyze - confused! +; DELIN: da analyze - none! +; DELIN: da analyze - confused! +; DELIN: da analyze - none! + for.cond1.preheader: ; preds = %entry, %for.inc7 %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ] %i.03 = phi i64 [ 0, %entry ], [ %inc8, %for.inc7 ] @@ -600,6 +689,14 @@ entry: ; CHECK: da analyze - confused! ; CHECK: da analyze - none! +; DELIN: 'Dependence Analysis' for function 'banerjee11': +; DELIN: da analyze - none! +; DELIN: da analyze - flow [<= <>]! +; DELIN: da analyze - confused! +; DELIN: da analyze - none! +; DELIN: da analyze - confused! +; DELIN: da analyze - none! + for.cond1.preheader: ; preds = %entry, %for.inc7 %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ] %i.03 = phi i64 [ 0, %entry ], [ %inc8, %for.inc7 ] @@ -651,6 +748,14 @@ entry: ; CHECK: da analyze - confused! ; CHECK: da analyze - none! +; DELIN: 'Dependence Analysis' for function 'banerjee12': +; DELIN: da analyze - none! +; DELIN: da analyze - consistent flow [0 -11]! 
+; DELIN: da analyze - confused! +; DELIN: da analyze - none! +; DELIN: da analyze - confused! +; DELIN: da analyze - none! + for.cond1.preheader: ; preds = %entry, %for.inc7 %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ] %i.03 = phi i64 [ 0, %entry ], [ %inc8, %for.inc7 ] diff --git a/test/Analysis/DependenceAnalysis/GCD.ll b/test/Analysis/DependenceAnalysis/GCD.ll index bb31d11..7efa8b5 100644 --- a/test/Analysis/DependenceAnalysis/GCD.ll +++ b/test/Analysis/DependenceAnalysis/GCD.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -basicaa -da | FileCheck %s +; RUN: opt < %s -analyze -basicaa -da -da-delinearize=false | FileCheck %s +; RUN: opt < %s -analyze -basicaa -da -da-delinearize | FileCheck %s -check-prefix=DELIN ; ModuleID = 'GCD.bc' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -22,6 +23,14 @@ entry: ; CHECK: da analyze - confused! ; CHECK: da analyze - none! +; DELIN: 'Dependence Analysis' for function 'gcd0' +; DELIN: da analyze - none! +; DELIN: da analyze - flow [=> *|<]! +; DELIN: da analyze - confused! +; DELIN: da analyze - none! +; DELIN: da analyze - confused! +; DELIN: da analyze - none! + for.cond1.preheader: ; preds = %entry, %for.inc8 %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc8 ] %i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ] @@ -75,6 +84,14 @@ entry: ; CHECK: da analyze - confused! ; CHECK: da analyze - none! +; DELIN: 'Dependence Analysis' for function 'gcd1' +; DELIN: da analyze - none! +; DELIN: da analyze - none! +; DELIN: da analyze - confused! +; DELIN: da analyze - none! +; DELIN: da analyze - confused! +; DELIN: da analyze - none! + for.cond1.preheader: ; preds = %entry, %for.inc9 %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc9 ] %i.03 = phi i64 [ 0, %entry ], [ %inc10, %for.inc9 ] @@ -129,6 +146,14 @@ entry: ; CHECK: da analyze - confused! ; CHECK: da analyze - none! 
+; DELIN: 'Dependence Analysis' for function 'gcd2' +; DELIN: da analyze - none! +; DELIN: da analyze - none! +; DELIN: da analyze - confused! +; DELIN: da analyze - none! +; DELIN: da analyze - confused! +; DELIN: da analyze - none! + for.cond1.preheader: ; preds = %entry, %for.inc9 %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc9 ] %i.03 = phi i64 [ 0, %entry ], [ %inc10, %for.inc9 ] @@ -183,6 +208,14 @@ entry: ; CHECK: da analyze - confused! ; CHECK: da analyze - none! +; DELIN: 'Dependence Analysis' for function 'gcd3' +; DELIN: da analyze - output [* *]! +; DELIN: da analyze - flow [<> *]! +; DELIN: da analyze - confused! +; DELIN: da analyze - input [* *]! +; DELIN: da analyze - confused! +; DELIN: da analyze - none! + for.cond1.preheader: ; preds = %entry, %for.inc7 %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc7 ] %i.03 = phi i64 [ 0, %entry ], [ %inc8, %for.inc7 ] @@ -235,6 +268,14 @@ entry: ; CHECK: da analyze - confused! ; CHECK: da analyze - none! +; DELIN: 'Dependence Analysis' for function 'gcd4' +; DELIN: da analyze - output [* *]! +; DELIN: da analyze - none! +; DELIN: da analyze - confused! +; DELIN: da analyze - input [* *]! +; DELIN: da analyze - confused! +; DELIN: da analyze - none! + for.cond1.preheader: ; preds = %entry, %for.inc17 %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc17 ] %i.03 = phi i64 [ 0, %entry ], [ %inc18, %for.inc17 ] @@ -297,6 +338,14 @@ entry: ; CHECK: da analyze - confused! ; CHECK: da analyze - none! +; DELIN: 'Dependence Analysis' for function 'gcd5' +; DELIN: da analyze - output [* *]! +; DELIN: da analyze - flow [<> *]! +; DELIN: da analyze - confused! +; DELIN: da analyze - input [* *]! +; DELIN: da analyze - confused! +; DELIN: da analyze - none! + for.cond1.preheader: ; preds = %entry, %for.inc17 %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc17 ] %i.03 = phi i64 [ 0, %entry ], [ %inc18, %for.inc17 ] @@ -360,6 +409,14 @@ entry: ; CHECK: da analyze - confused! 
; CHECK: da analyze - output [* *]! +; DELIN: 'Dependence Analysis' for function 'gcd6' +; DELIN: da analyze - none! +; DELIN: da analyze - none! +; DELIN: da analyze - confused! +; DELIN: da analyze - none! +; DELIN: da analyze - confused! +; DELIN: da analyze - output [* *]! + for.cond1.preheader.preheader: ; preds = %entry br label %for.cond1.preheader @@ -432,6 +489,14 @@ entry: ; CHECK: da analyze - confused! ; CHECK: da analyze - output [* *]! +; DELIN: 'Dependence Analysis' for function 'gcd7' +; DELIN: da analyze - output [* *]! +; DELIN: da analyze - flow [* *|<]! +; DELIN: da analyze - confused! +; DELIN: da analyze - input [* *]! +; DELIN: da analyze - confused! +; DELIN: da analyze - output [* *]! + for.cond1.preheader.preheader: ; preds = %entry br label %for.cond1.preheader @@ -516,6 +581,14 @@ entry: ; CHECK: da analyze - confused! ; CHECK: da analyze - output [* *]! +; DELIN: 'Dependence Analysis' for function 'gcd8' +; DELIN: da analyze - output [* *]! +; DELIN: da analyze - none! +; DELIN: da analyze - confused! +; DELIN: da analyze - input [* *]! +; DELIN: da analyze - confused! +; DELIN: da analyze - output [* *]! + for.cond1.preheader.preheader: ; preds = %entry br label %for.cond1.preheader @@ -595,6 +668,14 @@ entry: ; CHECK: da analyze - confused! ; CHECK: da analyze - output [* *]! +; DELIN: 'Dependence Analysis' for function 'gcd9' +; DELIN: da analyze - output [* *]! +; DELIN: da analyze - flow [* *|<]! +; DELIN: da analyze - confused! +; DELIN: da analyze - input [* *]! +; DELIN: da analyze - confused! +; DELIN: da analyze - output [* *]! 
+ for.cond1.preheader.preheader: ; preds = %entry br label %for.cond1.preheader diff --git a/test/Analysis/LazyCallGraph/basic.ll b/test/Analysis/LazyCallGraph/basic.ll new file mode 100644 index 0000000..ebadb75 --- /dev/null +++ b/test/Analysis/LazyCallGraph/basic.ll @@ -0,0 +1,126 @@ +; RUN: opt -disable-output -passes=print-cg %s 2>&1 | FileCheck %s +; +; Basic validation of the call graph analysis used in the new pass manager. + +define void @f() { +; CHECK-LABEL: Call edges in function: f +; CHECK-NOT: -> + +entry: + ret void +} + +; A bunch more functions just to make it easier to test several call edges at once. +define void @f1() { + ret void +} +define void @f2() { + ret void +} +define void @f3() { + ret void +} +define void @f4() { + ret void +} +define void @f5() { + ret void +} +define void @f6() { + ret void +} +define void @f7() { + ret void +} +define void @f8() { + ret void +} +define void @f9() { + ret void +} +define void @f10() { + ret void +} +define void @f11() { + ret void +} +define void @f12() { + ret void +} + +declare i32 @__gxx_personality_v0(...) 
+ +define void @test0() { +; CHECK-LABEL: Call edges in function: test0 +; CHECK-NEXT: -> f +; CHECK-NOT: -> + +entry: + call void @f() + call void @f() + call void @f() + call void @f() + ret void +} + +define void ()* @test1(void ()** %x) { +; CHECK-LABEL: Call edges in function: test1 +; CHECK-NEXT: -> f12 +; CHECK-NEXT: -> f11 +; CHECK-NEXT: -> f10 +; CHECK-NEXT: -> f7 +; CHECK-NEXT: -> f9 +; CHECK-NEXT: -> f8 +; CHECK-NEXT: -> f6 +; CHECK-NEXT: -> f5 +; CHECK-NEXT: -> f4 +; CHECK-NEXT: -> f3 +; CHECK-NEXT: -> f2 +; CHECK-NEXT: -> f1 +; CHECK-NOT: -> + +entry: + br label %next + +dead: + br label %next + +next: + phi void ()* [ @f1, %entry ], [ @f2, %dead ] + select i1 true, void ()* @f3, void ()* @f4 + store void ()* @f5, void ()** %x + call void @f6() + call void (void ()*, void ()*)* bitcast (void ()* @f7 to void (void ()*, void ()*)*)(void ()* @f8, void ()* @f9) + invoke void @f10() to label %exit unwind label %unwind + +exit: + ret void ()* @f11 + +unwind: + %res = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0 + cleanup + resume { i8*, i32 } { i8* bitcast (void ()* @f12 to i8*), i32 42 } +} + +@g = global void ()* @f1 +@g1 = global [4 x void ()*] [void ()* @f2, void ()* @f3, void ()* @f4, void ()* @f5] +@g2 = global {i8, void ()*, i8} {i8 1, void ()* @f6, i8 2} +@h = constant void ()* @f7 + +define void @test2() { +; CHECK-LABEL: Call edges in function: test2 +; CHECK-NEXT: -> f7 +; CHECK-NEXT: -> f6 +; CHECK-NEXT: -> f5 +; CHECK-NEXT: -> f4 +; CHECK-NEXT: -> f3 +; CHECK-NEXT: -> f2 +; CHECK-NEXT: -> f1 +; CHECK-NOT: -> + + load i8** bitcast (void ()** @g to i8**) + load i8** bitcast (void ()** getelementptr ([4 x void ()*]* @g1, i32 0, i32 2) to i8**) + load i8** bitcast (void ()** getelementptr ({i8, void ()*, i8}* @g2, i32 0, i32 1) to i8**) + load i8** bitcast (void ()** @h to i8**) + ret void +} diff --git a/test/Analysis/Lint/address-spaces.ll b/test/Analysis/Lint/address-spaces.ll new file mode 100644 index 0000000..46ee1d7 
--- /dev/null +++ b/test/Analysis/Lint/address-spaces.ll @@ -0,0 +1,25 @@ +; RUN: opt -lint < %s + +target datalayout = "p32:32:32-p1:16:16:16-n16:32" + +declare void @foo(i64) nounwind + +define i64 @test1(i32 addrspace(1)* %x) nounwind { + %y = ptrtoint i32 addrspace(1)* %x to i64 + ret i64 %y +} + +define <4 x i64> @test1_vector(<4 x i32 addrspace(1)*> %x) nounwind { + %y = ptrtoint <4 x i32 addrspace(1)*> %x to <4 x i64> + ret <4 x i64> %y +} + +define i32 addrspace(1)* @test2(i64 %x) nounwind { + %y = inttoptr i64 %x to i32 addrspace(1)* + ret i32 addrspace(1)* %y +} + +define <4 x i32 addrspace(1)*> @test2_vector(<4 x i64> %x) nounwind { + %y = inttoptr <4 x i64> %x to <4 x i32 addrspace(1)*> + ret <4 x i32 addrspace(1)*> %y +} \ No newline at end of file diff --git a/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll b/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll index 3dacfbb..a845465 100644 --- a/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll +++ b/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -analyze -scalar-evolution | grep "(trunc i" | not grep ext +; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s ; CHECK: Printing analysis 'Scalar Evolution Analysis' for function 'test1' ; CHECK-NOT: (trunc i{{.*}}ext diff --git a/test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll b/test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll index b88e33f..5746d1c 100644 --- a/test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll +++ b/test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -globalopt -instcombine -loop-rotate -licm -instcombine -indvars -loop-deletion -constmerge -S +; RUN: opt < %s -basicaa -globalopt -instcombine -loop-rotate -licm -instcombine -indvars -loop-deletion -constmerge -S | FileCheck %s ; PR11882: ComputeLoadConstantCompareExitLimit crash. ; ; for.body is deleted leaving a loop-invariant load. 
diff --git a/test/Analysis/ScalarEvolution/2012-05-18-LoopPredRecurse.ll b/test/Analysis/ScalarEvolution/2012-05-18-LoopPredRecurse.ll index 52e6683..66df9d1 100644 --- a/test/Analysis/ScalarEvolution/2012-05-18-LoopPredRecurse.ll +++ b/test/Analysis/ScalarEvolution/2012-05-18-LoopPredRecurse.ll @@ -4,7 +4,7 @@ ; getUDivExpr()->getZeroExtendExpr()->isLoopBackedgeGuardedBy() ; ; We actually want SCEV simplification to fail gracefully in this -; case, so there's no output to check, just the absense of stack overflow. +; case, so there's no output to check, just the absence of stack overflow. @c = common global i8 0, align 1 diff --git a/test/Analysis/ScalarEvolution/and-xor.ll b/test/Analysis/ScalarEvolution/and-xor.ll index 404ab91..ad636da 100644 --- a/test/Analysis/ScalarEvolution/and-xor.ll +++ b/test/Analysis/ScalarEvolution/and-xor.ll @@ -1,11 +1,27 @@ ; RUN: opt < %s -scalar-evolution -analyze | FileCheck %s +; CHECK-LABEL: @test1 ; CHECK: --> (zext ; CHECK: --> (zext ; CHECK-NOT: --> (zext -define i32 @foo(i32 %x) { +define i32 @test1(i32 %x) { %n = and i32 %x, 255 %y = xor i32 %n, 255 ret i32 %y } + +; ScalarEvolution shouldn't try to analyze %z into something like +; --> (zext i4 (-1 + (-1 * (trunc i64 (8 * %x) to i4))) to i64) +; or +; --> (8 * (zext i1 (trunc i64 ((8 * %x) /u 8) to i1) to i64)) + +; CHECK-LABEL: @test2 +; CHECK: --> (8 * (zext i1 (trunc i64 %x to i1) to i64)) + +define i64 @test2(i64 %x) { + %a = shl i64 %x, 3 + %t = and i64 %a, 8 + %z = xor i64 %t, 8 + ret i64 %z +} diff --git a/test/Analysis/ScalarEvolution/fold.ll b/test/Analysis/ScalarEvolution/fold.ll index 57006dd..ab57425 100644 --- a/test/Analysis/ScalarEvolution/fold.ll +++ b/test/Analysis/ScalarEvolution/fold.ll @@ -60,3 +60,29 @@ loop: exit: ret void } + +define void @test5(i32 %i) { +; CHECK-LABEL: @test5 + %A = and i32 %i, 1 +; CHECK: --> (zext i1 (trunc i32 %i to i1) to i32) + %B = and i32 %i, 2 +; CHECK: --> (2 * (zext i1 (trunc i32 (%i /u 2) to i1) to i32)) + %C = and i32 
%i, 63 +; CHECK: --> (zext i6 (trunc i32 %i to i6) to i32) + %D = and i32 %i, 126 +; CHECK: --> (2 * (zext i6 (trunc i32 (%i /u 2) to i6) to i32)) + %E = and i32 %i, 64 +; CHECK: --> (64 * (zext i1 (trunc i32 (%i /u 64) to i1) to i32)) + %F = and i32 %i, -2147483648 +; CHECK: --> (-2147483648 * (%i /u -2147483648)) + ret void +} + +define void @test6(i8 %x) { +; CHECK-LABEL: @test6 + %A = zext i8 %x to i16 + %B = shl nuw i16 %A, 8 + %C = and i16 %B, -2048 +; CHECK: --> (2048 * ((zext i8 %x to i16) /u 8)) + ret void +} diff --git a/test/Analysis/ScalarEvolution/nsw-offset.ll b/test/Analysis/ScalarEvolution/nsw-offset.ll index 8969a5a..88cdcf2 100644 --- a/test/Analysis/ScalarEvolution/nsw-offset.ll +++ b/test/Analysis/ScalarEvolution/nsw-offset.ll @@ -73,5 +73,5 @@ return: ; preds = %bb1.return_crit_edg ret void } -; CHECK: Loop %bb: backedge-taken count is ((-1 + %n) /u 2) +; CHECK: Loop %bb: backedge-taken count is ((-1 + (2 * (%no /u 2))) /u 2) ; CHECK: Loop %bb: max backedge-taken count is 1073741822 diff --git a/test/Analysis/ScalarEvolution/trip-count-pow2.ll b/test/Analysis/ScalarEvolution/trip-count-pow2.ll new file mode 100644 index 0000000..2c5b72e --- /dev/null +++ b/test/Analysis/ScalarEvolution/trip-count-pow2.ll @@ -0,0 +1,53 @@ +; RUN: opt < %s -scalar-evolution -analyze | FileCheck %s + +define void @test1(i32 %n) { +entry: + %s = mul i32 %n, 96 + br label %loop +loop: + %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] + %i.next = add i32 %i, 32 + %t = icmp ne i32 %i.next, %s + br i1 %t, label %loop, label %exit +exit: + ret void + +; CHECK-LABEL: @test1 +; CHECK: Loop %loop: backedge-taken count is ((-32 + (96 * %n)) /u 32) +; CHECK: Loop %loop: max backedge-taken count is ((-32 + (96 * %n)) /u 32) +} + +; PR19183 +define i32 @test2(i32 %n) { +entry: + %s = and i32 %n, -32 + br label %loop +loop: + %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] + %i.next = add i32 %i, 32 + %t = icmp ne i32 %i.next, %s + br i1 %t, label %loop, label %exit +exit: + ret 
i32 %i + +; CHECK-LABEL: @test2 +; CHECK: Loop %loop: backedge-taken count is ((-32 + (32 * (%n /u 32))) /u 32) +; CHECK: Loop %loop: max backedge-taken count is ((-32 + (32 * (%n /u 32))) /u 32) +} + +define void @test3(i32 %n) { +entry: + %s = mul i32 %n, 96 + br label %loop +loop: + %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] + %i.next = add i32 %i, 96 + %t = icmp ne i32 %i.next, %s + br i1 %t, label %loop, label %exit +exit: + ret void + +; CHECK-LABEL: @test3 +; CHECK: Loop %loop: Unpredictable backedge-taken count. +; CHECK: Loop %loop: Unpredictable max backedge-taken count. +} diff --git a/test/Analysis/ScalarEvolution/trip-count-switch.ll b/test/Analysis/ScalarEvolution/trip-count-switch.ll new file mode 100644 index 0000000..2d2b6b4 --- /dev/null +++ b/test/Analysis/ScalarEvolution/trip-count-switch.ll @@ -0,0 +1,30 @@ +; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s + +declare void @foo() + +define void @test1() nounwind { +entry: + br label %for.cond + +for.cond: ; preds = %if.end, %entry + %i.0 = phi i32 [ 2, %entry ], [ %dec, %if.end ] + switch i32 %i.0, label %if.end [ + i32 0, label %for.end + i32 1, label %if.then + ] + +if.then: ; preds = %for.cond + tail call void @foo() + br label %if.end + +if.end: ; preds = %for.cond, %if.then + %dec = add nsw i32 %i.0, -1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void + +; CHECK-LABEL: @test1 +; CHECK: Loop %for.cond: backedge-taken count is 2 +; CHECK: Loop %for.cond: max backedge-taken count is 2 +} diff --git a/test/Analysis/ScalarEvolution/xor-and.ll b/test/Analysis/ScalarEvolution/xor-and.ll deleted file mode 100644 index 2616ea9..0000000 --- a/test/Analysis/ScalarEvolution/xor-and.ll +++ /dev/null @@ -1,13 +0,0 @@ -; RUN: opt < %s -scalar-evolution -analyze | FileCheck %s - -; ScalarEvolution shouldn't try to analyze %z into something like -; --> (zext i4 (-1 + (-1 * (trunc i64 (8 * %x) to i4))) to i64) - -; CHECK: --> (zext i4 (-8 + (trunc i64 (8 * %x) to i4)) to i64) - 
-define i64 @foo(i64 %x) { - %a = shl i64 %x, 3 - %t = and i64 %a, 8 - %z = xor i64 %t, 8 - ret i64 %z -} diff --git a/test/Analysis/ScalarEvolution/zext-signed-addrec.ll b/test/Analysis/ScalarEvolution/zext-signed-addrec.ll new file mode 100644 index 0000000..27aed3b --- /dev/null +++ b/test/Analysis/ScalarEvolution/zext-signed-addrec.ll @@ -0,0 +1,81 @@ +; RUN: opt -loop-reduce -S < %s | FileCheck %s +; PR18000 + +target datalayout = "e-i64:64-f80:128-s:64-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@a = global i32 0, align 4 +@b = common global i32 0, align 4 +@e = common global i8 0, align 1 +@d = common global i32 0, align 4 +@c = common global i32 0, align 4 +@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 + +; Function Attrs: nounwind optsize uwtable +; CHECK-LABEL: foo +define i32 @foo() { +entry: + %.pr = load i32* @b, align 4 + %cmp10 = icmp slt i32 %.pr, 1 + br i1 %cmp10, label %for.cond1.preheader.lr.ph, label %entry.for.end9_crit_edge + +entry.for.end9_crit_edge: ; preds = %entry + %.pre = load i32* @c, align 4 + br label %for.end9 + +for.cond1.preheader.lr.ph: ; preds = %entry + %0 = load i32* @a, align 4 + %tobool = icmp eq i32 %0, 0 + br i1 %tobool, label %for.cond1.preheader.for.cond1.preheader.split_crit_edge, label %return.loopexit.split + +for.cond1.preheader.for.cond1.preheader.split_crit_edge: ; preds = %for.cond1.preheader.lr.ph, %for.inc8 + %1 = phi i32 [ %inc, %for.inc8 ], [ %.pr, %for.cond1.preheader.lr.ph ] + br label %if.end + +; CHECK-LABEL: if.end +if.end: ; preds = %if.end, %for.cond1.preheader.for.cond1.preheader.split_crit_edge + +; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %if.end ], [ 258, %for.cond1.preheader.for.cond1.preheader.split_crit_edge ] + %indvars.iv = phi i32 [ 1, %for.cond1.preheader.for.cond1.preheader.split_crit_edge ], [ %indvars.iv.next, %if.end ] + + %2 = phi i8 [ 1, %for.cond1.preheader.for.cond1.preheader.split_crit_edge ], [ %dec, %if.end ] + %conv7 = mul i32 
%indvars.iv, 258 + %shl = and i32 %conv7, 510 + store i32 %shl, i32* @c, align 4 + +; CHECK: %lsr.iv.next = add i32 %lsr.iv, -258 + %dec = add i8 %2, -1 + + %cmp2 = icmp sgt i8 %dec, -1 + %indvars.iv.next = add i32 %indvars.iv, -1 + br i1 %cmp2, label %if.end, label %for.inc8 + +for.inc8: ; preds = %if.end + store i32 0, i32* @d, align 4 + %inc = add nsw i32 %1, 1 + store i32 %inc, i32* @b, align 4 + %cmp = icmp slt i32 %1, 0 + br i1 %cmp, label %for.cond1.preheader.for.cond1.preheader.split_crit_edge, label %for.cond.for.end9_crit_edge + +for.cond.for.end9_crit_edge: ; preds = %for.inc8 + store i8 %dec, i8* @e, align 1 + br label %for.end9 + +for.end9: ; preds = %entry.for.end9_crit_edge, %for.cond.for.end9_crit_edge + %3 = phi i32 [ %.pre, %entry.for.end9_crit_edge ], [ %shl, %for.cond.for.end9_crit_edge ] + %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %3) #2 + br label %return + +return.loopexit.split: ; preds = %for.cond1.preheader.lr.ph + store i8 1, i8* @e, align 1 + store i32 0, i32* @d, align 4 + br label %return + +return: ; preds = %return.loopexit.split, %for.end9 + %retval.0 = phi i32 [ 0, %for.end9 ], [ 1, %return.loopexit.split ] + ret i32 %retval.0 +} + +; Function Attrs: nounwind optsize +declare i32 @printf(i8* nocapture readonly, ...) 
+ diff --git a/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll b/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll index 6fd6eac..cdf7281 100644 --- a/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll +++ b/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll @@ -18,8 +18,8 @@ define void @foo(i8* nocapture %p, i8* nocapture %q, i8* nocapture %s) nounwind declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind -; CHECK [[TAGA]] = metadata !{metadata [[TYPEA:!.*]], metadata [[TYPEA]], i64 0} -; CHECK [[TYPEA]] = metadata !{metadata !"A", metadata !{{.*}}} +; CHECK: [[TAGA]] = metadata !{metadata [[TYPEA:!.*]], metadata [[TYPEA]], i64 0} +; CHECK: [[TYPEA]] = metadata !{metadata !"A", metadata !{{.*}}} !0 = metadata !{metadata !"tbaa root", null} !1 = metadata !{metadata !3, metadata !3, i64 0} !2 = metadata !{metadata !4, metadata !4, i64 0} diff --git a/test/Analysis/TypeBasedAliasAnalysis/tbaa-path.ll b/test/Analysis/TypeBasedAliasAnalysis/tbaa-path.ll index 0cd5c30..e1c5d45 100644 --- a/test/Analysis/TypeBasedAliasAnalysis/tbaa-path.ll +++ b/test/Analysis/TypeBasedAliasAnalysis/tbaa-path.ll @@ -43,7 +43,7 @@ entry: ; OPT: define ; OPT: store i32 1 ; OPT: store i16 4 -; Remove a load and propogate the value from store. +; Remove a load and propagate the value from store. ; OPT: ret i32 1 %s.addr = alloca i32*, align 8 %A.addr = alloca %struct.StructA*, align 8 @@ -98,7 +98,7 @@ entry: ; OPT: define ; OPT: store i32 1 ; OPT: store i16 4 -; Remove a load and propogate the value from store. +; Remove a load and propagate the value from store. ; OPT: ret i32 1 %A.addr = alloca %struct.StructA*, align 8 %B.addr = alloca %struct.StructB*, align 8 @@ -127,7 +127,7 @@ entry: ; OPT: define ; OPT: store i32 1 ; OPT: store i32 4 -; Remove a load and propogate the value from store. +; Remove a load and propagate the value from store. 
; OPT: ret i32 1 %A.addr = alloca %struct.StructA*, align 8 %B.addr = alloca %struct.StructB*, align 8 @@ -155,7 +155,7 @@ entry: ; OPT: define ; OPT: store i32 1 ; OPT: store i32 4 -; Remove a load and propogate the value from store. +; Remove a load and propagate the value from store. ; OPT: ret i32 1 %A.addr = alloca %struct.StructA*, align 8 %B.addr = alloca %struct.StructB*, align 8 @@ -184,7 +184,7 @@ entry: ; OPT: define ; OPT: store i32 1 ; OPT: store i32 4 -; Remove a load and propogate the value from store. +; Remove a load and propagate the value from store. ; OPT: ret i32 1 %A.addr = alloca %struct.StructA*, align 8 %S.addr = alloca %struct.StructS*, align 8 @@ -212,7 +212,7 @@ entry: ; OPT: define ; OPT: store i32 1 ; OPT: store i16 4 -; Remove a load and propogate the value from store. +; Remove a load and propagate the value from store. ; OPT: ret i32 1 %A.addr = alloca %struct.StructA*, align 8 %S.addr = alloca %struct.StructS*, align 8 @@ -240,7 +240,7 @@ entry: ; OPT: define ; OPT: store i32 1 ; OPT: store i32 4 -; Remove a load and propogate the value from store. +; Remove a load and propagate the value from store. ; OPT: ret i32 1 %S.addr = alloca %struct.StructS*, align 8 %S2.addr = alloca %struct.StructS2*, align 8 @@ -268,7 +268,7 @@ entry: ; OPT: define ; OPT: store i32 1 ; OPT: store i16 4 -; Remove a load and propogate the value from store. +; Remove a load and propagate the value from store. ; OPT: ret i32 1 %S.addr = alloca %struct.StructS*, align 8 %S2.addr = alloca %struct.StructS2*, align 8 @@ -296,7 +296,7 @@ entry: ; OPT: define ; OPT: store i32 1 ; OPT: store i32 4 -; Remove a load and propogate the value from store. +; Remove a load and propagate the value from store. ; OPT: ret i32 1 %C.addr = alloca %struct.StructC*, align 8 %D.addr = alloca %struct.StructD*, align 8 -- cgit v1.1