From 5a5c58c3efb20672b24f5ce5161051bce752494c Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Wed, 9 Apr 2008 08:23:16 +0000 Subject: Factor a bunch of functionality related to memcpy and memset transforms out of GVN and into its own pass. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@49419 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Transforms/GVN/2008-02-24-MultipleUseofSRet.ll | 34 -------- .../Transforms/GVN/2008-03-13-ReturnSlotBitcast.ll | 19 ----- test/Transforms/GVN/form-memset.ll | 55 ------------ test/Transforms/GVN/form-memset2.ll | 99 ---------------------- test/Transforms/GVN/memcpy.ll | 22 ----- test/Transforms/GVN/sret.ll | 28 ------ .../MemCpyOpt/2008-02-24-MultipleUseofSRet.ll | 34 ++++++++ .../MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll | 19 +++++ test/Transforms/MemCpyOpt/dg.exp | 3 + test/Transforms/MemCpyOpt/form-memset.ll | 55 ++++++++++++ test/Transforms/MemCpyOpt/form-memset2.ll | 99 ++++++++++++++++++++++ test/Transforms/MemCpyOpt/memcpy.ll | 22 +++++ test/Transforms/MemCpyOpt/sret.ll | 28 ++++++ 13 files changed, 260 insertions(+), 257 deletions(-) delete mode 100644 test/Transforms/GVN/2008-02-24-MultipleUseofSRet.ll delete mode 100644 test/Transforms/GVN/2008-03-13-ReturnSlotBitcast.ll delete mode 100644 test/Transforms/GVN/form-memset.ll delete mode 100644 test/Transforms/GVN/form-memset2.ll delete mode 100644 test/Transforms/GVN/memcpy.ll delete mode 100644 test/Transforms/GVN/sret.ll create mode 100644 test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll create mode 100644 test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll create mode 100644 test/Transforms/MemCpyOpt/dg.exp create mode 100644 test/Transforms/MemCpyOpt/form-memset.ll create mode 100644 test/Transforms/MemCpyOpt/form-memset2.ll create mode 100644 test/Transforms/MemCpyOpt/memcpy.ll create mode 100644 test/Transforms/MemCpyOpt/sret.ll (limited to 'test/Transforms') diff --git a/test/Transforms/GVN/2008-02-24-MultipleUseofSRet.ll b/test/Transforms/GVN/2008-02-24-MultipleUseofSRet.ll deleted file mode 100644 index 21dff98..0000000 --- a/test/Transforms/GVN/2008-02-24-MultipleUseofSRet.ll +++ /dev/null @@ -1,34 +0,0 @@ -; RUN: llvm-as < %s | opt -gvn -dse | llvm-dis | grep {call.*initialize} | not grep memtmp -; PR2077 - -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" -target triple = "i386-pc-linux-gnu" - -define internal fastcc void @initialize({ x86_fp80, x86_fp80 }* noalias sret %agg.result) nounwind { -entry: - %agg.result.03 = getelementptr { x86_fp80, x86_fp80 }* %agg.result, i32 0, i32 0 ; [#uses=1] - store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.03 - %agg.result.15 = getelementptr { x86_fp80, x86_fp80 }* %agg.result, i32 0, i32 1 ; [#uses=1] - store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.15 - ret void -} - -declare fastcc x86_fp80 @passed_uninitialized({ x86_fp80, x86_fp80 }* %x) nounwind - -define fastcc void @badly_optimized() nounwind { -entry: - %z = alloca { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=2] - %tmp = alloca { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=2] - %memtmp = alloca { x86_fp80, x86_fp80 }, align 8 ; <{ x86_fp80, x86_fp80 }*> [#uses=2] - call fastcc void @initialize( { x86_fp80, x86_fp80 }* noalias sret %memtmp ) - %tmp1 = bitcast { x86_fp80, x86_fp80 }* %tmp to i8* ; [#uses=1] - %memtmp2 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8* ; [#uses=1] - call void @llvm.memcpy.i32( i8* %tmp1, i8* %memtmp2, i32 24, i32 8 ) - %z3 = bitcast { x86_fp80, x86_fp80 }* %z to i8* ; [#uses=1] - %tmp4 = bitcast { x86_fp80, x86_fp80 }* %tmp to i8* ; [#uses=1] - call void @llvm.memcpy.i32( i8* %z3, i8* %tmp4, i32 24, i32 8 ) - %tmp5 = call fastcc x86_fp80 @passed_uninitialized( { x86_fp80, x86_fp80 }* %z ) ; [#uses=0] - ret void -} - -declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind diff --git a/test/Transforms/GVN/2008-03-13-ReturnSlotBitcast.ll b/test/Transforms/GVN/2008-03-13-ReturnSlotBitcast.ll deleted file mode 100644 index 7757a92..0000000 --- a/test/Transforms/GVN/2008-03-13-ReturnSlotBitcast.ll +++ /dev/null @@ -1,19 +0,0 @@ -; RUN: llvm-as < %s | opt -gvn | llvm-dis | not grep {call.*memcpy.} - %a = type { i32 } - %b = type { float } - -declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind -declare void @g(%a*) - -define float @f() { -entry: - %a_var = alloca %a - %b_var = alloca %b - call void @g(%a *%a_var) - %a_i8 = bitcast %a* %a_var to i8* - %b_i8 = bitcast %b* %b_var to i8* - call void @llvm.memcpy.i32(i8* %b_i8, i8* %a_i8, i32 4, i32 4) - %tmp1 = getelementptr %b* %b_var, i32 0, i32 0 - %tmp2 = load float* %tmp1 - ret float %tmp2 -} diff --git a/test/Transforms/GVN/form-memset.ll b/test/Transforms/GVN/form-memset.ll deleted file mode 100644 index 3bc557a..0000000 --- a/test/Transforms/GVN/form-memset.ll +++ /dev/null @@ -1,55 +0,0 @@ -; RUN: llvm-as < %s | opt -gvn -form-memset-from-stores | llvm-dis | not grep store -; RUN: llvm-as < %s | opt -gvn -form-memset-from-stores | llvm-dis | grep {call.*llvm.memset} - -; All the stores in this example should be merged into a single memset. - -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" -target triple = "i386-apple-darwin8" - -define void @foo(i8 signext %c) nounwind { -entry: - %x = alloca [19 x i8] ; <[19 x i8]*> [#uses=20] - %tmp = getelementptr [19 x i8]* %x, i32 0, i32 0 ; [#uses=1] - store i8 %c, i8* %tmp, align 1 - %tmp5 = getelementptr [19 x i8]* %x, i32 0, i32 1 ; [#uses=1] - store i8 %c, i8* %tmp5, align 1 - %tmp9 = getelementptr [19 x i8]* %x, i32 0, i32 2 ; [#uses=1] - store i8 %c, i8* %tmp9, align 1 - %tmp13 = getelementptr [19 x i8]* %x, i32 0, i32 3 ; [#uses=1] - store i8 %c, i8* %tmp13, align 1 - %tmp17 = getelementptr [19 x i8]* %x, i32 0, i32 4 ; [#uses=1] - store i8 %c, i8* %tmp17, align 1 - %tmp21 = getelementptr [19 x i8]* %x, i32 0, i32 5 ; [#uses=1] - store i8 %c, i8* %tmp21, align 1 - %tmp25 = getelementptr [19 x i8]* %x, i32 0, i32 6 ; [#uses=1] - store i8 %c, i8* %tmp25, align 1 - %tmp29 = getelementptr [19 x i8]* %x, i32 0, i32 7 ; [#uses=1] - store i8 %c, i8* %tmp29, align 1 - %tmp33 = getelementptr [19 x i8]* %x, i32 0, i32 8 ; [#uses=1] - store i8 %c, i8* %tmp33, align 1 - %tmp37 = getelementptr [19 x i8]* %x, i32 0, i32 9 ; [#uses=1] - store i8 %c, i8* %tmp37, align 1 - %tmp41 = getelementptr [19 x i8]* %x, i32 0, i32 10 ; [#uses=1] - store i8 %c, i8* %tmp41, align 1 - %tmp45 = getelementptr [19 x i8]* %x, i32 0, i32 11 ; [#uses=1] - store i8 %c, i8* %tmp45, align 1 - %tmp49 = getelementptr [19 x i8]* %x, i32 0, i32 12 ; [#uses=1] - store i8 %c, i8* %tmp49, align 1 - %tmp53 = getelementptr [19 x i8]* %x, i32 0, i32 13 ; [#uses=1] - store i8 %c, i8* %tmp53, align 1 - %tmp57 = getelementptr [19 x i8]* %x, i32 0, i32 14 ; [#uses=1] - store i8 %c, i8* %tmp57, align 1 - %tmp61 = getelementptr [19 x i8]* %x, i32 0, i32 15 ; [#uses=1] - store i8 %c, i8* %tmp61, align 1 - %tmp65 = getelementptr [19 x i8]* %x, i32 0, i32 16 ; [#uses=1] - store i8 %c, i8* %tmp65, align 1 - %tmp69 = getelementptr [19 x i8]* %x, i32 0, i32 17 ; [#uses=1] - store i8 %c, i8* %tmp69, align 1 - %tmp73 = getelementptr [19 x i8]* %x, i32 0, i32 18 ; [#uses=1] - store i8 %c, i8* %tmp73, align 1 - %tmp76 = call i32 (...)* @bar( [19 x i8]* %x ) nounwind ; [#uses=0] - ret void -} - -declare i32 @bar(...) - diff --git a/test/Transforms/GVN/form-memset2.ll b/test/Transforms/GVN/form-memset2.ll deleted file mode 100644 index f4e7702..0000000 --- a/test/Transforms/GVN/form-memset2.ll +++ /dev/null @@ -1,99 +0,0 @@ -; RUN: llvm-as < %s | opt -gvn -form-memset-from-stores | llvm-dis | not grep store -; RUN: llvm-as < %s | opt -gvn -form-memset-from-stores | llvm-dis | grep {call.*llvm.memset} | count 3 - -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" -target triple = "i386-apple-darwin8" - %struct.MV = type { i16, i16 } - -define i32 @t() nounwind { -entry: - %ref_idx = alloca [8 x i8] ; <[8 x i8]*> [#uses=8] - %left_mvd = alloca [8 x %struct.MV] ; <[8 x %struct.MV]*> [#uses=17] - %up_mvd = alloca [8 x %struct.MV] ; <[8 x %struct.MV]*> [#uses=17] - %tmp20 = getelementptr [8 x i8]* %ref_idx, i32 0, i32 7 ; [#uses=1] - store i8 -1, i8* %tmp20, align 1 - %tmp23 = getelementptr [8 x i8]* %ref_idx, i32 0, i32 6 ; [#uses=1] - store i8 -1, i8* %tmp23, align 1 - %tmp26 = getelementptr [8 x i8]* %ref_idx, i32 0, i32 5 ; [#uses=1] - store i8 -1, i8* %tmp26, align 1 - %tmp29 = getelementptr [8 x i8]* %ref_idx, i32 0, i32 4 ; [#uses=1] - store i8 -1, i8* %tmp29, align 1 - %tmp32 = getelementptr [8 x i8]* %ref_idx, i32 0, i32 3 ; [#uses=1] - store i8 -1, i8* %tmp32, align 1 - %tmp35 = getelementptr [8 x i8]* %ref_idx, i32 0, i32 2 ; [#uses=1] - store i8 -1, i8* %tmp35, align 1 - %tmp38 = getelementptr [8 x i8]* %ref_idx, i32 0, i32 1 ; [#uses=1] - store i8 -1, i8* %tmp38, align 1 - %tmp41 = getelementptr [8 x i8]* %ref_idx, i32 0, i32 0 ; [#uses=2] - store i8 -1, i8* %tmp41, align 1 - %tmp43 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 7, i32 0 ; [#uses=1] - store i16 0, i16* %tmp43, align 2 - %tmp46 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 7, i32 1 ; [#uses=1] - store i16 0, i16* %tmp46, align 2 - %tmp57 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 6, i32 0 ; [#uses=1] - store i16 0, i16* %tmp57, align 2 - %tmp60 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 6, i32 1 ; [#uses=1] - store i16 0, i16* %tmp60, align 2 - %tmp71 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 5, i32 0 ; [#uses=1] - store i16 0, i16* %tmp71, align 2 - %tmp74 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 5, i32 1 ; [#uses=1] - store i16 0, i16* %tmp74, align 2 - %tmp85 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 4, i32 0 ; [#uses=1] - store i16 0, i16* %tmp85, align 2 - %tmp88 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 4, i32 1 ; [#uses=1] - store i16 0, i16* %tmp88, align 2 - %tmp99 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 3, i32 0 ; [#uses=1] - store i16 0, i16* %tmp99, align 2 - %tmp102 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 3, i32 1 ; [#uses=1] - store i16 0, i16* %tmp102, align 2 - %tmp113 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 2, i32 0 ; [#uses=1] - store i16 0, i16* %tmp113, align 2 - %tmp116 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 2, i32 1 ; [#uses=1] - store i16 0, i16* %tmp116, align 2 - %tmp127 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 1, i32 0 ; [#uses=1] - store i16 0, i16* %tmp127, align 2 - %tmp130 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 1, i32 1 ; [#uses=1] - store i16 0, i16* %tmp130, align 2 - %tmp141 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 0, i32 0 ; [#uses=1] - store i16 0, i16* %tmp141, align 8 - %tmp144 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 0, i32 1 ; [#uses=1] - store i16 0, i16* %tmp144, align 2 - %tmp148 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 7, i32 0 ; [#uses=1] - store i16 0, i16* %tmp148, align 2 - %tmp151 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 7, i32 1 ; [#uses=1] - store i16 0, i16* %tmp151, align 2 - %tmp162 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 6, i32 0 ; [#uses=1] - store i16 0, i16* %tmp162, align 2 - %tmp165 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 6, i32 1 ; [#uses=1] - store i16 0, i16* %tmp165, align 2 - %tmp176 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 5, i32 0 ; [#uses=1] - store i16 0, i16* %tmp176, align 2 - %tmp179 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 5, i32 1 ; [#uses=1] - store i16 0, i16* %tmp179, align 2 - %tmp190 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 4, i32 0 ; [#uses=1] - store i16 0, i16* %tmp190, align 2 - %tmp193 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 4, i32 1 ; [#uses=1] - store i16 0, i16* %tmp193, align 2 - %tmp204 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 3, i32 0 ; [#uses=1] - store i16 0, i16* %tmp204, align 2 - %tmp207 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 3, i32 1 ; [#uses=1] - store i16 0, i16* %tmp207, align 2 - %tmp218 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 2, i32 0 ; [#uses=1] - store i16 0, i16* %tmp218, align 2 - %tmp221 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 2, i32 1 ; [#uses=1] - store i16 0, i16* %tmp221, align 2 - %tmp232 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 1, i32 0 ; [#uses=1] - store i16 0, i16* %tmp232, align 2 - %tmp235 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 1, i32 1 ; [#uses=1] - store i16 0, i16* %tmp235, align 2 - %tmp246 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 0, i32 0 ; [#uses=1] - store i16 0, i16* %tmp246, align 8 - %tmp249 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 0, i32 1 ; [#uses=1] - store i16 0, i16* %tmp249, align 2 - %up_mvd252 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 0 ; <%struct.MV*> [#uses=1] - %left_mvd253 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 0 ; <%struct.MV*> [#uses=1] - call void @foo( %struct.MV* %up_mvd252, %struct.MV* %left_mvd253, i8* %tmp41 ) nounwind - ret i32 undef -} - -declare void @foo(%struct.MV*, %struct.MV*, i8*) diff --git a/test/Transforms/GVN/memcpy.ll b/test/Transforms/GVN/memcpy.ll deleted file mode 100644 index f24a81f..0000000 --- a/test/Transforms/GVN/memcpy.ll +++ /dev/null @@ -1,22 +0,0 @@ -; RUN: llvm-as < %s | opt -gvn -dse | llvm-dis | grep {call.*memcpy} | count 1 - -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" -target triple = "i686-apple-darwin9" - -define void @ccosl({ x86_fp80, x86_fp80 }* sret %agg.result, x86_fp80 %z.0, x86_fp80 %z.1) nounwind { -entry: - %tmp2 = alloca { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=1] - %memtmp = alloca { x86_fp80, x86_fp80 }, align 16 ; <{ x86_fp80, x86_fp80 }*> [#uses=2] - %tmp5 = sub x86_fp80 0xK80000000000000000000, %z.1 ; [#uses=1] - call void @ccoshl( { x86_fp80, x86_fp80 }* sret %memtmp, x86_fp80 %tmp5, x86_fp80 %z.0 ) nounwind - %tmp219 = bitcast { x86_fp80, x86_fp80 }* %tmp2 to i8* ; [#uses=2] - %memtmp20 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8* ; [#uses=1] - call void @llvm.memcpy.i32( i8* %tmp219, i8* %memtmp20, i32 32, i32 16 ) - %agg.result21 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8* ; [#uses=1] - call void @llvm.memcpy.i32( i8* %agg.result21, i8* %tmp219, i32 32, i32 16 ) - ret void -} - -declare void @ccoshl({ x86_fp80, x86_fp80 }* sret , x86_fp80, x86_fp80) nounwind - -declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind diff --git a/test/Transforms/GVN/sret.ll b/test/Transforms/GVN/sret.ll deleted file mode 100644 index b907b04..0000000 --- a/test/Transforms/GVN/sret.ll +++ /dev/null @@ -1,28 +0,0 @@ -; RUN: llvm-as < %s | opt -gvn | llvm-dis | not grep {call.*memcpy} - -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" -target triple = "i686-apple-darwin9" - -define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 }* byval %z) nounwind { -entry: - %iz = alloca { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=3] - %memtmp = alloca { x86_fp80, x86_fp80 }, align 16 ; <{ x86_fp80, x86_fp80 }*> [#uses=2] - %tmp1 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 1 ; [#uses=1] - %tmp2 = load x86_fp80* %tmp1, align 16 ; [#uses=1] - %tmp3 = sub x86_fp80 0xK80000000000000000000, %tmp2 ; [#uses=1] - %tmp4 = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 1 ; [#uses=1] - %real = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 0 ; [#uses=1] - %tmp7 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 0 ; [#uses=1] - %tmp8 = load x86_fp80* %tmp7, align 16 ; [#uses=1] - store x86_fp80 %tmp3, x86_fp80* %real, align 16 - store x86_fp80 %tmp8, x86_fp80* %tmp4, align 16 - call void @ccoshl( { x86_fp80, x86_fp80 }* noalias sret %memtmp, { x86_fp80, x86_fp80 }* byval %iz ) nounwind - %memtmp14 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8* ; [#uses=1] - %agg.result15 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8* ; [#uses=1] - call void @llvm.memcpy.i32( i8* %agg.result15, i8* %memtmp14, i32 32, i32 16 ) - ret void -} - -declare void @ccoshl({ x86_fp80, x86_fp80 }* noalias sret , { x86_fp80, x86_fp80 }* byval ) nounwind - -declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind diff --git a/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll b/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll new file mode 100644 index 0000000..30d0a6d --- /dev/null +++ b/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll @@ -0,0 +1,34 @@ +; RUN: llvm-as < %s | opt -memcpyopt -dse | llvm-dis | grep {call.*initialize} | not grep memtmp +; PR2077 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" +target triple = "i386-pc-linux-gnu" + +define internal fastcc void @initialize({ x86_fp80, x86_fp80 }* noalias sret %agg.result) nounwind { +entry: + %agg.result.03 = getelementptr { x86_fp80, x86_fp80 }* %agg.result, i32 0, i32 0 ; [#uses=1] + store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.03 + %agg.result.15 = getelementptr { x86_fp80, x86_fp80 }* %agg.result, i32 0, i32 1 ; [#uses=1] + store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.15 + ret void +} + +declare fastcc x86_fp80 @passed_uninitialized({ x86_fp80, x86_fp80 }* %x) nounwind + +define fastcc void @badly_optimized() nounwind { +entry: + %z = alloca { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=2] + %tmp = alloca { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=2] + %memtmp = alloca { x86_fp80, x86_fp80 }, align 8 ; <{ x86_fp80, x86_fp80 }*> [#uses=2] + call fastcc void @initialize( { x86_fp80, x86_fp80 }* noalias sret %memtmp ) + %tmp1 = bitcast { x86_fp80, x86_fp80 }* %tmp to i8* ; [#uses=1] + %memtmp2 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8* ; [#uses=1] + call void @llvm.memcpy.i32( i8* %tmp1, i8* %memtmp2, i32 24, i32 8 ) + %z3 = bitcast { x86_fp80, x86_fp80 }* %z to i8* ; [#uses=1] + %tmp4 = bitcast { x86_fp80, x86_fp80 }* %tmp to i8* ; [#uses=1] + call void @llvm.memcpy.i32( i8* %z3, i8* %tmp4, i32 24, i32 8 ) + %tmp5 = call fastcc x86_fp80 @passed_uninitialized( { x86_fp80, x86_fp80 }* %z ) ; [#uses=0] + ret void +} + +declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind diff --git a/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll b/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll new file mode 100644 index 0000000..12c9a9d --- /dev/null +++ b/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll @@ -0,0 +1,19 @@ +; RUN: llvm-as < %s | opt -memcpyopt | llvm-dis | not grep {call.*memcpy.} + %a = type { i32 } + %b = type { float } + +declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind +declare void @g(%a*) + +define float @f() { +entry: + %a_var = alloca %a + %b_var = alloca %b + call void @g(%a *%a_var) + %a_i8 = bitcast %a* %a_var to i8* + %b_i8 = bitcast %b* %b_var to i8* + call void @llvm.memcpy.i32(i8* %b_i8, i8* %a_i8, i32 4, i32 4) + %tmp1 = getelementptr %b* %b_var, i32 0, i32 0 + %tmp2 = load float* %tmp1 + ret float %tmp2 +} diff --git a/test/Transforms/MemCpyOpt/dg.exp b/test/Transforms/MemCpyOpt/dg.exp new file mode 100644 index 0000000..879685c --- /dev/null +++ b/test/Transforms/MemCpyOpt/dg.exp @@ -0,0 +1,3 @@ +load_lib llvm.exp + +RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,llx,c,cpp,tr}]] diff --git a/test/Transforms/MemCpyOpt/form-memset.ll b/test/Transforms/MemCpyOpt/form-memset.ll new file mode 100644 index 0000000..cdcd006 --- /dev/null +++ b/test/Transforms/MemCpyOpt/form-memset.ll @@ -0,0 +1,55 @@ +; RUN: llvm-as < %s | opt -memcpyopt -form-memset-from-stores | llvm-dis | not grep store +; RUN: llvm-as < %s | opt -memcpyopt -form-memset-from-stores | llvm-dis | grep {call.*llvm.memset} + +; All the stores in this example should be merged into a single memset. + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin8" + +define void @foo(i8 signext %c) nounwind { +entry: + %x = alloca [19 x i8] ; <[19 x i8]*> [#uses=20] + %tmp = getelementptr [19 x i8]* %x, i32 0, i32 0 ; [#uses=1] + store i8 %c, i8* %tmp, align 1 + %tmp5 = getelementptr [19 x i8]* %x, i32 0, i32 1 ; [#uses=1] + store i8 %c, i8* %tmp5, align 1 + %tmp9 = getelementptr [19 x i8]* %x, i32 0, i32 2 ; [#uses=1] + store i8 %c, i8* %tmp9, align 1 + %tmp13 = getelementptr [19 x i8]* %x, i32 0, i32 3 ; [#uses=1] + store i8 %c, i8* %tmp13, align 1 + %tmp17 = getelementptr [19 x i8]* %x, i32 0, i32 4 ; [#uses=1] + store i8 %c, i8* %tmp17, align 1 + %tmp21 = getelementptr [19 x i8]* %x, i32 0, i32 5 ; [#uses=1] + store i8 %c, i8* %tmp21, align 1 + %tmp25 = getelementptr [19 x i8]* %x, i32 0, i32 6 ; [#uses=1] + store i8 %c, i8* %tmp25, align 1 + %tmp29 = getelementptr [19 x i8]* %x, i32 0, i32 7 ; [#uses=1] + store i8 %c, i8* %tmp29, align 1 + %tmp33 = getelementptr [19 x i8]* %x, i32 0, i32 8 ; [#uses=1] + store i8 %c, i8* %tmp33, align 1 + %tmp37 = getelementptr [19 x i8]* %x, i32 0, i32 9 ; [#uses=1] + store i8 %c, i8* %tmp37, align 1 + %tmp41 = getelementptr [19 x i8]* %x, i32 0, i32 10 ; [#uses=1] + store i8 %c, i8* %tmp41, align 1 + %tmp45 = getelementptr [19 x i8]* %x, i32 0, i32 11 ; [#uses=1] + store i8 %c, i8* %tmp45, align 1 + %tmp49 = getelementptr [19 x i8]* %x, i32 0, i32 12 ; [#uses=1] + store i8 %c, i8* %tmp49, align 1 + %tmp53 = getelementptr [19 x i8]* %x, i32 0, i32 13 ; [#uses=1] + store i8 %c, i8* %tmp53, align 1 + %tmp57 = getelementptr [19 x i8]* %x, i32 0, i32 14 ; [#uses=1] + store i8 %c, i8* %tmp57, align 1 + %tmp61 = getelementptr [19 x i8]* %x, i32 0, i32 15 ; [#uses=1] + store i8 %c, i8* %tmp61, align 1 + %tmp65 = getelementptr [19 x i8]* %x, i32 0, i32 16 ; [#uses=1] + store i8 %c, i8* %tmp65, align 1 + %tmp69 = getelementptr [19 x i8]* %x, i32 0, i32 17 ; [#uses=1] + store i8 %c, i8* %tmp69, align 1 + %tmp73 = getelementptr [19 x i8]* %x, i32 0, i32 18 ; [#uses=1] + store i8 %c, i8* %tmp73, align 1 + %tmp76 = call i32 (...)* @bar( [19 x i8]* %x ) nounwind ; [#uses=0] + ret void +} + +declare i32 @bar(...) + diff --git a/test/Transforms/MemCpyOpt/form-memset2.ll b/test/Transforms/MemCpyOpt/form-memset2.ll new file mode 100644 index 0000000..46eb6b4 --- /dev/null +++ b/test/Transforms/MemCpyOpt/form-memset2.ll @@ -0,0 +1,99 @@ +; RUN: llvm-as < %s | opt -memcpyopt -form-memset-from-stores | llvm-dis | not grep store +; RUN: llvm-as < %s | opt -memcpyopt -form-memset-from-stores | llvm-dis | grep {call.*llvm.memset} | count 3 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin8" + %struct.MV = type { i16, i16 } + +define i32 @t() nounwind { +entry: + %ref_idx = alloca [8 x i8] ; <[8 x i8]*> [#uses=8] + %left_mvd = alloca [8 x %struct.MV] ; <[8 x %struct.MV]*> [#uses=17] + %up_mvd = alloca [8 x %struct.MV] ; <[8 x %struct.MV]*> [#uses=17] + %tmp20 = getelementptr [8 x i8]* %ref_idx, i32 0, i32 7 ; [#uses=1] + store i8 -1, i8* %tmp20, align 1 + %tmp23 = getelementptr [8 x i8]* %ref_idx, i32 0, i32 6 ; [#uses=1] + store i8 -1, i8* %tmp23, align 1 + %tmp26 = getelementptr [8 x i8]* %ref_idx, i32 0, i32 5 ; [#uses=1] + store i8 -1, i8* %tmp26, align 1 + %tmp29 = getelementptr [8 x i8]* %ref_idx, i32 0, i32 4 ; [#uses=1] + store i8 -1, i8* %tmp29, align 1 + %tmp32 = getelementptr [8 x i8]* %ref_idx, i32 0, i32 3 ; [#uses=1] + store i8 -1, i8* %tmp32, align 1 + %tmp35 = getelementptr [8 x i8]* %ref_idx, i32 0, i32 2 ; [#uses=1] + store i8 -1, i8* %tmp35, align 1 + %tmp38 = getelementptr [8 x i8]* %ref_idx, i32 0, i32 1 ; [#uses=1] + store i8 -1, i8* %tmp38, align 1 + %tmp41 = getelementptr [8 x i8]* %ref_idx, i32 0, i32 0 ; [#uses=2] + store i8 -1, i8* %tmp41, align 1 + %tmp43 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 7, i32 0 ; [#uses=1] + store i16 0, i16* %tmp43, align 2 + %tmp46 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 7, i32 1 ; [#uses=1] + store i16 0, i16* %tmp46, align 2 + %tmp57 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 6, i32 0 ; [#uses=1] + store i16 0, i16* %tmp57, align 2 + %tmp60 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 6, i32 1 ; [#uses=1] + store i16 0, i16* %tmp60, align 2 + %tmp71 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 5, i32 0 ; [#uses=1] + store i16 0, i16* %tmp71, align 2 + %tmp74 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 5, i32 1 ; [#uses=1] + store i16 0, i16* %tmp74, align 2 + %tmp85 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 4, i32 0 ; [#uses=1] + store i16 0, i16* %tmp85, align 2 + %tmp88 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 4, i32 1 ; [#uses=1] + store i16 0, i16* %tmp88, align 2 + %tmp99 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 3, i32 0 ; [#uses=1] + store i16 0, i16* %tmp99, align 2 + %tmp102 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 3, i32 1 ; [#uses=1] + store i16 0, i16* %tmp102, align 2 + %tmp113 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 2, i32 0 ; [#uses=1] + store i16 0, i16* %tmp113, align 2 + %tmp116 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 2, i32 1 ; [#uses=1] + store i16 0, i16* %tmp116, align 2 + %tmp127 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 1, i32 0 ; [#uses=1] + store i16 0, i16* %tmp127, align 2 + %tmp130 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 1, i32 1 ; [#uses=1] + store i16 0, i16* %tmp130, align 2 + %tmp141 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 0, i32 0 ; [#uses=1] + store i16 0, i16* %tmp141, align 8 + %tmp144 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 0, i32 1 ; [#uses=1] + store i16 0, i16* %tmp144, align 2 + %tmp148 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 7, i32 0 ; [#uses=1] + store i16 0, i16* %tmp148, align 2 + %tmp151 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 7, i32 1 ; [#uses=1] + store i16 0, i16* %tmp151, align 2 + %tmp162 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 6, i32 0 ; [#uses=1] + store i16 0, i16* %tmp162, align 2 + %tmp165 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 6, i32 1 ; [#uses=1] + store i16 0, i16* %tmp165, align 2 + %tmp176 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 5, i32 0 ; [#uses=1] + store i16 0, i16* %tmp176, align 2 + %tmp179 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 5, i32 1 ; [#uses=1] + store i16 0, i16* %tmp179, align 2 + %tmp190 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 4, i32 0 ; [#uses=1] + store i16 0, i16* %tmp190, align 2 + %tmp193 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 4, i32 1 ; [#uses=1] + store i16 0, i16* %tmp193, align 2 + %tmp204 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 3, i32 0 ; [#uses=1] + store i16 0, i16* %tmp204, align 2 + %tmp207 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 3, i32 1 ; [#uses=1] + store i16 0, i16* %tmp207, align 2 + %tmp218 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 2, i32 0 ; [#uses=1] + store i16 0, i16* %tmp218, align 2 + %tmp221 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 2, i32 1 ; [#uses=1] + store i16 0, i16* %tmp221, align 2 + %tmp232 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 1, i32 0 ; [#uses=1] + store i16 0, i16* %tmp232, align 2 + %tmp235 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 1, i32 1 ; [#uses=1] + store i16 0, i16* %tmp235, align 2 + %tmp246 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 0, i32 0 ; [#uses=1] + store i16 0, i16* %tmp246, align 8 + %tmp249 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 0, i32 1 ; [#uses=1] + store i16 0, i16* %tmp249, align 2 + %up_mvd252 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 0 ; <%struct.MV*> [#uses=1] + %left_mvd253 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 0 ; <%struct.MV*> [#uses=1] + call void @foo( %struct.MV* %up_mvd252, %struct.MV* %left_mvd253, i8* %tmp41 ) nounwind + ret i32 undef +} + +declare void @foo(%struct.MV*, %struct.MV*, i8*) diff --git a/test/Transforms/MemCpyOpt/memcpy.ll b/test/Transforms/MemCpyOpt/memcpy.ll new file mode 100644 index 0000000..c5cdc29 --- /dev/null +++ b/test/Transforms/MemCpyOpt/memcpy.ll @@ -0,0 +1,22 @@ +; RUN: llvm-as < %s | opt -memcpyopt -dse | llvm-dis | grep {call.*memcpy} | count 1 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i686-apple-darwin9" + +define void @ccosl({ x86_fp80, x86_fp80 }* sret %agg.result, x86_fp80 %z.0, x86_fp80 %z.1) nounwind { +entry: + %tmp2 = alloca { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=1] + %memtmp = alloca { x86_fp80, x86_fp80 }, align 16 ; <{ x86_fp80, x86_fp80 }*> [#uses=2] + %tmp5 = sub x86_fp80 0xK80000000000000000000, %z.1 ; [#uses=1] + call void @ccoshl( { x86_fp80, x86_fp80 }* sret %memtmp, x86_fp80 %tmp5, x86_fp80 %z.0 ) nounwind + %tmp219 = bitcast { x86_fp80, x86_fp80 }* %tmp2 to i8* ; [#uses=2] + %memtmp20 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8* ; [#uses=1] + call void @llvm.memcpy.i32( i8* %tmp219, i8* %memtmp20, i32 32, i32 16 ) + %agg.result21 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8* ; [#uses=1] + call void @llvm.memcpy.i32( i8* %agg.result21, i8* %tmp219, i32 32, i32 16 ) + ret void +} + +declare void @ccoshl({ x86_fp80, x86_fp80 }* sret , x86_fp80, x86_fp80) nounwind + +declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind diff --git a/test/Transforms/MemCpyOpt/sret.ll b/test/Transforms/MemCpyOpt/sret.ll new file mode 100644 index 0000000..1ac11aa --- /dev/null +++ b/test/Transforms/MemCpyOpt/sret.ll @@ -0,0 +1,28 @@ +; RUN: llvm-as < %s | opt -memcpyopt | llvm-dis | not grep {call.*memcpy} + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i686-apple-darwin9" + +define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 }* byval %z) nounwind { +entry: + %iz = alloca { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=3] + %memtmp = alloca { x86_fp80, x86_fp80 }, align 16 ; <{ x86_fp80, x86_fp80 }*> [#uses=2] + %tmp1 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 1 ; [#uses=1] + %tmp2 = load x86_fp80* %tmp1, align 16 ; [#uses=1] + %tmp3 = sub x86_fp80 0xK80000000000000000000, %tmp2 ; [#uses=1] + %tmp4 = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 1 ; [#uses=1] + %real = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 0 ; [#uses=1] + %tmp7 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 0 ; [#uses=1] + %tmp8 = load x86_fp80* %tmp7, align 16 ; [#uses=1] + store x86_fp80 %tmp3, x86_fp80* %real, align 16 + store x86_fp80 %tmp8, x86_fp80* %tmp4, align 16 + call void @ccoshl( { x86_fp80, x86_fp80 }* noalias sret %memtmp, { x86_fp80, x86_fp80 }* byval %iz ) nounwind + %memtmp14 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8* ; [#uses=1] + %agg.result15 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8* ; [#uses=1] + call void @llvm.memcpy.i32( i8* %agg.result15, i8* %memtmp14, i32 32, i32 16 ) + ret void +} + +declare void @ccoshl({ x86_fp80, x86_fp80 }* noalias sret , { x86_fp80, x86_fp80 }* byval ) nounwind + +declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind -- cgit v1.1