commit 9827b78b51f285e90c2b1e5add9b28d10c88595c
tree   8874e9292f3420d47b8f6188b42934c3be18eaf1
parent ad7d8a598b17452138c20f7a7acdcd7e1f0053d8
author Cameron Zwarich <zwarich@apple.com> 2011-03-29 05:19:52 +0000
committer Cameron Zwarich <zwarich@apple.com> 2011-03-29 05:19:52 +0000
Do some simple copy propagation through integer loads and stores when
promoting vector types. This helps a lot with inlined functions when
using the ARM soft float ABI. Fixes <rdar://problem/9184212>.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@128453 91177308-0d34-0410-b5e6-96231b3b80d8
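To make the pattern concrete, here is a minimal sketch in the same era of IR
syntax as the test below (the function @g is hypothetical and not part of this
commit): a vector is stored to a stack slot and immediately reloaded through an
integer-typed view of the same memory, which is roughly what soft-float ABI
lowering of an inlined callee produces. The copy propagation described above
lets scalarrepl forward the stored vector value through the integer load/store
pair, so the alloca can be promoted and @g ideally folds to a plain return of %v.

; Hypothetical illustration of the pattern; not part of the commit.
define <4 x float> @g(<4 x float> %v) nounwind {
entry:
  %a = alloca <4 x float>, align 16
  ; The vector is spilled to the stack...
  store <4 x float> %v, <4 x float>* %a, align 16
  ; ...and reloaded through a 128-bit integer view of the same slot.
  %p = bitcast <4 x float>* %a to i128*
  %i = load i128* %p, align 16
  %w = bitcast i128 %i to <4 x float>
  ret <4 x float> %w
}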
Diffstat (limited to 'test/Transforms/ScalarRepl/inline-vector.ll')
-rw-r--r-- test/Transforms/ScalarRepl/inline-vector.ll | 53
1 file changed, 53 insertions(+), 0 deletions(-)
diff --git a/test/Transforms/ScalarRepl/inline-vector.ll b/test/Transforms/ScalarRepl/inline-vector.ll
new file mode 100644
index 0000000..2f51cc7
--- /dev/null
+++ b/test/Transforms/ScalarRepl/inline-vector.ll
@@ -0,0 +1,53 @@
+; RUN: opt < %s -scalarrepl -S | FileCheck %s
+; RUN: opt < %s -scalarrepl-ssa -S | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
+target triple = "thumbv7-apple-darwin10.0.0"
+
+%struct.Vector4 = type { float, float, float, float }
+@f.vector = internal constant %struct.Vector4 { float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 }, align 16
+
+; CHECK: define void @f
+; CHECK-NOT: alloca
+; CHECK: phi <4 x float>
+
+define void @f() nounwind ssp {
+entry:
+ %i = alloca i32, align 4
+ %vector = alloca %struct.Vector4, align 16
+ %agg.tmp = alloca %struct.Vector4, align 16
+ %tmp = bitcast %struct.Vector4* %vector to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* bitcast (%struct.Vector4* @f.vector to i8*), i32 16, i32 16, i1 false)
+ br label %for.cond
+
+for.cond: ; preds = %for.body, %entry
+ %storemerge = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ store i32 %storemerge, i32* %i, align 4
+ %cmp = icmp slt i32 %storemerge, 1000000
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %tmp2 = bitcast %struct.Vector4* %agg.tmp to i8*
+ %tmp3 = bitcast %struct.Vector4* %vector to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp3, i32 16, i32 16, i1 false)
+ %0 = bitcast %struct.Vector4* %agg.tmp to [2 x i64]*
+ %1 = load [2 x i64]* %0, align 16
+ %tmp2.i = extractvalue [2 x i64] %1, 0
+ %tmp3.i = zext i64 %tmp2.i to i128
+ %tmp10.i = bitcast i128 %tmp3.i to <4 x float>
+ %sub.i.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %tmp10.i
+ %2 = bitcast %struct.Vector4* %vector to <4 x float>*
+ store <4 x float> %sub.i.i, <4 x float>* %2, align 16
+ %tmp4 = load i32* %i, align 4
+ %inc = add nsw i32 %tmp4, 1
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %x = getelementptr inbounds %struct.Vector4* %vector, i32 0, i32 0
+ %tmp5 = load float* %x, align 16
+ %conv = fpext float %tmp5 to double
+ %call = call i32 (...)* @printf(double %conv) nounwind
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare i32 @printf(...)
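
A note on what the test verifies: both RUN lines pipe the file through opt and
FileCheck, once with the classic -scalarrepl pass and once with the
-scalarrepl-ssa variant (presumably the SSAUpdater-based promotion path). The
CHECK lines then require that no alloca survives in @f and that the negated
vector is instead carried around the loop by a phi of <4 x float>; without the
new copy propagation, the [2 x i64] reload of %agg.tmp would keep both vector
allocas in memory.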