aboutsummaryrefslogtreecommitdiffstats
path: root/test/Transforms/LoopVectorize/X86
diff options
context:
space:
mode:
authorArnold Schwaighofer <aschwaighofer@apple.com>2013-04-24 16:16:01 +0000
committerArnold Schwaighofer <aschwaighofer@apple.com>2013-04-24 16:16:01 +0000
commita4b8b4ccc9ecd6863606c7cdde6ec1b38734708a (patch)
treeb0acc2542bf99031437884cde0e81dd4047ae07d /test/Transforms/LoopVectorize/X86
parentb03ad17536097a0f172428c939e80ce7657e201b (diff)
downloadexternal_llvm-a4b8b4ccc9ecd6863606c7cdde6ec1b38734708a.zip
external_llvm-a4b8b4ccc9ecd6863606c7cdde6ec1b38734708a.tar.gz
external_llvm-a4b8b4ccc9ecd6863606c7cdde6ec1b38734708a.tar.bz2
LoopVectorize: Scalarize padded types
This patch disables memory-instruction vectorization for types that need padding bytes, e.g., x86_fp80 has 10 bytes store size with 6 bytes padding in darwin on x86_64. Because the load/store vectorization is performed by the bit casting to a packed vector, which has incompatible memory layout due to the lack of padding bytes, the present vectorizer produces inconsistent result for memory instructions of those types. This patch checks an equality of the AllocSize of a scalar type and allocated size for each vector element, to ensure that there is no padding bytes and the array can be read/written using vector operations. Patch by Daisuke Takahashi! Fixes PR15758. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@180196 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/Transforms/LoopVectorize/X86')
-rw-r--r--test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll29
1 files changed, 29 insertions, 0 deletions
diff --git a/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll b/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll
new file mode 100644
index 0000000..b66119f
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll
@@ -0,0 +1,29 @@
+; RUN: opt -O3 -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.0"
+
+@x = common global [1024 x x86_fp80] zeroinitializer, align 16
+
+;CHECK: @example
+;CHECK-NOT: bitcast x86_fp80* {{%[^ ]+}} to <{{[2-9][0-9]*}} x x86_fp80>*
+;CHECK: store
+;CHECK: ret void
+
+define void @example() nounwind ssp uwtable {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %conv = sitofp i32 1 to x86_fp80
+ %arrayidx = getelementptr inbounds [1024 x x86_fp80]* @x, i64 0, i64 %indvars.iv
+ store x86_fp80 %conv, x86_fp80* %arrayidx, align 16
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 1024
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}