Prefer to duplicate PPC Altivec loads when expanding unaligned loads

When expanding unaligned Altivec loads, we use the decremented-offset trick to prevent page faults. Unfortunately, if we have a sequence of consecutive unaligned loads, this leads to suboptimal code generation: the 'extra' load from the first unaligned load could otherwise be combined with the base load from the second, but only if the decremented-offset trick is not used for the first. Search up and down the chain, through loads and token factors, looking for consecutive loads, and if one is found, don't use the decremented-offset trick. These duplicate loads are later combined to yield the desired sequence. (In the future, we might want a more powerful chain search, but that will require some changes to allow the combiner routines to access the alias-analysis (AA) object.) This should complete the initial implementation of the optimized unaligned Altivec load expansion. There is some refactoring that should be done, but that will happen when the unaligned store expansion is added.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@182719 91177308-0d34-0410-b5e6-96231b3b80d8
author: Hal Finkel <hfinkel@anl.gov> 2013-05-26 18:08:30 +0000
committer: Hal Finkel <hfinkel@anl.gov> 2013-05-26 18:08:30 +0000
commit: 1907cad7c822f07894a1189886fa7577f109045a (patch)
tree: b0f3d0701b5c644414398333635637b68c96a342 /test/CodeGen/PowerPC/unal-altivec.ll
parent: ea991637e491a171571c7361b07a5e55f4c08307 (diff)
download: external_llvm-1907cad7c822f07894a1189886fa7577f109045a.zip
external_llvm-1907cad7c822f07894a1189886fa7577f109045a.tar.gz
external_llvm-1907cad7c822f07894a1189886fa7577f109045a.tar.bz2
1 files changed, 13 insertions, 12 deletions
diff --git a/test/CodeGen/PowerPC/unal-altivec.ll b/test/CodeGen/PowerPC/unal-altivec.ll
index dec93eb..7f333a1 100644
--- a/test/CodeGen/PowerPC/unal-altivec.ll
+++ b/test/CodeGen/PowerPC/unal-altivec.ll
@@ -1,5 +1,4 @@
 ; RUN: llc < %s -mcpu=g5 | FileCheck %s
-; RUN: llc < %s -mcpu=g5 | FileCheck %s -check-prefix=CHECK-PC
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
@@ -30,20 +29,22 @@ vector.body:                                      ; preds = %vector.body, %vecto
   br i1 %10, label %for.end, label %vector.body
 
 ; CHECK: @foo
-; CHECK: lvx [[CNST:[0-9]+]],
-; CHECK-DAG: lvsl [[PC:[0-9]+]], [[B1:[0-9]+]], [[B2:[0-9]+]]
-; CHECK-DAG: lvx [[LD1:[0-9]+]], [[B1]], [[B2]]
-; CHECK-DAG: add [[B3:[0-9]+]], [[B1]], [[B2]]
-; CHECK-DAG: lvx [[LD2:[0-9]+]], [[B3]],
+; CHECK-DAG: li [[C0:[0-9]+]], 0
+; CHECK-DAG: li [[C16:[0-9]+]], 16
+; CHECK-DAG: li [[C31:[0-9]+]], 31
+; CHECK-DAG: lvx [[CNST:[0-9]+]],
+; CHECK: .LBB0_1:
+; CHECK-DAG: lvsl [[PC:[0-9]+]], [[B1:[0-9]+]], [[C0]]
+; CHECK-DAG: lvx [[LD1:[0-9]+]], [[B1]], [[C0]]
+; CHECK-DAG: add [[B3:[0-9]+]], [[B1]], [[C0]]
+; CHECK-DAG: lvx [[LD2:[0-9]+]], [[B3]], [[C16]]
+; CHECK-DAG: lvx [[LD3:[0-9]+]], [[B3]], [[C31]]
 ; CHECK-DAG: vperm [[R1:[0-9]+]], [[LD1]], [[LD2]], [[PC]]
-; CHECK: vaddfp {{[0-9]+}}, [[R1]], [[CNST]]
+; CHECK-DAG: vperm [[R2:[0-9]+]], [[LD2]], [[LD3]], [[PC]]
+; CHECK-DAG: vaddfp {{[0-9]+}}, [[R1]], [[CNST]]
+; CHECK-DAG: vaddfp {{[0-9]+}}, [[R2]], [[CNST]]
 ; CHECK: blr
 
-; CHECK-PC: @foo
-; CHECK-PC: lvsl
-; CHECK-PC-NOT: lvsl
-; CHECK-PC: blr
-
 for.end:                                          ; preds = %vector.body
   ret void
 }
author	Hal Finkel <hfinkel@anl.gov>	2013-05-26 18:08:30 +0000
committer	Hal Finkel <hfinkel@anl.gov>	2013-05-26 18:08:30 +0000
commit	1907cad7c822f07894a1189886fa7577f109045a (patch)
tree	b0f3d0701b5c644414398333635637b68c96a342 /test/CodeGen/PowerPC/unal-altivec.ll
parent	ea991637e491a171571c7361b07a5e55f4c08307 (diff)
download	external_llvm-1907cad7c822f07894a1189886fa7577f109045a.zip external_llvm-1907cad7c822f07894a1189886fa7577f109045a.tar.gz external_llvm-1907cad7c822f07894a1189886fa7577f109045a.tar.bz2