Fix PR2369 by making scalarrepl more careful about promoting structures.

Its default threshold is to promote things that are smaller than 128 bytes, which is sane. However, it is not sane to do this for things that turn into 128 *registers*. Add a cap on the number of registers introduced, defaulting to 128/4=32.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@52611 91177308-0d34-0410-b5e6-96231b3b80d8
author: Chris Lattner <sabre@nondot.org> 2008-06-22 17:46:21 +0000
committer: Chris Lattner <sabre@nondot.org> 2008-06-22 17:46:21 +0000
commit: 963a97f1a365c8d09ca681e922371f9ec3473ee8 (patch)
tree: 6ef8e9881a444efb5b28f55b6df2eed615fd9a6d
parent: 69bfb15ecd71881ec8d01be94af99b6d90d2fe01 (diff)
download: external_llvm-963a97f1a365c8d09ca681e922371f9ec3473ee8.zip
external_llvm-963a97f1a365c8d09ca681e922371f9ec3473ee8.tar.gz
external_llvm-963a97f1a365c8d09ca681e922371f9ec3473ee8.tar.bz2
2 files changed, 32 insertions, 5 deletions
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 92d1e20..67ee700 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -178,6 +178,14 @@ bool SROA::performPromotion(Function &F) {
   return Changed;
 }
 
+/// getNumSAElements - Return the number of top-level elements in the given
+/// struct or array type; any non-struct T is handed to cast<ArrayType>.
+static uint64_t getNumSAElements(const Type *T) {
+  if (const StructType *ST = dyn_cast<StructType>(T))
+    return ST->getNumElements();
+  return cast<ArrayType>(T)->getNumElements();
+}
+
 // performScalarRepl - This algorithm is a simple worklist driven algorithm,
 // which runs on all of the malloc/alloca instructions in the function, removing
 // them if they are only used by getelementptr instructions.
@@ -224,7 +232,10 @@ bool SROA::performScalarRepl(Function &F) {
         (isa<StructType>(AI->getAllocatedType()) ||
          isa<ArrayType>(AI->getAllocatedType())) &&
         AI->getAllocatedType()->isSized() &&
-        TD.getABITypeSize(AI->getAllocatedType()) < SRThreshold) {
+        // Only promote aggregates smaller than SRThreshold ("128") bytes.
+        TD.getABITypeSize(AI->getAllocatedType()) < SRThreshold &&
+        // Do not promote any aggregate into "32" or more separate vars.
+        getNumSAElements(AI->getAllocatedType()) < SRThreshold/4) {
       // Check that all of the users of the allocation are capable of being
       // transformed.
       switch (isSafeAllocaToScalarRepl(AI)) {
@@ -672,11 +683,9 @@ void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI,
       // If this is a memcpy/memmove, emit a GEP of the other element address.
       Value *OtherElt = 0;
       if (OtherPtr) {
-        Value *Idx[2];
-        Idx[0] = Zero;
-        Idx[1] = ConstantInt::get(Type::Int32Ty, i);
+        Value *Idx[2] = { Zero, ConstantInt::get(Type::Int32Ty, i) };
         OtherElt = GetElementPtrInst::Create(OtherPtr, Idx, Idx + 2,
-                                             OtherPtr->getNameStr()+"."+utostr(i),
+                                           OtherPtr->getNameStr()+"."+utostr(i),
                                              MI);
       }
 
diff --git a/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll b/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll
new file mode 100644
index 0000000..e90dc02
--- /dev/null
+++ b/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll
@@ -0,0 +1,18 @@
+; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | grep {call.*mem} 
+; PR2369
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin8"
+
+define void @memtest1(i8* %dst, i8* %src) nounwind  {
+entry:
+	%temp = alloca [100 x i8]		; <[100 x i8]*> [#uses=2]
+	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	%temp1 = bitcast [100 x i8]* %temp to i8*		; <i8*> [#uses=1]
+	call void @llvm.memcpy.i32( i8* %temp1, i8* %src, i32 100, i32 1 )
+	%temp3 = bitcast [100 x i8]* %temp to i8*		; <i8*> [#uses=1]
+	call void @llvm.memcpy.i32( i8* %dst, i8* %temp3, i32 100, i32 1 )
+	ret void
+}
+
+declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
author	Chris Lattner <sabre@nondot.org>	2008-06-22 17:46:21 +0000
committer	Chris Lattner <sabre@nondot.org>	2008-06-22 17:46:21 +0000
commit	963a97f1a365c8d09ca681e922371f9ec3473ee8 (patch)
tree	6ef8e9881a444efb5b28f55b6df2eed615fd9a6d
parent	69bfb15ecd71881ec8d01be94af99b6d90d2fe01 (diff)
download	external_llvm-963a97f1a365c8d09ca681e922371f9ec3473ee8.zip external_llvm-963a97f1a365c8d09ca681e922371f9ec3473ee8.tar.gz external_llvm-963a97f1a365c8d09ca681e922371f9ec3473ee8.tar.bz2