diff options
-rw-r--r-- | examples/BrainF/BrainF.cpp | 7 | ||||
-rw-r--r-- | include/llvm/Analysis/MemoryBuiltins.h | 34 | ||||
-rw-r--r-- | include/llvm/Instructions.h | 5 | ||||
-rw-r--r-- | lib/Analysis/MemoryBuiltins.cpp | 76 | ||||
-rw-r--r-- | lib/AsmParser/LLParser.cpp | 4 | ||||
-rw-r--r-- | lib/Bitcode/Reader/BitcodeReader.cpp | 4 | ||||
-rw-r--r-- | lib/Transforms/IPO/GlobalOpt.cpp | 141 | ||||
-rw-r--r-- | lib/VMCore/Core.cpp | 22 | ||||
-rw-r--r-- | lib/VMCore/Instructions.cpp | 45 | ||||
-rw-r--r-- | test/Analysis/PointerTracking/sizes.ll | 18 | ||||
-rw-r--r-- | test/Transforms/GlobalOpt/2009-06-01-RecursivePHI.ll | 7 | ||||
-rw-r--r-- | test/Transforms/GlobalOpt/heap-sra-1.ll | 16 | ||||
-rw-r--r-- | test/Transforms/GlobalOpt/heap-sra-2.ll | 14 | ||||
-rw-r--r-- | test/Transforms/GlobalOpt/heap-sra-3.ll | 14 | ||||
-rw-r--r-- | test/Transforms/GlobalOpt/heap-sra-4.ll | 14 | ||||
-rw-r--r-- | test/Transforms/GlobalOpt/heap-sra-phi.ll | 8 | ||||
-rw-r--r-- | test/Transforms/GlobalOpt/malloc-promote-1.ll | 9 | ||||
-rw-r--r-- | test/Transforms/GlobalOpt/malloc-promote-2.ll | 8 | ||||
-rw-r--r-- | test/Transforms/GlobalOpt/malloc-promote-3.ll | 8 |
19 files changed, 252 insertions, 202 deletions
diff --git a/examples/BrainF/BrainF.cpp b/examples/BrainF/BrainF.cpp index f17a950..a443ad4 100644 --- a/examples/BrainF/BrainF.cpp +++ b/examples/BrainF/BrainF.cpp @@ -81,8 +81,11 @@ void BrainF::header(LLVMContext& C) { ConstantInt *val_mem = ConstantInt::get(C, APInt(32, memtotal)); BasicBlock* BB = builder->GetInsertBlock(); const Type* IntPtrTy = IntegerType::getInt32Ty(C); - ptr_arr = CallInst::CreateMalloc(BB, IntPtrTy, IntegerType::getInt8Ty(C), - val_mem, NULL, "arr"); + const Type* Int8Ty = IntegerType::getInt8Ty(C); + Constant* allocsize = ConstantExpr::getSizeOf(Int8Ty); + allocsize = ConstantExpr::getTruncOrBitCast(allocsize, IntPtrTy); + ptr_arr = CallInst::CreateMalloc(BB, IntPtrTy, Int8Ty, allocsize, val_mem, + NULL, "arr"); BB->getInstList().push_back(cast<Instruction>(ptr_arr)); //call void @llvm.memset.i32(i8 *%arr, i8 0, i32 %d, i32 1) diff --git a/include/llvm/Analysis/MemoryBuiltins.h b/include/llvm/Analysis/MemoryBuiltins.h index 4272194..6944564 100644 --- a/include/llvm/Analysis/MemoryBuiltins.h +++ b/include/llvm/Analysis/MemoryBuiltins.h @@ -33,44 +33,48 @@ bool isMalloc(const Value *I); /// extractMallocCall - Returns the corresponding CallInst if the instruction /// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we /// ignore InvokeInst here. -const CallInst* extractMallocCall(const Value *I); -CallInst* extractMallocCall(Value *I); +const CallInst *extractMallocCall(const Value *I); +CallInst *extractMallocCall(Value *I); /// extractMallocCallFromBitCast - Returns the corresponding CallInst if the /// instruction is a bitcast of the result of a malloc call. -const CallInst* extractMallocCallFromBitCast(const Value *I); -CallInst* extractMallocCallFromBitCast(Value *I); +const CallInst *extractMallocCallFromBitCast(const Value *I); +CallInst *extractMallocCallFromBitCast(Value *I); /// isArrayMalloc - Returns the corresponding CallInst if the instruction /// is a call to malloc whose array size can be determined and the array size /// is not constant 1. Otherwise, return NULL. -CallInst* isArrayMalloc(Value *I, const TargetData *TD); -const CallInst* isArrayMalloc(const Value *I, +CallInst *isArrayMalloc(Value *I, const TargetData *TD); +const CallInst *isArrayMalloc(const Value *I, const TargetData *TD); /// getMallocType - Returns the PointerType resulting from the malloc call. -/// This PointerType is the result type of the call's only bitcast use. -/// If there is no unique bitcast use, then return NULL. -const PointerType* getMallocType(const CallInst *CI); +/// The PointerType depends on the number of bitcast uses of the malloc call: +/// 0: PointerType is the malloc calls' return type. +/// 1: PointerType is the bitcast's result type. +/// >1: Unique PointerType cannot be determined, return NULL. +const PointerType *getMallocType(const CallInst *CI); -/// getMallocAllocatedType - Returns the Type allocated by malloc call. This -/// Type is the result type of the call's only bitcast use. If there is no -/// unique bitcast use, then return NULL. -const Type* getMallocAllocatedType(const CallInst *CI); +/// getMallocAllocatedType - Returns the Type allocated by malloc call. +/// The Type depends on the number of bitcast uses of the malloc call: +/// 0: PointerType is the malloc calls' return type. +/// 1: PointerType is the bitcast's result type. +/// >1: Unique PointerType cannot be determined, return NULL. +const Type *getMallocAllocatedType(const CallInst *CI); /// getMallocArraySize - Returns the array size of a malloc call. If the /// argument passed to malloc is a multiple of the size of the malloced type, /// then return that multiple. For non-array mallocs, the multiple is /// constant 1. Otherwise, return NULL for mallocs whose array size cannot be /// determined. -Value* getMallocArraySize(CallInst *CI, const TargetData *TD); +Value *getMallocArraySize(CallInst *CI, const TargetData *TD); //===----------------------------------------------------------------------===// // free Call Utility Functions. // /// isFreeCall - Returns true if the the value is a call to the builtin free() -bool isFreeCall(const Value* I); +bool isFreeCall(const Value *I); } // End llvm namespace diff --git a/include/llvm/Instructions.h b/include/llvm/Instructions.h index 28854df..5b48e1a 100644 --- a/include/llvm/Instructions.h +++ b/include/llvm/Instructions.h @@ -899,11 +899,12 @@ public: /// 3. Bitcast the result of the malloc call to the specified type. static Instruction *CreateMalloc(Instruction *InsertBefore, const Type *IntPtrTy, const Type *AllocTy, - Value *ArraySize = 0, + Value *AllocSize, Value *ArraySize = 0, const Twine &Name = ""); static Instruction *CreateMalloc(BasicBlock *InsertAtEnd, const Type *IntPtrTy, const Type *AllocTy, - Value *ArraySize = 0, Function* MallocF = 0, + Value *AllocSize, Value *ArraySize = 0, + Function* MallocF = 0, const Twine &Name = ""); /// CreateFree - Generate the IR for a call to the builtin free function. static void CreateFree(Value* Source, Instruction *InsertBefore); diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index 2ca004e..6e20940 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -17,6 +17,7 @@ #include "llvm/Instructions.h" #include "llvm/Module.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Target/TargetData.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -95,45 +96,47 @@ static Value *isArrayMallocHelper(const CallInst *CI, const TargetData *TD) { if (!CI) return NULL; - // Type must be known to determine array size. + // The size of the malloc's result type must be known to determine array size. const Type *T = getMallocAllocatedType(CI); - if (!T) + if (!T || !T->isSized() || !TD) return NULL; Value *MallocArg = CI->getOperand(1); + const Type *ArgType = MallocArg->getType(); ConstantExpr *CO = dyn_cast<ConstantExpr>(MallocArg); BinaryOperator *BO = dyn_cast<BinaryOperator>(MallocArg); - Constant *ElementSize = ConstantExpr::getSizeOf(T); - ElementSize = ConstantExpr::getTruncOrBitCast(ElementSize, - MallocArg->getType()); - Constant *FoldedElementSize = - ConstantFoldConstantExpression(cast<ConstantExpr>(ElementSize), TD); + unsigned ElementSizeInt = TD->getTypeAllocSize(T); + if (const StructType *ST = dyn_cast<StructType>(T)) + ElementSizeInt = TD->getStructLayout(ST)->getSizeInBytes(); + Constant *ElementSize = ConstantInt::get(ArgType, ElementSizeInt); // First, check if CI is a non-array malloc. - if (CO && ((CO == ElementSize) || - (FoldedElementSize && (CO == FoldedElementSize)))) + if (CO && CO == ElementSize) // Match CreateMalloc's use of constant 1 array-size for non-array mallocs. - return ConstantInt::get(MallocArg->getType(), 1); + return ConstantInt::get(ArgType, 1); // Second, check if CI is an array malloc whose array size can be determined. - if (isConstantOne(ElementSize) || - (FoldedElementSize && isConstantOne(FoldedElementSize))) + if (isConstantOne(ElementSize)) return MallocArg; + if (ConstantInt *CInt = dyn_cast<ConstantInt>(MallocArg)) + if (CInt->getZExtValue() % ElementSizeInt == 0) + return ConstantInt::get(ArgType, CInt->getZExtValue() / ElementSizeInt); + if (!CO && !BO) return NULL; Value *Op0 = NULL; Value *Op1 = NULL; unsigned Opcode = 0; - if (CO && ((CO->getOpcode() == Instruction::Mul) || + if (CO && ((CO->getOpcode() == Instruction::Mul) || (CO->getOpcode() == Instruction::Shl))) { Op0 = CO->getOperand(0); Op1 = CO->getOperand(1); Opcode = CO->getOpcode(); } - if (BO && ((BO->getOpcode() == Instruction::Mul) || + if (BO && ((BO->getOpcode() == Instruction::Mul) || (BO->getOpcode() == Instruction::Shl))) { Op0 = BO->getOperand(0); Op1 = BO->getOperand(1); @@ -143,12 +146,10 @@ static Value *isArrayMallocHelper(const CallInst *CI, const TargetData *TD) { // Determine array size if malloc's argument is the product of a mul or shl. if (Op0) { if (Opcode == Instruction::Mul) { - if ((Op1 == ElementSize) || - (FoldedElementSize && (Op1 == FoldedElementSize))) + if (Op1 == ElementSize) // ArraySize * ElementSize return Op0; - if ((Op0 == ElementSize) || - (FoldedElementSize && (Op0 == FoldedElementSize))) + if (Op0 == ElementSize) // ElementSize * ArraySize return Op1; } @@ -160,11 +161,10 @@ static Value *isArrayMallocHelper(const CallInst *CI, const TargetData *TD) { uint64_t BitToSet = Op1Int.getLimitedValue(Op1Int.getBitWidth() - 1); Value *Op1Pow = ConstantInt::get(Op1CI->getContext(), APInt(Op1Int.getBitWidth(), 0).set(BitToSet)); - if (Op0 == ElementSize || (FoldedElementSize && Op0 == FoldedElementSize)) + if (Op0 == ElementSize) // ArraySize << log2(ElementSize) return Op1Pow; - if (Op1Pow == ElementSize || - (FoldedElementSize && Op1Pow == FoldedElementSize)) + if (Op1Pow == ElementSize) // ElementSize << log2(ArraySize) return Op0; } @@ -202,35 +202,41 @@ const CallInst *llvm::isArrayMalloc(const Value *I, const TargetData *TD) { } /// getMallocType - Returns the PointerType resulting from the malloc call. -/// This PointerType is the result type of the call's only bitcast use. -/// If there is no unique bitcast use, then return NULL. +/// The PointerType depends on the number of bitcast uses of the malloc call: +/// 0: PointerType is the calls' return type. +/// 1: PointerType is the bitcast's result type. +/// >1: Unique PointerType cannot be determined, return NULL. const PointerType *llvm::getMallocType(const CallInst *CI) { assert(isMalloc(CI) && "GetMallocType and not malloc call"); - const BitCastInst *BCI = NULL; - + const PointerType *MallocType = NULL; + unsigned NumOfBitCastUses = 0; + // Determine if CallInst has a bitcast use. for (Value::use_const_iterator UI = CI->use_begin(), E = CI->use_end(); UI != E; ) - if ((BCI = dyn_cast<BitCastInst>(cast<Instruction>(*UI++)))) - break; + if (const BitCastInst *BCI = dyn_cast<BitCastInst>(*UI++)) { + MallocType = cast<PointerType>(BCI->getDestTy()); + NumOfBitCastUses++; + } - // Malloc call has 1 bitcast use and no other uses, so type is the bitcast's - // destination type. - if (BCI && CI->hasOneUse()) - return cast<PointerType>(BCI->getDestTy()); + // Malloc call has 1 bitcast use, so type is the bitcast's destination type. + if (NumOfBitCastUses == 1) + return MallocType; // Malloc call was not bitcast, so type is the malloc function's return type. - if (!BCI) + if (NumOfBitCastUses == 0) return cast<PointerType>(CI->getType()); // Type could not be determined. return NULL; } -/// getMallocAllocatedType - Returns the Type allocated by malloc call. This -/// Type is the result type of the call's only bitcast use. If there is no -/// unique bitcast use, then return NULL. +/// getMallocAllocatedType - Returns the Type allocated by malloc call. +/// The Type depends on the number of bitcast uses of the malloc call: +/// 0: PointerType is the malloc calls' return type. +/// 1: PointerType is the bitcast's result type. +/// >1: Unique PointerType cannot be determined, return NULL. const Type *llvm::getMallocAllocatedType(const CallInst *CI) { const PointerType *PT = getMallocType(CI); return PT ? PT->getElementType() : NULL; diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index b53d9383..63af42d 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -3619,12 +3619,14 @@ bool LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS, // Autoupgrade old malloc instruction to malloc call. // FIXME: Remove in LLVM 3.0. const Type *IntPtrTy = Type::getInt32Ty(Context); + Constant *AllocSize = ConstantExpr::getSizeOf(Ty); + AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, IntPtrTy); if (!MallocF) // Prototype malloc as "void *(int32)". // This function is renamed as "malloc" in ValidateEndOfModule(). MallocF = cast<Function>( M->getOrInsertFunction("", Type::getInt8PtrTy(Context), IntPtrTy, NULL)); - Inst = CallInst::CreateMalloc(BB, IntPtrTy, Ty, Size, MallocF); + Inst = CallInst::CreateMalloc(BB, IntPtrTy, Ty, AllocSize, Size, MallocF); return false; } diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 68527e3..9916388 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -2101,8 +2101,10 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { if (!Ty || !Size) return Error("Invalid MALLOC record"); if (!CurBB) return Error("Invalid malloc instruction with no BB"); const Type *Int32Ty = IntegerType::getInt32Ty(CurBB->getContext()); + Constant *AllocSize = ConstantExpr::getSizeOf(Ty->getElementType()); + AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, Int32Ty); I = CallInst::CreateMalloc(CurBB, Int32Ty, Ty->getElementType(), - Size, NULL); + AllocSize, Size, NULL); InstructionList.push_back(I); break; } diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 0378231..408ac3c 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -812,31 +812,41 @@ static void ConstantPropUsersOf(Value *V) { /// malloc into a global, and any loads of GV as uses of the new global. static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, - BitCastInst *BCI, + const Type *AllocTy, Value* NElems, TargetData* TD) { - DEBUG(errs() << "PROMOTING MALLOC GLOBAL: " << *GV - << " CALL = " << *CI << " BCI = " << *BCI << '\n'); + DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n'); const Type *IntPtrTy = TD->getIntPtrType(GV->getContext()); + // CI has either 0 or 1 bitcast uses (getMallocType() would otherwise have + // returned NULL and we would not be here). + BitCastInst *BCI = NULL; + for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end(); UI != E; ) + if ((BCI = dyn_cast<BitCastInst>(cast<Instruction>(*UI++)))) + break; + ConstantInt *NElements = cast<ConstantInt>(NElems); if (NElements->getZExtValue() != 1) { // If we have an array allocation, transform it to a single element // allocation to make the code below simpler. - Type *NewTy = ArrayType::get(getMallocAllocatedType(CI), - NElements->getZExtValue()); - Value* NewM = CallInst::CreateMalloc(CI, IntPtrTy, NewTy); - Instruction* NewMI = cast<Instruction>(NewM); + Type *NewTy = ArrayType::get(AllocTy, NElements->getZExtValue()); + unsigned TypeSize = TD->getTypeAllocSize(NewTy); + if (const StructType *ST = dyn_cast<StructType>(NewTy)) + TypeSize = TD->getStructLayout(ST)->getSizeInBytes(); + Instruction *NewCI = CallInst::CreateMalloc(CI, IntPtrTy, NewTy, + ConstantInt::get(IntPtrTy, TypeSize)); Value* Indices[2]; Indices[0] = Indices[1] = Constant::getNullValue(IntPtrTy); - Value *NewGEP = GetElementPtrInst::Create(NewMI, Indices, Indices + 2, - NewMI->getName()+".el0", CI); - BCI->replaceAllUsesWith(NewGEP); - BCI->eraseFromParent(); + Value *NewGEP = GetElementPtrInst::Create(NewCI, Indices, Indices + 2, + NewCI->getName()+".el0", CI); + Value *Cast = new BitCastInst(NewGEP, CI->getType(), "el0", CI); + if (BCI) BCI->replaceAllUsesWith(NewGEP); + CI->replaceAllUsesWith(Cast); + if (BCI) BCI->eraseFromParent(); CI->eraseFromParent(); - BCI = cast<BitCastInst>(NewMI); - CI = extractMallocCallFromBitCast(NewMI); + BCI = dyn_cast<BitCastInst>(NewCI); + CI = BCI ? extractMallocCallFromBitCast(BCI) : cast<CallInst>(NewCI); } // Create the new global variable. The contents of the malloc'd memory is @@ -850,8 +860,9 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, GV, GV->isThreadLocal()); - // Anything that used the malloc now uses the global directly. - BCI->replaceAllUsesWith(NewGV); + // Anything that used the malloc or its bitcast now uses the global directly. + if (BCI) BCI->replaceAllUsesWith(NewGV); + CI->replaceAllUsesWith(new BitCastInst(NewGV, CI->getType(), "newgv", CI)); Constant *RepValue = NewGV; if (NewGV->getType() != GV->getType()->getElementType()) @@ -919,9 +930,9 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, GV->getParent()->getGlobalList().insert(GV, InitBool); - // Now the GV is dead, nuke it and the malloc. + // Now the GV is dead, nuke it and the malloc (both CI and BCI). GV->eraseFromParent(); - BCI->eraseFromParent(); + if (BCI) BCI->eraseFromParent(); CI->eraseFromParent(); // To further other optimizations, loop over all users of NewGV and try to @@ -1255,12 +1266,9 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, /// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break /// it up into multiple allocations of arrays of the fields. -static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, - CallInst *CI, BitCastInst* BCI, - Value* NElems, - TargetData *TD) { - DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << " MALLOC CALL = " << *CI - << " BITCAST = " << *BCI << '\n'); +static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, + Value* NElems, TargetData *TD) { + DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n'); const Type* MAT = getMallocAllocatedType(CI); const StructType *STy = cast<StructType>(MAT); @@ -1268,8 +1276,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, // it into GV). If there are other uses, change them to be uses of // the global to simplify later code. This also deletes the store // into GV. - ReplaceUsesOfMallocWithGlobal(BCI, GV); - + ReplaceUsesOfMallocWithGlobal(CI, GV); + // Okay, at this point, there are no users of the malloc. Insert N // new mallocs at the same place as CI, and N globals. std::vector<Value*> FieldGlobals; @@ -1287,11 +1295,16 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, GV->isThreadLocal()); FieldGlobals.push_back(NGV); - Value *NMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(CI->getContext()), - FieldTy, NElems, - BCI->getName() + ".f" + Twine(FieldNo)); + unsigned TypeSize = TD->getTypeAllocSize(FieldTy); + if (const StructType* ST = dyn_cast<StructType>(FieldTy)) + TypeSize = TD->getStructLayout(ST)->getSizeInBytes(); + const Type* IntPtrTy = TD->getIntPtrType(CI->getContext()); + Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy, + ConstantInt::get(IntPtrTy, TypeSize), + NElems, + CI->getName() + ".f" + Twine(FieldNo)); FieldMallocs.push_back(NMI); - new StoreInst(NMI, NGV, BCI); + new StoreInst(NMI, NGV, CI); } // The tricky aspect of this transformation is handling the case when malloc @@ -1308,18 +1321,18 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, // } Value *RunningOr = 0; for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) { - Value *Cond = new ICmpInst(BCI, ICmpInst::ICMP_EQ, FieldMallocs[i], - Constant::getNullValue(FieldMallocs[i]->getType()), - "isnull"); + Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i], + Constant::getNullValue(FieldMallocs[i]->getType()), + "isnull"); if (!RunningOr) RunningOr = Cond; // First seteq else - RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", BCI); + RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI); } // Split the basic block at the old malloc. - BasicBlock *OrigBB = BCI->getParent(); - BasicBlock *ContBB = OrigBB->splitBasicBlock(BCI, "malloc_cont"); + BasicBlock *OrigBB = CI->getParent(); + BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont"); // Create the block to check the first condition. Put all these blocks at the // end of the function as they are unlikely to be executed. @@ -1356,9 +1369,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, } BranchInst::Create(ContBB, NullPtrBlock); - - // CI and BCI are no longer needed, remove them. - BCI->eraseFromParent(); + + // CI is no longer needed, remove it. CI->eraseFromParent(); /// InsertedScalarizedLoads - As we process loads, if we can't immediately @@ -1444,13 +1456,9 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, /// cast of malloc. static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI, - BitCastInst *BCI, + const Type *AllocTy, Module::global_iterator &GVI, TargetData *TD) { - // If we can't figure out the type being malloced, then we can't optimize. - const Type *AllocTy = getMallocAllocatedType(CI); - assert(AllocTy); - // If this is a malloc of an abstract type, don't touch it. if (!AllocTy->isSized()) return false; @@ -1471,7 +1479,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // for. { SmallPtrSet<PHINode*, 8> PHIs; - if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs)) + if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs)) return false; } @@ -1479,16 +1487,15 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // transform the program to use global memory instead of malloc'd memory. // This eliminates dynamic allocation, avoids an indirection accessing the // data, and exposes the resultant global to further GlobalOpt. - Value *NElems = getMallocArraySize(CI, TD); // We cannot optimize the malloc if we cannot determine malloc array size. - if (NElems) { + if (Value *NElems = getMallocArraySize(CI, TD)) { if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems)) // Restrict this transformation to only working on small allocations // (2048 bytes currently), as we don't want to introduce a 16M global or // something. if (TD && NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) { - GVI = OptimizeGlobalAddressOfMalloc(GV, CI, BCI, NElems, TD); + GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElems, TD); return true; } @@ -1506,28 +1513,28 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // This the structure has an unreasonable number of fields, leave it // alone. if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 && - AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, BCI)) { + AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) { // If this is a fixed size array, transform the Malloc to be an alloc of // structs. malloc [100 x struct],1 -> malloc struct, 100 if (const ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI))) { - Value *NumElements = - ConstantInt::get(Type::getInt32Ty(CI->getContext()), - AT->getNumElements()); - Value *NewMI = CallInst::CreateMalloc(CI, - TD->getIntPtrType(CI->getContext()), - AllocSTy, NumElements, - BCI->getName()); - Value *Cast = new BitCastInst(NewMI, getMallocType(CI), "tmp", CI); - BCI->replaceAllUsesWith(Cast); - BCI->eraseFromParent(); + const Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); + unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes(); + Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize); + Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements()); + Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy, + AllocSize, NumElements, + CI->getName()); + Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI); + CI->replaceAllUsesWith(Cast); CI->eraseFromParent(); - BCI = cast<BitCastInst>(NewMI); - CI = extractMallocCallFromBitCast(NewMI); + CI = dyn_cast<BitCastInst>(Malloc) ? + extractMallocCallFromBitCast(Malloc): + cast<CallInst>(Malloc); } - GVI = PerformHeapAllocSRoA(GV, CI, BCI, NElems, TD); + GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD), TD); return true; } } @@ -1559,14 +1566,10 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC)) return true; } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) { - if (getMallocAllocatedType(CI)) { - BitCastInst* BCI = NULL; - for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end(); - UI != E; ) - BCI = dyn_cast<BitCastInst>(cast<Instruction>(*UI++)); - if (BCI && TryToOptimizeStoreOfMallocToGlobal(GV, CI, BCI, GVI, TD)) - return true; - } + const Type* MallocType = getMallocAllocatedType(CI); + if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, + GVI, TD)) + return true; } } diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp index 9a49d42..1a34180 100644 --- a/lib/VMCore/Core.cpp +++ b/lib/VMCore/Core.cpp @@ -1699,18 +1699,24 @@ LLVMValueRef LLVMBuildNot(LLVMBuilderRef B, LLVMValueRef V, const char *Name) { LLVMValueRef LLVMBuildMalloc(LLVMBuilderRef B, LLVMTypeRef Ty, const char *Name) { - const Type* IntPtrT = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext()); - return wrap(unwrap(B)->Insert(CallInst::CreateMalloc( - unwrap(B)->GetInsertBlock(), IntPtrT, unwrap(Ty), 0, 0, ""), - Twine(Name))); + const Type* ITy = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext()); + Constant* AllocSize = ConstantExpr::getSizeOf(unwrap(Ty)); + AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, ITy); + Instruction* Malloc = CallInst::CreateMalloc(unwrap(B)->GetInsertBlock(), + ITy, unwrap(Ty), AllocSize, + 0, 0, ""); + return wrap(unwrap(B)->Insert(Malloc, Twine(Name))); } LLVMValueRef LLVMBuildArrayMalloc(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Val, const char *Name) { - const Type* IntPtrT = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext()); - return wrap(unwrap(B)->Insert(CallInst::CreateMalloc( - unwrap(B)->GetInsertBlock(), IntPtrT, unwrap(Ty), unwrap(Val), 0, ""), - Twine(Name))); + const Type* ITy = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext()); + Constant* AllocSize = ConstantExpr::getSizeOf(unwrap(Ty)); + AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, ITy); + Instruction* Malloc = CallInst::CreateMalloc(unwrap(B)->GetInsertBlock(), + ITy, unwrap(Ty), AllocSize, + unwrap(Val), 0, ""); + return wrap(unwrap(B)->Insert(Malloc, Twine(Name))); } LLVMValueRef LLVMBuildAlloca(LLVMBuilderRef B, LLVMTypeRef Ty, diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp index 3070241..b5e1a1b 100644 --- a/lib/VMCore/Instructions.cpp +++ b/lib/VMCore/Instructions.cpp @@ -24,6 +24,7 @@ #include "llvm/Support/CallSite.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetData.h" using namespace llvm; @@ -448,22 +449,11 @@ static bool IsConstantOne(Value *val) { return isa<ConstantInt>(val) && cast<ConstantInt>(val)->isOne(); } -static Value *checkArraySize(Value *Amt, const Type *IntPtrTy) { - if (!Amt) - Amt = ConstantInt::get(IntPtrTy, 1); - else { - assert(!isa<BasicBlock>(Amt) && - "Passed basic block into malloc size parameter! Use other ctor"); - assert(Amt->getType() == IntPtrTy && - "Malloc array size is not an intptr!"); - } - return Amt; -} - static Instruction *createMalloc(Instruction *InsertBefore, BasicBlock *InsertAtEnd, const Type *IntPtrTy, - const Type *AllocTy, Value *ArraySize, - Function *MallocF, const Twine &NameStr) { + const Type *AllocTy, Value *AllocSize, + Value *ArraySize, Function *MallocF, + const Twine &Name) { assert(((!InsertBefore && InsertAtEnd) || (InsertBefore && !InsertAtEnd)) && "createMalloc needs either InsertBefore or InsertAtEnd"); @@ -471,10 +461,14 @@ static Instruction *createMalloc(Instruction *InsertBefore, // bitcast (i8* malloc(typeSize)) to type* // malloc(type, arraySize) becomes: // bitcast (i8 *malloc(typeSize*arraySize)) to type* - Value *AllocSize = ConstantExpr::getSizeOf(AllocTy); - AllocSize = ConstantExpr::getTruncOrBitCast(cast<Constant>(AllocSize), - IntPtrTy); - ArraySize = checkArraySize(ArraySize, IntPtrTy); + if (!ArraySize) + ArraySize = ConstantInt::get(IntPtrTy, 1); + else if (ArraySize->getType() != IntPtrTy) { + if (InsertBefore) + ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy, false, "", InsertBefore); + else + ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy, false, "", InsertAtEnd); + } if (!IsConstantOne(ArraySize)) { if (IsConstantOne(AllocSize)) { @@ -513,14 +507,14 @@ static Instruction *createMalloc(Instruction *InsertBefore, Result = MCall; if (Result->getType() != AllocPtrType) // Create a cast instruction to convert to the right type... - Result = new BitCastInst(MCall, AllocPtrType, NameStr, InsertBefore); + Result = new BitCastInst(MCall, AllocPtrType, Name, InsertBefore); } else { MCall = CallInst::Create(MallocF, AllocSize, "malloccall"); Result = MCall; if (Result->getType() != AllocPtrType) { InsertAtEnd->getInstList().push_back(MCall); // Create a cast instruction to convert to the right type... - Result = new BitCastInst(MCall, AllocPtrType, NameStr); + Result = new BitCastInst(MCall, AllocPtrType, Name); } } MCall->setTailCall(); @@ -539,8 +533,9 @@ static Instruction *createMalloc(Instruction *InsertBefore, /// 3. Bitcast the result of the malloc call to the specified type. Instruction *CallInst::CreateMalloc(Instruction *InsertBefore, const Type *IntPtrTy, const Type *AllocTy, - Value *ArraySize, const Twine &Name) { - return createMalloc(InsertBefore, NULL, IntPtrTy, AllocTy, + Value *AllocSize, Value *ArraySize, + const Twine &Name) { + return createMalloc(InsertBefore, NULL, IntPtrTy, AllocTy, AllocSize, ArraySize, NULL, Name); } @@ -554,9 +549,9 @@ Instruction *CallInst::CreateMalloc(Instruction *InsertBefore, /// responsibility of the caller. Instruction *CallInst::CreateMalloc(BasicBlock *InsertAtEnd, const Type *IntPtrTy, const Type *AllocTy, - Value *ArraySize, Function* MallocF, - const Twine &Name) { - return createMalloc(NULL, InsertAtEnd, IntPtrTy, AllocTy, + Value *AllocSize, Value *ArraySize, + Function *MallocF, const Twine &Name) { + return createMalloc(NULL, InsertAtEnd, IntPtrTy, AllocTy, AllocSize, ArraySize, MallocF, Name); } diff --git a/test/Analysis/PointerTracking/sizes.ll b/test/Analysis/PointerTracking/sizes.ll index c0b0606..267c3b8 100644 --- a/test/Analysis/PointerTracking/sizes.ll +++ b/test/Analysis/PointerTracking/sizes.ll @@ -31,6 +31,7 @@ entry: } declare i32 @bar(i8*) +declare i32 @bar2(i64*) define i32 @foo1(i32 %n) nounwind { entry: @@ -60,11 +61,16 @@ entry: ret i32 %add16 } -define i32 @foo2(i32 %n) nounwind { +define i32 @foo2(i64 %n) nounwind { entry: - %call = malloc i8, i32 %n ; <i8*> [#uses=1] + %call = tail call i8* @malloc(i64 %n) ; <i8*> [#uses=1] ; CHECK: %call = ; CHECK: ==> %n elements, %n bytes allocated + %mallocsize = mul i64 %n, 8 ; <i64> [#uses=1] + %malloccall = tail call i8* @malloc(i64 %mallocsize) ; <i8*> [#uses=1] + %call3 = bitcast i8* %malloccall to i64* ; <i64*> [#uses=1] +; CHECK: %malloccall = +; CHECK: ==> (8 * %n) elements, (8 * %n) bytes allocated %call2 = tail call i8* @calloc(i64 2, i64 4) nounwind ; <i8*> [#uses=1] ; CHECK: %call2 = ; CHECK: ==> 8 elements, 8 bytes allocated @@ -72,13 +78,17 @@ entry: ; CHECK: %call4 = ; CHECK: ==> 16 elements, 16 bytes allocated %call6 = tail call i32 @bar(i8* %call) nounwind ; <i32> [#uses=1] + %call7 = tail call i32 @bar2(i64* %call3) nounwind ; <i32> [#uses=1] %call8 = tail call i32 @bar(i8* %call2) nounwind ; <i32> [#uses=1] %call10 = tail call i32 @bar(i8* %call4) nounwind ; <i32> [#uses=1] - %add = add i32 %call8, %call6 ; <i32> [#uses=1] - %add11 = add i32 %add, %call10 ; <i32> [#uses=1] + %add = add i32 %call8, %call6 ; <i32> [#uses=1] + %add10 = add i32 %add, %call7 ; <i32> [#uses=1] + %add11 = add i32 %add10, %call10 ; <i32> [#uses=1] ret i32 %add11 } +declare noalias i8* @malloc(i64) nounwind + declare noalias i8* @calloc(i64, i64) nounwind declare noalias i8* @realloc(i8* nocapture, i64) nounwind diff --git a/test/Transforms/GlobalOpt/2009-06-01-RecursivePHI.ll b/test/Transforms/GlobalOpt/2009-06-01-RecursivePHI.ll index abd3109..d3c3ff5 100644 --- a/test/Transforms/GlobalOpt/2009-06-01-RecursivePHI.ll +++ b/test/Transforms/GlobalOpt/2009-06-01-RecursivePHI.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -globalopt +target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" %struct.s_annealing_sched = type { i32, float, float, float, float } %struct.s_bb = type { i32, i32, i32, i32 } @@ -96,7 +97,9 @@ bb.i34: ; preds = %bb unreachable bb1.i38: ; preds = %bb - %0 = malloc %struct.s_net, i32 undef ; <%struct.s_net*> [#uses=1] + %mallocsize = mul i64 28, undef ; <i64> [#uses=1] + %malloccall = tail call i8* @malloc(i64 %mallocsize) ; <i8*> [#uses=1] + %0 = bitcast i8* %malloccall to %struct.s_net* ; <%struct.s_net*> [#uses=1] br i1 undef, label %bb.i1.i39, label %my_malloc.exit2.i bb.i1.i39: ; preds = %bb1.i38 @@ -115,3 +118,5 @@ my_malloc.exit8.i: ; preds = %my_malloc.exit2.i bb7: ; preds = %bb6.preheader unreachable } + +declare noalias i8* @malloc(i64) diff --git a/test/Transforms/GlobalOpt/heap-sra-1.ll b/test/Transforms/GlobalOpt/heap-sra-1.ll index 6df559e..9d5148f 100644 --- a/test/Transforms/GlobalOpt/heap-sra-1.ll +++ b/test/Transforms/GlobalOpt/heap-sra-1.ll @@ -1,18 +1,22 @@ -; RUN: opt < %s -globalopt -S | grep {@X.f0} -; RUN: opt < %s -globalopt -S | grep {@X.f1} -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" -target triple = "i386-apple-darwin7" +; RUN: opt < %s -globalopt -S | FileCheck %s +target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" %struct.foo = type { i32, i32 } @X = internal global %struct.foo* null +; CHECK: @X.f0 +; CHECK: @X.f1 -define void @bar(i32 %Size) nounwind noinline { +define void @bar(i64 %Size) nounwind noinline { entry: - %.sub = malloc %struct.foo, i32 %Size + %mallocsize = mul i64 %Size, 8 ; <i64> [#uses=1] + %malloccall = tail call i8* @malloc(i64 %mallocsize) ; <i8*> [#uses=1] + %.sub = bitcast i8* %malloccall to %struct.foo* ; <%struct.foo*> [#uses=1] store %struct.foo* %.sub, %struct.foo** @X, align 4 ret void } +declare noalias i8* @malloc(i64) + define i32 @baz() nounwind readonly noinline { bb1.thread: %0 = load %struct.foo** @X, align 4 diff --git a/test/Transforms/GlobalOpt/heap-sra-2.ll b/test/Transforms/GlobalOpt/heap-sra-2.ll index 5a3c3cd..fa8c362 100644 --- a/test/Transforms/GlobalOpt/heap-sra-2.ll +++ b/test/Transforms/GlobalOpt/heap-sra-2.ll @@ -1,20 +1,22 @@ -; RUN: opt < %s -globalopt -S | grep {@X.f0} -; RUN: opt < %s -globalopt -S | grep {@X.f1} +; RUN: opt < %s -globalopt -S | FileCheck %s +target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" -target triple = "i386-apple-darwin7" %struct.foo = type { i32, i32 } @X = internal global %struct.foo* null ; <%struct.foo**> [#uses=2] +; CHECK: @X.f0 +; CHECK: @X.f1 define void @bar(i32 %Size) nounwind noinline { entry: - %0 = malloc [1000000 x %struct.foo] - ;%.sub = bitcast [1000000 x %struct.foo]* %0 to %struct.foo* + %malloccall = tail call i8* @malloc(i64 8000000) ; <i8*> [#uses=1] + %0 = bitcast i8* %malloccall to [1000000 x %struct.foo]* ; <[1000000 x %struct.foo]*> [#uses=1] %.sub = getelementptr [1000000 x %struct.foo]* %0, i32 0, i32 0 ; <%struct.foo*> [#uses=1] store %struct.foo* %.sub, %struct.foo** @X, align 4 ret void } +declare noalias i8* @malloc(i64) + define i32 @baz() nounwind readonly noinline { bb1.thread: %0 = load %struct.foo** @X, align 4 ; <%struct.foo*> [#uses=1] diff --git a/test/Transforms/GlobalOpt/heap-sra-3.ll b/test/Transforms/GlobalOpt/heap-sra-3.ll index 1496485..cbbcdfc 100644 --- a/test/Transforms/GlobalOpt/heap-sra-3.ll +++ b/test/Transforms/GlobalOpt/heap-sra-3.ll @@ -1,24 +1,22 @@ ; RUN: opt < %s -globalopt -S | FileCheck %s - -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" -target triple = "i386-apple-darwin10" +target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" %struct.foo = type { i32, i32 } @X = internal global %struct.foo* null ; CHECK: @X.f0 ; CHECK: @X.f1 -define void @bar(i32 %Size) nounwind noinline { +define void @bar(i64 %Size) nounwind noinline { entry: - %mallocsize = mul i32 ptrtoint (%struct.foo* getelementptr (%struct.foo* null, i32 1) to i32), %Size, ; <i32> [#uses=1] -; CHECK: mul i32 %Size - %malloccall = tail call i8* @malloc(i32 %mallocsize) ; <i8*> [#uses=1] + %mallocsize = mul i64 8, %Size, ; <i64> [#uses=1] +; CHECK: mul i64 %Size, 4 + %malloccall = tail call i8* @malloc(i64 %mallocsize) ; <i8*> [#uses=1] %.sub = bitcast i8* %malloccall to %struct.foo* ; <%struct.foo*> [#uses=1] store %struct.foo* %.sub, %struct.foo** @X, align 4 ret void } -declare noalias i8* @malloc(i32) +declare noalias i8* @malloc(i64) define i32 @baz() nounwind readonly noinline { bb1.thread: diff --git a/test/Transforms/GlobalOpt/heap-sra-4.ll b/test/Transforms/GlobalOpt/heap-sra-4.ll index ae97ef1..d5a5828 100644 --- a/test/Transforms/GlobalOpt/heap-sra-4.ll +++ b/test/Transforms/GlobalOpt/heap-sra-4.ll @@ -1,24 +1,22 @@ ; RUN: opt < %s -globalopt -S | FileCheck %s - -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" -target triple = "i386-apple-darwin7" +target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" %struct.foo = type { i32, i32 } @X = internal global %struct.foo* null ; CHECK: @X.f0 ; CHECK: @X.f1 -define void @bar(i32 %Size) nounwind noinline { +define void @bar(i64 %Size) nounwind noinline { entry: - %mallocsize = shl i32 ptrtoint (%struct.foo* getelementptr (%struct.foo* null, i32 1) to i32), 9, ; <i32> [#uses=1] - %malloccall = tail call i8* @malloc(i32 %mallocsize) ; <i8*> [#uses=1] -; CHECK: @malloc(i32 mul (i32 512 + %mallocsize = shl i64 %Size, 3 ; <i64> [#uses=1] + %malloccall = tail call i8* @malloc(i64 %mallocsize) ; <i8*> [#uses=1] +; CHECK: mul i64 %Size, 4 %.sub = bitcast i8* %malloccall to %struct.foo* ; <%struct.foo*> [#uses=1] store %struct.foo* %.sub, %struct.foo** @X, align 4 ret void } -declare noalias i8* @malloc(i32) +declare noalias i8* @malloc(i64) define i32 @baz() nounwind readonly noinline { bb1.thread: diff --git a/test/Transforms/GlobalOpt/heap-sra-phi.ll b/test/Transforms/GlobalOpt/heap-sra-phi.ll index 2eba944..6188e5a 100644 --- a/test/Transforms/GlobalOpt/heap-sra-phi.ll +++ b/test/Transforms/GlobalOpt/heap-sra-phi.ll @@ -1,19 +1,21 @@ ; RUN: opt < %s -globalopt -S | grep {tmp.f1 = phi i32. } ; RUN: opt < %s -globalopt -S | grep {tmp.f0 = phi i32. } +target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" -target triple = "i386-apple-darwin7" %struct.foo = type { i32, i32 } @X = internal global %struct.foo* null ; <%struct.foo**> [#uses=2] define void @bar(i32 %Size) nounwind noinline { entry: - %tmp = malloc [1000000 x %struct.foo] ; <[1000000 x %struct.foo]*> [#uses=1] + %malloccall = tail call i8* @malloc(i64 8000000) ; <i8*> [#uses=1] + %tmp = bitcast i8* %malloccall to [1000000 x %struct.foo]* ; <[1000000 x %struct.foo]*> [#uses=1] %.sub = getelementptr [1000000 x %struct.foo]* %tmp, i32 0, i32 0 ; <%struct.foo*> [#uses=1] store %struct.foo* %.sub, %struct.foo** @X, align 4 ret void } +declare noalias i8* @malloc(i64) + define i32 @baz() nounwind readonly noinline { bb1.thread: %tmpLD1 = load %struct.foo** @X, align 4 ; <%struct.foo*> [#uses=1] diff --git a/test/Transforms/GlobalOpt/malloc-promote-1.ll b/test/Transforms/GlobalOpt/malloc-promote-1.ll index fd510e3..51ccbbd 100644 --- a/test/Transforms/GlobalOpt/malloc-promote-1.ll +++ b/test/Transforms/GlobalOpt/malloc-promote-1.ll @@ -1,19 +1,24 @@ -; RUN: opt < %s -globalopt -S | not grep global +; RUN: opt < %s -globalopt -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" @G = internal global i32* null ; <i32**> [#uses=3] +; CHECK-NOT: global define void @init() { - %P = malloc i32 ; <i32*> [#uses=1] + %malloccall = tail call i8* @malloc(i64 4) ; <i8*> [#uses=1] + %P = bitcast i8* %malloccall to i32* ; <i32*> [#uses=1] store i32* %P, i32** @G %GV = load i32** @G ; <i32*> [#uses=1] store i32 0, i32* %GV ret void } +declare noalias i8* @malloc(i64) + define i32 @get() { %GV = load i32** @G ; <i32*> [#uses=1] %V = load i32* %GV ; <i32> [#uses=1] ret i32 %V +; CHECK: ret i32 0 } diff --git a/test/Transforms/GlobalOpt/malloc-promote-2.ll b/test/Transforms/GlobalOpt/malloc-promote-2.ll index d3d2252..f989b79 100644 --- a/test/Transforms/GlobalOpt/malloc-promote-2.ll +++ b/test/Transforms/GlobalOpt/malloc-promote-2.ll @@ -1,11 +1,11 @@ ; RUN: opt < %s -globalopt -globaldce -S | not grep malloc -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" -target triple = "i686-apple-darwin8" +target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" @G = internal global i32* null ; <i32**> [#uses=3] define void @init() { - %P = malloc i32, i32 100 ; <i32*> [#uses=1] + %malloccall = tail call i8* @malloc(i64 mul (i64 100, i64 4)) ; <i8*> [#uses=1] + %P = bitcast i8* %malloccall to i32* ; <i32*> [#uses=1] store i32* %P, i32** @G %GV = load i32** @G ; <i32*> [#uses=1] %GVe = getelementptr i32* %GV, i32 40 ; <i32*> [#uses=1] @@ -13,6 +13,8 @@ define void @init() { ret void } +declare noalias i8* @malloc(i64) + define i32 @get() { %GV = load i32** @G ; <i32*> [#uses=1] %GVe = getelementptr i32* %GV, i32 40 ; <i32*> [#uses=1] diff --git a/test/Transforms/GlobalOpt/malloc-promote-3.ll b/test/Transforms/GlobalOpt/malloc-promote-3.ll index a920b61..57f937d 100644 --- a/test/Transforms/GlobalOpt/malloc-promote-3.ll +++ b/test/Transforms/GlobalOpt/malloc-promote-3.ll @@ -1,11 +1,11 @@ ; RUN: opt < %s -globalopt -globaldce -S | not grep malloc -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" -target triple = "i686-apple-darwin8" +target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" @G = internal global i32* null ; <i32**> [#uses=4] define void @init() { - %P = malloc i32, i32 100 ; <i32*> [#uses=1] + %malloccall = tail call i8* @malloc(i64 mul (i64 100, i64 4)) ; <i8*> [#uses=1] + %P = bitcast i8* %malloccall to i32* ; <i32*> [#uses=1] store i32* %P, i32** @G %GV = load i32** @G ; <i32*> [#uses=1] %GVe = getelementptr i32* %GV, i32 40 ; <i32*> [#uses=1] @@ -13,6 +13,8 @@ define void @init() { ret void } +declare noalias i8* @malloc(i64) + define i32 @get() { %GV = load i32** @G ; <i32*> [#uses=1] %GVe = getelementptr i32* %GV, i32 40 ; <i32*> [#uses=1] |