author | Stephen Hines <srhines@google.com> | 2013-03-05 23:27:24 -0800
committer | Stephen Hines <srhines@google.com> | 2013-03-05 23:27:24 -0800
commit | 5adb136be579e8fff3734461580cb34d1d2983b8 (patch)
tree | bff1a422e9c9789df563aaf9a7e91e63e8ec0384 /lib/Analysis
parent | 227a4a4ade38716ba9eb3205f48b52910f3b955e (diff)
parent | b3201c5cf1e183d840f7c99ff779d57f1549d8e5 (diff)
Merge commit 'b3201c5cf1e183d840f7c99ff779d57f1549d8e5' into merge_20130226
Conflicts:
include/llvm/Support/ELF.h
lib/Support/DeltaAlgorithm.cpp
Change-Id: I24a4fbce62eb39d924efee3c687b55e1e17b30cd
Diffstat (limited to 'lib/Analysis')
-rw-r--r-- | lib/Analysis/AliasAnalysis.cpp | 16
-rw-r--r-- | lib/Analysis/CMakeLists.txt | 1
-rw-r--r-- | lib/Analysis/CodeMetrics.cpp | 128
-rw-r--r-- | lib/Analysis/ConstantFolding.cpp | 220
-rw-r--r-- | lib/Analysis/CostModel.cpp | 23
-rw-r--r-- | lib/Analysis/IPA/CMakeLists.txt | 2
-rw-r--r-- | lib/Analysis/IPA/CallPrinter.cpp | 87
-rw-r--r-- | lib/Analysis/IPA/IPA.cpp | 2
-rw-r--r-- | lib/Analysis/IPA/InlineCost.cpp (renamed from lib/Analysis/InlineCost.cpp) | 74
-rw-r--r-- | lib/Analysis/InstructionSimplify.cpp | 260
-rw-r--r-- | lib/Analysis/LazyValueInfo.cpp | 1
-rw-r--r-- | lib/Analysis/Lint.cpp | 84
-rw-r--r-- | lib/Analysis/Loads.cpp | 3
-rw-r--r-- | lib/Analysis/LoopInfo.cpp | 50
-rw-r--r-- | lib/Analysis/MemoryBuiltins.cpp | 37
-rw-r--r-- | lib/Analysis/MemoryDependenceAnalysis.cpp | 12
-rw-r--r-- | lib/Analysis/ProfileDataLoaderPass.cpp | 4
-rw-r--r-- | lib/Analysis/ProfileInfoLoaderPass.cpp | 17
-rw-r--r-- | lib/Analysis/ScalarEvolutionExpander.cpp | 5
-rw-r--r-- | lib/Analysis/TargetTransformInfo.cpp | 285
-rw-r--r-- | lib/Analysis/ValueTracking.cpp | 26
21 files changed, 934 insertions, 403 deletions
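A recurring theme on the lib/Analysis side of this merge: CodeMetrics and the inline-cost machinery stop using the hand-rolled callIsSmall/isInstructionFree helpers and instead query the new TargetTransformInfo analysis for per-instruction costs. A minimal sketch of that pattern, using only APIs that appear in the hunks below (the function name is illustrative, not part of the patch):

    // Sum TTI user costs over a block, as the rewritten
    // CodeMetrics::analyzeBasicBlock does; TCC_Free instructions add nothing.
    #include "llvm/Analysis/TargetTransformInfo.h"
    #include "llvm/IR/BasicBlock.h"

    static unsigned estimateBlockSize(const llvm::BasicBlock &BB,
                                      const llvm::TargetTransformInfo &TTI) {
      unsigned Size = 0;
      for (llvm::BasicBlock::const_iterator I = BB.begin(), E = BB.end();
           I != E; ++I)
        Size += TTI.getUserCost(&*I);
      return Size;
    }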
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp index f32bd70..210b80a 100644 --- a/lib/Analysis/AliasAnalysis.cpp +++ b/lib/Analysis/AliasAnalysis.cpp @@ -555,19 +555,3 @@ bool llvm::isIdentifiedObject(const Value *V) { return A->hasNoAliasAttr() || A->hasByValAttr(); return false; } - -/// isKnownNonNull - Return true if we know that the specified value is never -/// null. -bool llvm::isKnownNonNull(const Value *V) { - // Alloca never returns null, malloc might. - if (isa<AllocaInst>(V)) return true; - - // A byval argument is never null. - if (const Argument *A = dyn_cast<Argument>(V)) - return A->hasByValAttr(); - - // Global values are not null unless extern weak. - if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) - return !GV->hasExternalWeakLinkage(); - return false; -} diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index 78abe0f..4c64c4a 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -18,7 +18,6 @@ add_llvm_library(LLVMAnalysis DomPrinter.cpp DominanceFrontier.cpp IVUsers.cpp - InlineCost.cpp InstCount.cpp InstructionSimplify.cpp Interval.cpp diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp index 1dff3d4..8cda01a 100644 --- a/lib/Analysis/CodeMetrics.cpp +++ b/lib/Analysis/CodeMetrics.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/IntrinsicInst.h" @@ -19,114 +20,14 @@ using namespace llvm; -/// callIsSmall - If a call is likely to lower to a single target instruction, -/// or is otherwise deemed small return true. -/// TODO: Perhaps calls like memcpy, strcpy, etc? -bool llvm::callIsSmall(ImmutableCallSite CS) { - if (isa<IntrinsicInst>(CS.getInstruction())) - return true; - - const Function *F = CS.getCalledFunction(); - if (!F) return false; - - if (F->hasLocalLinkage()) return false; - - if (!F->hasName()) return false; - - StringRef Name = F->getName(); - - // These will all likely lower to a single selection DAG node. - if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" || - Name == "fabs" || Name == "fabsf" || Name == "fabsl" || - Name == "sin" || Name == "sinf" || Name == "sinl" || - Name == "cos" || Name == "cosf" || Name == "cosl" || - Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" ) - return true; - - // These are all likely to be optimized into something smaller. - if (Name == "pow" || Name == "powf" || Name == "powl" || - Name == "exp2" || Name == "exp2l" || Name == "exp2f" || - Name == "floor" || Name == "floorf" || Name == "ceil" || - Name == "round" || Name == "ffs" || Name == "ffsl" || - Name == "abs" || Name == "labs" || Name == "llabs") - return true; - - return false; -} - -bool llvm::isInstructionFree(const Instruction *I, const DataLayout *TD) { - if (isa<PHINode>(I)) - return true; - - // If a GEP has all constant indices, it will probably be folded with - // a load/store. 
- if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) - return GEP->hasAllConstantIndices(); - - if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { - switch (II->getIntrinsicID()) { - default: - return false; - case Intrinsic::dbg_declare: - case Intrinsic::dbg_value: - case Intrinsic::invariant_start: - case Intrinsic::invariant_end: - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - case Intrinsic::objectsize: - case Intrinsic::ptr_annotation: - case Intrinsic::var_annotation: - // These intrinsics don't count as size. - return true; - } - } - - if (const CastInst *CI = dyn_cast<CastInst>(I)) { - // Noop casts, including ptr <-> int, don't count. - if (CI->isLosslessCast()) - return true; - - Value *Op = CI->getOperand(0); - // An inttoptr cast is free so long as the input is a legal integer type - // which doesn't contain values outside the range of a pointer. - if (isa<IntToPtrInst>(CI) && TD && - TD->isLegalInteger(Op->getType()->getScalarSizeInBits()) && - Op->getType()->getScalarSizeInBits() <= TD->getPointerSizeInBits()) - return true; - - // A ptrtoint cast is free so long as the result is large enough to store - // the pointer, and a legal integer type. - if (isa<PtrToIntInst>(CI) && TD && - TD->isLegalInteger(Op->getType()->getScalarSizeInBits()) && - Op->getType()->getScalarSizeInBits() >= TD->getPointerSizeInBits()) - return true; - - // trunc to a native type is free (assuming the target has compare and - // shift-right of the same width). - if (TD && isa<TruncInst>(CI) && - TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType()))) - return true; - // Result of a cmp instruction is often extended (to be used by other - // cmp instructions, logical or return instructions). These are usually - // nop on most sane targets. - if (isa<CmpInst>(CI->getOperand(0))) - return true; - } - - return false; -} - /// analyzeBasicBlock - Fill in the current structure with information gleaned /// from the specified block. void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, - const DataLayout *TD) { + const TargetTransformInfo &TTI) { ++NumBlocks; unsigned NumInstsBeforeThisBB = NumInsts; for (BasicBlock::const_iterator II = BB->begin(), E = BB->end(); II != E; ++II) { - if (isInstructionFree(II, TD)) - continue; - // Special handling for calls. if (isa<CallInst>(II) || isa<InvokeInst>(II)) { ImmutableCallSite CS(cast<Instruction>(II)); @@ -144,12 +45,10 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, // for that case. if (F == BB->getParent()) isRecursive = true; - } - - if (!callIsSmall(CS)) { - // Each argument to a call takes on average one instruction to set up. - NumInsts += CS.arg_size(); + if (TTI.isLoweredToCall(F)) + ++NumCalls; + } else { // We don't want inline asm to count as a call - that would prevent loop // unrolling. The argument setup cost is still real, though. if (!isa<InlineAsm>(CS.getCalledValue())) @@ -173,7 +72,7 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, if (InvI->hasFnAttr(Attribute::NoDuplicate)) notDuplicatable = true; - ++NumInsts; + NumInsts += TTI.getUserCost(&*II); } if (isa<ReturnInst>(BB->getTerminator())) @@ -195,18 +94,3 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, // Remember NumInsts for this BB. 
NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB; } - -void CodeMetrics::analyzeFunction(Function *F, const DataLayout *TD) { - // If this function contains a call that "returns twice" (e.g., setjmp or - // _setjmp) and it isn't marked with "returns twice" itself, never inline it. - // This is a hack because we depend on the user marking their local variables - // as volatile if they are live across a setjmp call, and they probably - // won't do this in callers. - exposesReturnsTwice = F->callsFunctionThatReturnsTwice() && - !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::ReturnsTwice); - - // Look at the size of the callee. - for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) - analyzeBasicBlock(&*BB, TD); -} diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 2b7d3bd..09d7608 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -54,13 +54,12 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, // Handle a vector->integer cast. if (IntegerType *IT = dyn_cast<IntegerType>(DestTy)) { - ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(C); - if (CDV == 0) + VectorType *VTy = dyn_cast<VectorType>(C->getType()); + if (VTy == 0) return ConstantExpr::getBitCast(C, DestTy); - unsigned NumSrcElts = CDV->getType()->getNumElements(); - - Type *SrcEltTy = CDV->getType()->getElementType(); + unsigned NumSrcElts = VTy->getNumElements(); + Type *SrcEltTy = VTy->getElementType(); // If the vector is a vector of floating point, convert it to vector of int // to simplify things. @@ -70,9 +69,12 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElts); // Ask IR to do the conversion now that #elts line up. C = ConstantExpr::getBitCast(C, SrcIVTy); - CDV = cast<ConstantDataVector>(C); } + ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(C); + if (CDV == 0) + return ConstantExpr::getBitCast(C, DestTy); + // Now that we know that the input value is a vector of integers, just shift // and insert them into our result. unsigned BitShift = TD.getTypeAllocSizeInBits(SrcEltTy); @@ -218,10 +220,10 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, /// from a global, return the global and the constant. Because of /// constantexprs, this function is recursive. static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, - int64_t &Offset, const DataLayout &TD) { + APInt &Offset, const DataLayout &TD) { // Trivial case, constant is the global. if ((GV = dyn_cast<GlobalValue>(C))) { - Offset = 0; + Offset.clearAllBits(); return true; } @@ -235,34 +237,13 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD); // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5) - if (CE->getOpcode() == Instruction::GetElementPtr) { - // Cannot compute this if the element type of the pointer is missing size - // info. - if (!cast<PointerType>(CE->getOperand(0)->getType()) - ->getElementType()->isSized()) - return false; - + if (GEPOperator *GEP = dyn_cast<GEPOperator>(CE)) { // If the base isn't a global+constant, we aren't either. if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD)) return false; // Otherwise, add any offset that our operands provide. 
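The hunk continuing below deletes IsConstantOffsetFromGlobal's hand-rolled walk over struct and array indices; the replacement is a single call to GEPOperator::accumulateConstantOffset, accumulating into a pointer-width APInt (which is also why Offset was widened from int64_t to APInt above). A sketch of that API as this patch uses it (the wrapper name is illustrative):

    // Collect a GEP's constant byte offset; fails (returns false) when any
    // index is non-constant.
    #include "llvm/ADT/APInt.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Operator.h"

    static bool constantByteOffset(const llvm::GEPOperator *GEP,
                                   const llvm::DataLayout &TD,
                                   llvm::APInt &Offset) {
      Offset = llvm::APInt(TD.getPointerSizeInBits(), 0);
      return GEP->accumulateConstantOffset(TD, Offset);
    }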
- gep_type_iterator GTI = gep_type_begin(CE); - for (User::const_op_iterator i = CE->op_begin() + 1, e = CE->op_end(); - i != e; ++i, ++GTI) { - ConstantInt *CI = dyn_cast<ConstantInt>(*i); - if (!CI) return false; // Index isn't a simple constant? - if (CI->isZero()) continue; // Not adding anything. - - if (StructType *ST = dyn_cast<StructType>(*GTI)) { - // N = N + Offset - Offset += TD.getStructLayout(ST)->getElementOffset(CI->getZExtValue()); - } else { - SequentialType *SQT = cast<SequentialType>(*GTI); - Offset += TD.getTypeAllocSize(SQT->getElementType())*CI->getSExtValue(); - } - } - return true; + return GEP->accumulateConstantOffset(TD, Offset); } return false; @@ -310,6 +291,10 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), TD); return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD); } + if (CFP->getType()->isHalfTy()){ + C = FoldBitCast(C, Type::getInt16Ty(C->getContext()), TD); + return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD); + } return false; } @@ -402,7 +387,9 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, // that address spaces don't matter here since we're not going to result in // an actual new load. Type *MapTy; - if (LoadTy->isFloatTy()) + if (LoadTy->isHalfTy()) + MapTy = Type::getInt16PtrTy(C->getContext()); + else if (LoadTy->isFloatTy()) MapTy = Type::getInt32PtrTy(C->getContext()); else if (LoadTy->isDoubleTy()) MapTy = Type::getInt64PtrTy(C->getContext()); @@ -423,7 +410,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, if (BytesLoaded > 32 || BytesLoaded == 0) return 0; GlobalValue *GVal; - int64_t Offset; + APInt Offset(TD.getPointerSizeInBits(), 0); if (!IsConstantOffsetFromGlobal(C, GVal, Offset, TD)) return 0; @@ -434,14 +421,15 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, // If we're loading off the beginning of the global, some bytes may be valid, // but we don't try to handle this. - if (Offset < 0) return 0; + if (Offset.isNegative()) return 0; // If we're not accessing anything in this constant, the result is undefined. - if (uint64_t(Offset) >= TD.getTypeAllocSize(GV->getInitializer()->getType())) + if (Offset.getZExtValue() >= + TD.getTypeAllocSize(GV->getInitializer()->getType())) return UndefValue::get(IntType); unsigned char RawBytes[32] = {0}; - if (!ReadDataFromGlobal(GV->getInitializer(), Offset, RawBytes, + if (!ReadDataFromGlobal(GV->getInitializer(), Offset.getZExtValue(), RawBytes, BytesLoaded, TD)) return 0; @@ -550,10 +538,10 @@ static Constant *ConstantFoldLoadInst(const LoadInst *LI, const DataLayout *TD){ /// SymbolicallyEvaluateBinop - One of Op0/Op1 is a constant expression. /// Attempt to symbolically evaluate the result of a binary operator merging -/// these together. If target data info is available, it is provided as TD, -/// otherwise TD is null. +/// these together. If target data info is available, it is provided as DL, +/// otherwise DL is null. static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, - Constant *Op1, const DataLayout *TD){ + Constant *Op1, const DataLayout *DL){ // SROA // Fold (and 0xffffffff00000000, (shl x, 32)) -> shl. @@ -561,17 +549,44 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, // bits. 
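The + block that opens the next chunk folds 'and' through known-bits analysis: if every bit position is either known one in one operand or known zero in the other, the 'and' returns the latter operand unchanged. The underlying bit identity, checked with plain 64-bit masks standing in for APInt:

    // If each bit is known-one in Y or known-zero in X, then (X & Y) == X:
    // Y's ones pass X's bits through, and X's zeros stay zero either way.
    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t X = UINT64_C(0x00000000FFFF0000); // upper 32 bits known zero
      uint64_t Y = UINT64_C(0x00000000FFFFFFFF); // lower 32 bits known one
      assert((X & Y) == X);
      return 0;
    }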
+ if (Opc == Instruction::And && DL) { + unsigned BitWidth = DL->getTypeSizeInBits(Op0->getType()); + APInt KnownZero0(BitWidth, 0), KnownOne0(BitWidth, 0); + APInt KnownZero1(BitWidth, 0), KnownOne1(BitWidth, 0); + ComputeMaskedBits(Op0, KnownZero0, KnownOne0, DL); + ComputeMaskedBits(Op1, KnownZero1, KnownOne1, DL); + if ((KnownOne1 | KnownZero0).isAllOnesValue()) { + // All the bits of Op0 that the 'and' could be masking are already zero. + return Op0; + } + if ((KnownOne0 | KnownZero1).isAllOnesValue()) { + // All the bits of Op1 that the 'and' could be masking are already zero. + return Op1; + } + + APInt KnownZero = KnownZero0 | KnownZero1; + APInt KnownOne = KnownOne0 & KnownOne1; + if ((KnownZero | KnownOne).isAllOnesValue()) { + return ConstantInt::get(Op0->getType(), KnownOne); + } + } + // If the constant expr is something like &A[123] - &A[4].f, fold this into a // constant. This happens frequently when iterating over a global array. - if (Opc == Instruction::Sub && TD) { + if (Opc == Instruction::Sub && DL) { GlobalValue *GV1, *GV2; - int64_t Offs1, Offs2; + unsigned PtrSize = DL->getPointerSizeInBits(); + unsigned OpSize = DL->getTypeSizeInBits(Op0->getType()); + APInt Offs1(PtrSize, 0), Offs2(PtrSize, 0); - if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, *TD)) - if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, *TD) && + if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, *DL)) + if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, *DL) && GV1 == GV2) { // (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow. - return ConstantInt::get(Op0->getType(), Offs1-Offs2); + // PtrToInt may change the bitwidth so we have convert to the right size + // first. + return ConstantInt::get(Op0->getType(), Offs1.zextOrTrunc(OpSize) - + Offs2.zextOrTrunc(OpSize)); } } @@ -1104,6 +1119,13 @@ Constant *llvm::ConstantFoldLoadThroughGEPIndices(Constant *C, bool llvm::canConstantFoldCallTo(const Function *F) { switch (F->getIntrinsicID()) { + case Intrinsic::fabs: + case Intrinsic::log: + case Intrinsic::log2: + case Intrinsic::log10: + case Intrinsic::exp: + case Intrinsic::exp2: + case Intrinsic::floor: case Intrinsic::sqrt: case Intrinsic::pow: case Intrinsic::powi: @@ -1142,8 +1164,7 @@ llvm::canConstantFoldCallTo(const Function *F) { switch (Name[0]) { default: return false; case 'a': - return Name == "acos" || Name == "asin" || - Name == "atan" || Name == "atan2"; + return Name == "acos" || Name == "asin" || Name == "atan" || Name =="atan2"; case 'c': return Name == "cos" || Name == "ceil" || Name == "cosf" || Name == "cosh"; case 'e': @@ -1171,11 +1192,17 @@ static Constant *ConstantFoldFP(double (*NativeFP)(double), double V, return 0; } + if (Ty->isHalfTy()) { + APFloat APF(V); + bool unused; + APF.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &unused); + return ConstantFP::get(Ty->getContext(), APF); + } if (Ty->isFloatTy()) return ConstantFP::get(Ty->getContext(), APFloat((float)V)); if (Ty->isDoubleTy()) return ConstantFP::get(Ty->getContext(), APFloat(V)); - llvm_unreachable("Can only constant fold float/double"); + llvm_unreachable("Can only constant fold half/float/double"); } static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), @@ -1187,11 +1214,17 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), return 0; } + if (Ty->isHalfTy()) { + APFloat APF(V); + bool unused; + APF.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &unused); + return ConstantFP::get(Ty->getContext(), APF); + } if (Ty->isFloatTy()) return 
ConstantFP::get(Ty->getContext(), APFloat((float)V)); if (Ty->isDoubleTy()) return ConstantFP::get(Ty->getContext(), APFloat(V)); - llvm_unreachable("Can only constant fold float/double"); + llvm_unreachable("Can only constant fold half/float/double"); } /// ConstantFoldConvertToInt - Attempt to an SSE floating point to integer @@ -1243,7 +1276,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, if (!TLI) return 0; - if (!Ty->isFloatTy() && !Ty->isDoubleTy()) + if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) return 0; /// We only fold functions with finite arguments. Folding NaN and inf is @@ -1256,8 +1289,46 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, /// the host native double versions. Float versions are not called /// directly but for all these it is true (float)(f((double)arg)) == /// f(arg). Long double not supported yet. - double V = Ty->isFloatTy() ? (double)Op->getValueAPF().convertToFloat() : - Op->getValueAPF().convertToDouble(); + double V; + if (Ty->isFloatTy()) + V = Op->getValueAPF().convertToFloat(); + else if (Ty->isDoubleTy()) + V = Op->getValueAPF().convertToDouble(); + else { + bool unused; + APFloat APF = Op->getValueAPF(); + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused); + V = APF.convertToDouble(); + } + + switch (F->getIntrinsicID()) { + default: break; + case Intrinsic::fabs: + return ConstantFoldFP(fabs, V, Ty); +#if HAVE_LOG2 + case Intrinsic::log2: + return ConstantFoldFP(log2, V, Ty); +#endif +#if HAVE_LOG + case Intrinsic::log: + return ConstantFoldFP(log, V, Ty); +#endif +#if HAVE_LOG10 + case Intrinsic::log10: + return ConstantFoldFP(log10, V, Ty); +#endif +#if HAVE_EXP + case Intrinsic::exp: + return ConstantFoldFP(exp, V, Ty); +#endif +#if HAVE_EXP2 + case Intrinsic::exp2: + return ConstantFoldFP(exp2, V, Ty); +#endif + case Intrinsic::floor: + return ConstantFoldFP(floor, V, Ty); + } + switch (Name[0]) { case 'a': if (Name == "acos" && TLI->has(LibFunc::acos)) @@ -1299,7 +1370,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, else if (Name == "log10" && V > 0 && TLI->has(LibFunc::log10)) return ConstantFoldFP(log10, V, Ty); else if (F->getIntrinsicID() == Intrinsic::sqrt && - (Ty->isFloatTy() || Ty->isDoubleTy())) { + (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())) { if (V >= -0.0) return ConstantFoldFP(sqrt, V, Ty); else // Undefined @@ -1337,7 +1408,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, case Intrinsic::ctpop: return ConstantInt::get(Ty, Op->getValue().countPopulation()); case Intrinsic::convert_from_fp16: { - APFloat Val(Op->getValue()); + APFloat Val(APFloat::IEEEhalf, Op->getValue()); bool lost = false; APFloat::opStatus status = @@ -1391,18 +1462,35 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, if (Operands.size() == 2) { if (ConstantFP *Op1 = dyn_cast<ConstantFP>(Operands[0])) { - if (!Ty->isFloatTy() && !Ty->isDoubleTy()) + if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) return 0; - double Op1V = Ty->isFloatTy() ? 
- (double)Op1->getValueAPF().convertToFloat() : - Op1->getValueAPF().convertToDouble(); + double Op1V; + if (Ty->isFloatTy()) + Op1V = Op1->getValueAPF().convertToFloat(); + else if (Ty->isDoubleTy()) + Op1V = Op1->getValueAPF().convertToDouble(); + else { + bool unused; + APFloat APF = Op1->getValueAPF(); + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused); + Op1V = APF.convertToDouble(); + } + if (ConstantFP *Op2 = dyn_cast<ConstantFP>(Operands[1])) { if (Op2->getType() != Op1->getType()) return 0; - double Op2V = Ty->isFloatTy() ? - (double)Op2->getValueAPF().convertToFloat(): - Op2->getValueAPF().convertToDouble(); + double Op2V; + if (Ty->isFloatTy()) + Op2V = Op2->getValueAPF().convertToFloat(); + else if (Ty->isDoubleTy()) + Op2V = Op2->getValueAPF().convertToDouble(); + else { + bool unused; + APFloat APF = Op2->getValueAPF(); + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused); + Op2V = APF.convertToDouble(); + } if (F->getIntrinsicID() == Intrinsic::pow) { return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); @@ -1416,6 +1504,10 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, if (Name == "atan2" && TLI->has(LibFunc::atan2)) return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty); } else if (ConstantInt *Op2C = dyn_cast<ConstantInt>(Operands[1])) { + if (F->getIntrinsicID() == Intrinsic::powi && Ty->isHalfTy()) + return ConstantFP::get(F->getContext(), + APFloat((float)std::pow((float)Op1V, + (int)Op2C->getZExtValue()))); if (F->getIntrinsicID() == Intrinsic::powi && Ty->isFloatTy()) return ConstantFP::get(F->getContext(), APFloat((float)std::pow((float)Op1V, @@ -1468,12 +1560,12 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, return ConstantStruct::get(cast<StructType>(F->getReturnType()), Ops); } case Intrinsic::cttz: - // FIXME: This should check for Op2 == 1, and become unreachable if - // Op1 == 0. + if (Op2->isOne() && Op1->isZero()) // cttz(0, 1) is undef. + return UndefValue::get(Ty); return ConstantInt::get(Ty, Op1->getValue().countTrailingZeros()); case Intrinsic::ctlz: - // FIXME: This should check for Op2 == 1, and become unreachable if - // Op1 == 0. + if (Op2->isOne() && Op1->isZero()) // ctlz(0, 1) is undef. 
+ return UndefValue::get(Ty); return ConstantInt::get(Ty, Op1->getValue().countLeadingZeros()); } } diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp index 1784512..44684a9 100644 --- a/lib/Analysis/CostModel.cpp +++ b/lib/Analysis/CostModel.cpp @@ -80,11 +80,23 @@ CostModelAnalysis::runOnFunction(Function &F) { return false; } +static bool isReverseVectorMask(SmallVector<int, 16> &Mask) { + for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i) + if (Mask[i] > 0 && Mask[i] != (int)(MaskSize - 1 - i)) + return false; + return true; +} + unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const { if (!TTI) return -1; switch (I->getOpcode()) { + case Instruction::GetElementPtr:{ + Type *ValTy = I->getOperand(0)->getType()->getPointerElementType(); + return TTI->getAddressComputationCost(ValTy); + } + case Instruction::Ret: case Instruction::PHI: case Instruction::Br: { @@ -166,6 +178,17 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const { return TTI->getVectorInstrCost(I->getOpcode(), IE->getType(), Idx); } + case Instruction::ShuffleVector: { + const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I); + Type *VecTypOp0 = Shuffle->getOperand(0)->getType(); + unsigned NumVecElems = VecTypOp0->getVectorNumElements(); + SmallVector<int, 16> Mask = Shuffle->getShuffleMask(); + + if (NumVecElems == Mask.size() && isReverseVectorMask(Mask)) + return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, 0, + 0); + return -1; + } default: // We don't have any information on this instruction. return -1; diff --git a/lib/Analysis/IPA/CMakeLists.txt b/lib/Analysis/IPA/CMakeLists.txt index 34d6d1b..67b4135 100644 --- a/lib/Analysis/IPA/CMakeLists.txt +++ b/lib/Analysis/IPA/CMakeLists.txt @@ -1,9 +1,11 @@ add_llvm_library(LLVMipa CallGraph.cpp CallGraphSCCPass.cpp + CallPrinter.cpp FindUsedTypes.cpp GlobalsModRef.cpp IPA.cpp + InlineCost.cpp ) add_dependencies(LLVMipa intrinsics_gen) diff --git a/lib/Analysis/IPA/CallPrinter.cpp b/lib/Analysis/IPA/CallPrinter.cpp new file mode 100644 index 0000000..306ae7a --- /dev/null +++ b/lib/Analysis/IPA/CallPrinter.cpp @@ -0,0 +1,87 @@ +//===- CallPrinter.cpp - DOT printer for call graph -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines '-dot-callgraph', which emit a callgraph.<fnname>.dot +// containing the call graph of a module. +// +// There is also a pass available to directly call dotty ('-view-callgraph'). 
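The new CallPrinter.cpp (continued in the next chunk) registers two module passes, -view-callgraph and -dot-callgraph, and exports factory functions so the passes survive link-time dead-stripping. A hypothetical way to schedule the printer from C++ using that factory:

    // Illustrative only: add the new DOT printer to a legacy PassManager.
    #include "llvm/Analysis/CallPrinter.h"
    #include "llvm/PassManager.h"

    static void addCallGraphDump(llvm::PassManager &PM) {
      PM.add(llvm::createCallGraphPrinterPass());
    }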
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/CallPrinter.h" +#include "llvm/Analysis/DOTGraphTraitsPass.h" + +using namespace llvm; + +namespace llvm { + +template<> +struct DOTGraphTraits<CallGraph*> : public DefaultDOTGraphTraits { + DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} + + static std::string getGraphName(CallGraph *Graph) { + return "Call graph"; + } + + std::string getNodeLabel(CallGraphNode *Node, CallGraph *Graph) { + if (Function *Func = Node->getFunction()) + return Func->getName(); + + return "external node"; + } +}; + +} // end llvm namespace + +namespace { + +struct CallGraphViewer + : public DOTGraphTraitsModuleViewer<CallGraph, true> { + static char ID; + + CallGraphViewer() + : DOTGraphTraitsModuleViewer<CallGraph, true>("callgraph", ID) { + initializeCallGraphViewerPass(*PassRegistry::getPassRegistry()); + } +}; + +struct CallGraphPrinter + : public DOTGraphTraitsModulePrinter<CallGraph, true> { + static char ID; + + CallGraphPrinter() + : DOTGraphTraitsModulePrinter<CallGraph, true>("callgraph", ID) { + initializeCallGraphPrinterPass(*PassRegistry::getPassRegistry()); + } +}; + +} // end anonymous namespace + +char CallGraphViewer::ID = 0; +INITIALIZE_PASS(CallGraphViewer, "view-callgraph", + "View call graph", + false, false) + +char CallGraphPrinter::ID = 0; +INITIALIZE_PASS(CallGraphPrinter, "dot-callgraph", + "Print call graph to 'dot' file", + false, false) + +// Create methods available outside of this file, to use them +// "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by +// the link time optimization. + +ModulePass *llvm::createCallGraphViewerPass() { + return new CallGraphViewer(); +} + +ModulePass *llvm::createCallGraphPrinterPass() { + return new CallGraphPrinter(); +} diff --git a/lib/Analysis/IPA/IPA.cpp b/lib/Analysis/IPA/IPA.cpp index 0ba2e04..aa5164e 100644 --- a/lib/Analysis/IPA/IPA.cpp +++ b/lib/Analysis/IPA/IPA.cpp @@ -20,6 +20,8 @@ using namespace llvm; void llvm::initializeIPA(PassRegistry &Registry) { initializeBasicCallGraphPass(Registry); initializeCallGraphAnalysisGroup(Registry); + initializeCallGraphPrinterPass(Registry); + initializeCallGraphViewerPass(Registry); initializeFindUsedTypesPass(Registry); initializeGlobalsModRefPass(Registry); } diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp index 6e5c035..3292e00 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/IPA/InlineCost.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalAlias.h" @@ -44,6 +45,9 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { // DataLayout if available, or null. const DataLayout *const TD; + /// The TargetTransformInfo available for this compilation. + const TargetTransformInfo &TTI; + // The called function. 
Function &F; @@ -130,16 +134,17 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { bool visitCallSite(CallSite CS); public: - CallAnalyzer(const DataLayout *TD, Function &Callee, int Threshold) - : TD(TD), F(Callee), Threshold(Threshold), Cost(0), - IsCallerRecursive(false), IsRecursiveCall(false), - ExposesReturnsTwice(false), HasDynamicAlloca(false), ContainsNoDuplicateCall(false), - AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0), - FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0), - NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), - NumConstantPtrCmps(0), NumConstantPtrDiffs(0), - NumInstructionsSimplified(0), SROACostSavings(0), SROACostSavingsLost(0) { - } + CallAnalyzer(const DataLayout *TD, const TargetTransformInfo &TTI, + Function &Callee, int Threshold) + : TD(TD), TTI(TTI), F(Callee), Threshold(Threshold), Cost(0), + IsCallerRecursive(false), IsRecursiveCall(false), + ExposesReturnsTwice(false), HasDynamicAlloca(false), + ContainsNoDuplicateCall(false), AllocatedSize(0), NumInstructions(0), + NumVectorInstructions(0), FiftyPercentVectorBonus(0), + TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0), + NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0), + NumConstantPtrDiffs(0), NumInstructionsSimplified(0), + SROACostSavings(0), SROACostSavingsLost(0) {} bool analyzeCall(CallSite CS); @@ -417,7 +422,7 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) SROAArgValues[&I] = SROAArg; - return isInstructionFree(&I, TD); + return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I); } bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { @@ -447,7 +452,7 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { if (lookupSROAArgAndCost(Op, SROAArg, CostIt)) SROAArgValues[&I] = SROAArg; - return isInstructionFree(&I, TD); + return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I); } bool CallAnalyzer::visitCastInst(CastInst &I) { @@ -464,7 +469,7 @@ bool CallAnalyzer::visitCastInst(CastInst &I) { // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere. disableSROA(I.getOperand(0)); - return isInstructionFree(&I, TD); + return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I); } bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) { @@ -731,7 +736,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { return false; } - if (!callIsSmall(CS)) { + if (TTI.isLoweredToCall(F)) { // We account for the average 1 instruction per call argument setup // here. Cost += CS.arg_size() * InlineConstants::InstrCost; @@ -764,7 +769,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { // during devirtualization and so we want to give it a hefty bonus for // inlining, but cap that bonus in the event that inlining wouldn't pan // out. Pretend to inline the function, with a custom threshold. - CallAnalyzer CA(TD, *F, InlineConstants::IndirectCallThreshold); + CallAnalyzer CA(TD, TTI, *F, InlineConstants::IndirectCallThreshold); if (CA.analyzeCall(CS)) { // We were able to inline the indirect call! Subtract the cost from the // bonus we want to apply, but don't go below zero. @@ -777,7 +782,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { bool CallAnalyzer::visitInstruction(Instruction &I) { // Some instructions are free. All of the free intrinsics can also be // handled by SROA, etc. 
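Throughout the rewritten CallAnalyzer, the "is this instruction free?" tests become comparisons of TTI.getUserCost against TargetTransformInfo::TCC_Free, and visitCallSite asks TTI.isLoweredToCall(F) instead of the deleted callIsSmall heuristic before charging per-argument setup cost. That call-costing decision in isolation (the wrapper is illustrative; InlineConstants::InstrCost is LLVM's existing per-instruction constant):

    #include "llvm/Analysis/InlineCost.h"
    #include "llvm/Analysis/TargetTransformInfo.h"
    #include "llvm/Support/CallSite.h"

    static unsigned callSetupCost(llvm::CallSite CS, const llvm::Function *F,
                                  const llvm::TargetTransformInfo &TTI) {
      if (F && !TTI.isLoweredToCall(F))
        return 0; // expands to a simple instruction sequence, not a real call
      return CS.arg_size() * llvm::InlineConstants::InstrCost;
    }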
- if (isInstructionFree(&I, TD)) + if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I)) return true; // We found something we don't understand or can't handle. Mark any SROA-able @@ -1132,11 +1137,35 @@ void CallAnalyzer::dump() { } #endif -InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, int Threshold) { +INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis", + true, true) +INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) +INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis", + true, true) + +char InlineCostAnalysis::ID = 0; + +InlineCostAnalysis::InlineCostAnalysis() : CallGraphSCCPass(ID), TD(0) {} + +InlineCostAnalysis::~InlineCostAnalysis() {} + +void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<TargetTransformInfo>(); + CallGraphSCCPass::getAnalysisUsage(AU); +} + +bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) { + TD = getAnalysisIfAvailable<DataLayout>(); + TTI = &getAnalysis<TargetTransformInfo>(); + return false; +} + +InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) { return getInlineCost(CS, CS.getCalledFunction(), Threshold); } -InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, Function *Callee, +InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee, int Threshold) { // Cannot inline indirect calls. if (!Callee) @@ -1163,7 +1192,7 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, Function *Callee, DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() << "...\n"); - CallAnalyzer CA(TD, *Callee, Threshold); + CallAnalyzer CA(TD, *TTI, *Callee, Threshold); bool ShouldInline = CA.analyzeCall(CS); DEBUG(CA.dump()); @@ -1177,9 +1206,10 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, Function *Callee, return llvm::InlineCost::get(CA.getCost(), CA.getThreshold()); } -bool InlineCostAnalyzer::isInlineViable(Function &F) { - bool ReturnsTwice =F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::ReturnsTwice); +bool InlineCostAnalysis::isInlineViable(Function &F) { + bool ReturnsTwice = + F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::ReturnsTwice); for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { // Disallow inlining of functions which contain an indirect branch. if (isa<IndirectBrInst>(BI->getTerminator())) diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index d97e226..4a3c74e 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -21,10 +21,10 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/Operator.h" @@ -663,12 +663,20 @@ Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, /// accumulates the total constant offset applied in the returned constant. It /// returns 0 if V is not a pointer, and returns the constant '0' if there are /// no constant offsets applied. -static Constant *stripAndComputeConstantOffsets(const DataLayout &TD, +/// +/// This is very similar to GetPointerBaseWithConstantOffset except it doesn't +/// follow non-inbounds geps. 
This allows it to remain usable for icmp ult/etc. +/// folding. +static Constant *stripAndComputeConstantOffsets(const DataLayout *TD, Value *&V) { - if (!V->getType()->isPointerTy()) - return 0; + assert(V->getType()->getScalarType()->isPointerTy()); + + // Without DataLayout, just be conservative for now. Theoretically, more could + // be done in this case. + if (!TD) + return ConstantInt::get(IntegerType::get(V->getContext(), 64), 0); - unsigned IntPtrWidth = TD.getPointerSizeInBits(); + unsigned IntPtrWidth = TD->getPointerSizeInBits(); APInt Offset = APInt::getNullValue(IntPtrWidth); // Even though we don't look through PHI nodes, we could be called on an @@ -677,7 +685,7 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout &TD, Visited.insert(V); do { if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { - if (!GEP->isInBounds() || !GEP->accumulateConstantOffset(TD, Offset)) + if (!GEP->isInBounds() || !GEP->accumulateConstantOffset(*TD, Offset)) break; V = GEP->getPointerOperand(); } else if (Operator::getOpcode(V) == Instruction::BitCast) { @@ -689,23 +697,24 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout &TD, } else { break; } - assert(V->getType()->isPointerTy() && "Unexpected operand type!"); + assert(V->getType()->getScalarType()->isPointerTy() && + "Unexpected operand type!"); } while (Visited.insert(V)); - Type *IntPtrTy = TD.getIntPtrType(V->getContext()); - return ConstantInt::get(IntPtrTy, Offset); + Type *IntPtrTy = TD->getIntPtrType(V->getContext()); + Constant *OffsetIntPtr = ConstantInt::get(IntPtrTy, Offset); + if (V->getType()->isVectorTy()) + return ConstantVector::getSplat(V->getType()->getVectorNumElements(), + OffsetIntPtr); + return OffsetIntPtr; } /// \brief Compute the constant difference between two pointer values. /// If the difference is not a constant, returns zero. -static Constant *computePointerDifference(const DataLayout &TD, +static Constant *computePointerDifference(const DataLayout *TD, Value *LHS, Value *RHS) { Constant *LHSOffset = stripAndComputeConstantOffsets(TD, LHS); - if (!LHSOffset) - return 0; Constant *RHSOffset = stripAndComputeConstantOffsets(TD, RHS); - if (!RHSOffset) - return 0; // If LHS and RHS are not related via constant offsets to the same base // value, there is nothing we can do here. @@ -819,9 +828,9 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, return W; // Variations on GEP(base, I, ...) - GEP(base, i, ...) -> GEP(null, I-i, ...). - if (Q.TD && match(Op0, m_PtrToInt(m_Value(X))) && + if (match(Op0, m_PtrToInt(m_Value(X))) && match(Op1, m_PtrToInt(m_Value(Y)))) - if (Constant *Result = computePointerDifference(*Q.TD, X, Y)) + if (Constant *Result = computePointerDifference(Q.TD, X, Y)) return ConstantExpr::getIntegerCast(Result, Op0->getType(), true); // Mul distributes over Sub. Try some generic simplifications based on this. @@ -1684,9 +1693,48 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred, return 0; } -static Constant *computePointerICmp(const DataLayout &TD, +// A significant optimization not implemented here is assuming that alloca +// addresses are not equal to incoming argument values. They don't *alias*, +// as we say, but that doesn't mean they aren't equal, so we take a +// conservative approach. +// +// This is inspired in part by C++11 5.10p1: +// "Two pointers of the same type compare equal if and only if they are both +// null, both point to the same function, or both represent the same +// address." 
+// +// This is pretty permissive. +// +// It's also partly due to C11 6.5.9p6: +// "Two pointers compare equal if and only if both are null pointers, both are +// pointers to the same object (including a pointer to an object and a +// subobject at its beginning) or function, both are pointers to one past the +// last element of the same array object, or one is a pointer to one past the +// end of one array object and the other is a pointer to the start of a +// different array object that happens to immediately follow the first array +// object in the address space.) +// +// C11's version is more restrictive, however there's no reason why an argument +// couldn't be a one-past-the-end value for a stack object in the caller and be +// equal to the beginning of a stack object in the callee. +// +// If the C and C++ standards are ever made sufficiently restrictive in this +// area, it may be possible to update LLVM's semantics accordingly and reinstate +// this optimization. +static Constant *computePointerICmp(const DataLayout *TD, + const TargetLibraryInfo *TLI, CmpInst::Predicate Pred, Value *LHS, Value *RHS) { + // First, skip past any trivial no-ops. + LHS = LHS->stripPointerCasts(); + RHS = RHS->stripPointerCasts(); + + // A non-null pointer is not equal to a null pointer. + if (llvm::isKnownNonNull(LHS) && isa<ConstantPointerNull>(RHS) && + (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE)) + return ConstantInt::get(GetCompareTy(LHS), + !CmpInst::isTrueWhenEqual(Pred)); + // We can only fold certain predicates on pointer comparisons. switch (Pred) { default: @@ -1709,19 +1757,83 @@ static Constant *computePointerICmp(const DataLayout &TD, break; } + // Strip off any constant offsets so that we can reason about them. + // It's tempting to use getUnderlyingObject or even just stripInBoundsOffsets + // here and compare base addresses like AliasAnalysis does, however there are + // numerous hazards. AliasAnalysis and its utilities rely on special rules + // governing loads and stores which don't apply to icmps. Also, AliasAnalysis + // doesn't need to guarantee pointer inequality when it says NoAlias. Constant *LHSOffset = stripAndComputeConstantOffsets(TD, LHS); - if (!LHSOffset) - return 0; Constant *RHSOffset = stripAndComputeConstantOffsets(TD, RHS); - if (!RHSOffset) - return 0; - // If LHS and RHS are not related via constant offsets to the same base - // value, there is nothing we can do here. - if (LHS != RHS) - return 0; + // If LHS and RHS are related via constant offsets to the same base + // value, we can replace it with an icmp which just compares the offsets. + if (LHS == RHS) + return ConstantExpr::getICmp(Pred, LHSOffset, RHSOffset); + + // Various optimizations for (in)equality comparisons. + if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) { + // Different non-empty allocations that exist at the same time have + // different addresses (if the program can tell). Global variables always + // exist, so they always exist during the lifetime of each other and all + // allocas. Two different allocas usually have different addresses... + // + // However, if there's an @llvm.stackrestore dynamically in between two + // allocas, they may have the same address. It's tempting to reduce the + // scope of the problem by only looking at *static* allocas here. That would + // cover the majority of allocas while significantly reducing the likelihood + // of having an @llvm.stackrestore pop up in the middle. 
However, it's not + // actually impossible for an @llvm.stackrestore to pop up in the middle of + // an entry block. Also, if we have a block that's not attached to a + // function, we can't tell if it's "static" under the current definition. + // Theoretically, this problem could be fixed by creating a new kind of + // instruction kind specifically for static allocas. Such a new instruction + // could be required to be at the top of the entry block, thus preventing it + // from being subject to a @llvm.stackrestore. Instcombine could even + // convert regular allocas into these special allocas. It'd be nifty. + // However, until then, this problem remains open. + // + // So, we'll assume that two non-empty allocas have different addresses + // for now. + // + // With all that, if the offsets are within the bounds of their allocations + // (and not one-past-the-end! so we can't use inbounds!), and their + // allocations aren't the same, the pointers are not equal. + // + // Note that it's not necessary to check for LHS being a global variable + // address, due to canonicalization and constant folding. + if (isa<AllocaInst>(LHS) && + (isa<AllocaInst>(RHS) || isa<GlobalVariable>(RHS))) { + ConstantInt *LHSOffsetCI = dyn_cast<ConstantInt>(LHSOffset); + ConstantInt *RHSOffsetCI = dyn_cast<ConstantInt>(RHSOffset); + uint64_t LHSSize, RHSSize; + if (LHSOffsetCI && RHSOffsetCI && + getObjectSize(LHS, LHSSize, TD, TLI) && + getObjectSize(RHS, RHSSize, TD, TLI)) { + const APInt &LHSOffsetValue = LHSOffsetCI->getValue(); + const APInt &RHSOffsetValue = RHSOffsetCI->getValue(); + if (!LHSOffsetValue.isNegative() && + !RHSOffsetValue.isNegative() && + LHSOffsetValue.ult(LHSSize) && + RHSOffsetValue.ult(RHSSize)) { + return ConstantInt::get(GetCompareTy(LHS), + !CmpInst::isTrueWhenEqual(Pred)); + } + } + + // Repeat the above check but this time without depending on DataLayout + // or being able to compute a precise size. + if (!cast<PointerType>(LHS->getType())->isEmptyTy() && + !cast<PointerType>(RHS->getType())->isEmptyTy() && + LHSOffset->isNullValue() && + RHSOffset->isNullValue()) + return ConstantInt::get(GetCompareTy(LHS), + !CmpInst::isTrueWhenEqual(Pred)); + } + } - return ConstantExpr::getICmp(Pred, LHSOffset, RHSOffset); + // Otherwise, fail. + return 0; } /// SimplifyICmpInst - Given operands for an ICmpInst, see if we can @@ -1786,62 +1898,6 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } } - // icmp <object*>, <object*/null> - Different identified objects have - // different addresses (unless null), and what's more the address of an - // identified local is never equal to another argument (again, barring null). - // Note that generalizing to the case where LHS is a global variable address - // or null is pointless, since if both LHS and RHS are constants then we - // already constant folded the compare, and if only one of them is then we - // moved it to RHS already. - Value *LHSPtr = LHS->stripPointerCasts(); - Value *RHSPtr = RHS->stripPointerCasts(); - if (LHSPtr == RHSPtr) - return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred)); - - // Be more aggressive about stripping pointer adjustments when checking a - // comparison of an alloca address to another object. We can rip off all - // inbounds GEP operations, even if they are variable. 
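computePointerICmp, as rebuilt above, first strips pointer casts, then folds equality comparisons of a known-non-null pointer against null, and finally proves inequality of pointers into distinct live allocations whose offsets are in bounds. The null-comparison fold restated as a standalone helper for the scalar case (the wrapper is illustrative, and isKnownNonNull is assumed to be reachable via ValueTracking.h after its move out of AliasAnalysis.cpp earlier in this diff):

    #include "llvm/Analysis/ValueTracking.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/InstrTypes.h"
    #include "llvm/IR/Type.h"

    static llvm::Constant *foldNonNullVsNull(llvm::CmpInst::Predicate Pred,
                                             llvm::Value *LHS,
                                             llvm::Value *RHS) {
      using namespace llvm;
      if ((Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) &&
          isKnownNonNull(LHS->stripPointerCasts()) &&
          isa<ConstantPointerNull>(RHS->stripPointerCasts()))
        return ConstantInt::get(Type::getInt1Ty(LHS->getContext()),
                                !CmpInst::isTrueWhenEqual(Pred));
      return 0; // not a fold we can do
    }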
- LHSPtr = LHSPtr->stripInBoundsOffsets(); - if (llvm::isIdentifiedObject(LHSPtr)) { - RHSPtr = RHSPtr->stripInBoundsOffsets(); - if (llvm::isKnownNonNull(LHSPtr) || llvm::isKnownNonNull(RHSPtr)) { - // If both sides are different identified objects, they aren't equal - // unless they're null. - if (LHSPtr != RHSPtr && llvm::isIdentifiedObject(RHSPtr) && - Pred == CmpInst::ICMP_EQ) - return ConstantInt::get(ITy, false); - - // A local identified object (alloca or noalias call) can't equal any - // incoming argument, unless they're both null or they belong to - // different functions. The latter happens during inlining. - if (Instruction *LHSInst = dyn_cast<Instruction>(LHSPtr)) - if (Argument *RHSArg = dyn_cast<Argument>(RHSPtr)) - if (LHSInst->getParent()->getParent() == RHSArg->getParent() && - Pred == CmpInst::ICMP_EQ) - return ConstantInt::get(ITy, false); - } - - // Assume that the constant null is on the right. - if (llvm::isKnownNonNull(LHSPtr) && isa<ConstantPointerNull>(RHSPtr)) { - if (Pred == CmpInst::ICMP_EQ) - return ConstantInt::get(ITy, false); - else if (Pred == CmpInst::ICMP_NE) - return ConstantInt::get(ITy, true); - } - } else if (Argument *LHSArg = dyn_cast<Argument>(LHSPtr)) { - RHSPtr = RHSPtr->stripInBoundsOffsets(); - // An alloca can't be equal to an argument unless they come from separate - // functions via inlining. - if (AllocaInst *RHSInst = dyn_cast<AllocaInst>(RHSPtr)) { - if (LHSArg->getParent() == RHSInst->getParent()->getParent()) { - if (Pred == CmpInst::ICMP_EQ) - return ConstantInt::get(ITy, false); - else if (Pred == CmpInst::ICMP_NE) - return ConstantInt::get(ITy, true); - } - } - } - // If we are comparing with zero then try hard since this is a common case. if (match(RHS, m_Zero())) { bool LHSKnownNonNegative, LHSKnownNegative; @@ -2468,8 +2524,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // Simplify comparisons of related pointers using a powerful, recursive // GEP-walk when we have target data available.. 
- if (Q.TD && LHS->getType()->isPointerTy() && RHS->getType()->isPointerTy()) - if (Constant *C = computePointerICmp(*Q.TD, Pred, LHS, RHS)) + if (LHS->getType()->isPointerTy()) + if (Constant *C = computePointerICmp(Q.TD, Q.TLI, Pred, LHS, RHS)) return C; if (GetElementPtrInst *GLHS = dyn_cast<GetElementPtrInst>(LHS)) { @@ -2869,6 +2925,37 @@ Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, RecursionLimit); } +static bool IsIdempotent(Intrinsic::ID ID) { + switch (ID) { + default: return false; + + // Unary idempotent: f(f(x)) = f(x) + case Intrinsic::fabs: + case Intrinsic::floor: + case Intrinsic::ceil: + case Intrinsic::trunc: + case Intrinsic::rint: + case Intrinsic::nearbyint: + return true; + } +} + +template <typename IterTy> +static Value *SimplifyIntrinsic(Intrinsic::ID IID, IterTy ArgBegin, IterTy ArgEnd, + const Query &Q, unsigned MaxRecurse) { + // Perform idempotent optimizations + if (!IsIdempotent(IID)) + return 0; + + // Unary Ops + if (std::distance(ArgBegin, ArgEnd) == 1) + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*ArgBegin)) + if (II->getIntrinsicID() == IID) + return II; + + return 0; +} + template <typename IterTy> static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd, const Query &Q, unsigned MaxRecurse) { @@ -2885,6 +2972,11 @@ static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd, if (!F) return 0; + if (unsigned IID = F->getIntrinsicID()) + if (Value *Ret = + SimplifyIntrinsic((Intrinsic::ID) IID, ArgBegin, ArgEnd, Q, MaxRecurse)) + return Ret; + if (!canConstantFoldCallTo(F)) return 0; diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index 1c94d10..66b5e85 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -16,7 +16,6 @@ #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Constants.h" diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index fd10a6b..9393508 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -412,51 +412,49 @@ void Lint::visitMemoryReference(Instruction &I, } // Check for buffer overflows and misalignment. - if (TD) { - // Only handles memory references that read/write something simple like an - // alloca instruction or a global variable. - int64_t Offset = 0; - if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, *TD)) { - // OK, so the access is to a constant offset from Ptr. Check that Ptr is - // something we can handle and if so extract the size of this base object - // along with its alignment. - uint64_t BaseSize = AliasAnalysis::UnknownSize; - unsigned BaseAlign = 0; - - if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) { - Type *ATy = AI->getAllocatedType(); - if (!AI->isArrayAllocation() && ATy->isSized()) - BaseSize = TD->getTypeAllocSize(ATy); - BaseAlign = AI->getAlignment(); - if (BaseAlign == 0 && ATy->isSized()) - BaseAlign = TD->getABITypeAlignment(ATy); - } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) { - // If the global may be defined differently in another compilation unit - // then don't warn about funky memory accesses. 
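The SimplifyIntrinsic hook added above exploits idempotence: for fabs, floor, ceil, trunc, rint, and nearbyint, f(f(x)) == f(x), so a directly nested call of the same intrinsic simplifies to its inner call. The property itself, spot-checked numerically:

    #include <cassert>
    #include <cmath>

    int main() {
      const double Xs[] = {-2.7, -0.5, 0.0, 3.2, 1e9};
      for (unsigned i = 0; i != sizeof(Xs) / sizeof(Xs[0]); ++i) {
        assert(std::floor(std::floor(Xs[i])) == std::floor(Xs[i])); // f(f(x)) == f(x)
        assert(std::fabs(std::fabs(Xs[i])) == std::fabs(Xs[i]));
      }
      return 0;
    }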
- if (GV->hasDefinitiveInitializer()) { - Type *GTy = GV->getType()->getElementType(); - if (GTy->isSized()) - BaseSize = TD->getTypeAllocSize(GTy); - BaseAlign = GV->getAlignment(); - if (BaseAlign == 0 && GTy->isSized()) - BaseAlign = TD->getABITypeAlignment(GTy); - } + // Only handles memory references that read/write something simple like an + // alloca instruction or a global variable. + int64_t Offset = 0; + if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, TD)) { + // OK, so the access is to a constant offset from Ptr. Check that Ptr is + // something we can handle and if so extract the size of this base object + // along with its alignment. + uint64_t BaseSize = AliasAnalysis::UnknownSize; + unsigned BaseAlign = 0; + + if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) { + Type *ATy = AI->getAllocatedType(); + if (TD && !AI->isArrayAllocation() && ATy->isSized()) + BaseSize = TD->getTypeAllocSize(ATy); + BaseAlign = AI->getAlignment(); + if (TD && BaseAlign == 0 && ATy->isSized()) + BaseAlign = TD->getABITypeAlignment(ATy); + } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) { + // If the global may be defined differently in another compilation unit + // then don't warn about funky memory accesses. + if (GV->hasDefinitiveInitializer()) { + Type *GTy = GV->getType()->getElementType(); + if (TD && GTy->isSized()) + BaseSize = TD->getTypeAllocSize(GTy); + BaseAlign = GV->getAlignment(); + if (TD && BaseAlign == 0 && GTy->isSized()) + BaseAlign = TD->getABITypeAlignment(GTy); } - - // Accesses from before the start or after the end of the object are not - // defined. - Assert1(Size == AliasAnalysis::UnknownSize || - BaseSize == AliasAnalysis::UnknownSize || - (Offset >= 0 && Offset + Size <= BaseSize), - "Undefined behavior: Buffer overflow", &I); - - // Accesses that say that the memory is more aligned than it is are not - // defined. - if (Align == 0 && Ty && Ty->isSized()) - Align = TD->getABITypeAlignment(Ty); - Assert1(!BaseAlign || Align <= MinAlign(BaseAlign, Offset), - "Undefined behavior: Memory reference address is misaligned", &I); } + + // Accesses from before the start or after the end of the object are not + // defined. + Assert1(Size == AliasAnalysis::UnknownSize || + BaseSize == AliasAnalysis::UnknownSize || + (Offset >= 0 && Offset + Size <= BaseSize), + "Undefined behavior: Buffer overflow", &I); + + // Accesses that say that the memory is more aligned than it is are not + // defined. 
+ if (TD && Align == 0 && Ty && Ty->isSized()) + Align = TD->getABITypeAlignment(Ty); + Assert1(!BaseAlign || Align <= MinAlign(BaseAlign, Offset), + "Undefined behavior: Memory reference address is misaligned", &I); } } diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp index 3158873..0902a39 100644 --- a/lib/Analysis/Loads.cpp +++ b/lib/Analysis/Loads.cpp @@ -57,8 +57,7 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, unsigned Align, const DataLayout *TD) { int64_t ByteOffset = 0; Value *Base = V; - if (TD) - Base = GetPointerBaseWithConstantOffset(V, ByteOffset, *TD); + Base = GetPointerBaseWithConstantOffset(V, ByteOffset, TD); if (ByteOffset < 0) // out of bounds return false; diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index 4d4c627..f1ad650 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -24,6 +24,7 @@ #include "llvm/Assembly/Writer.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -233,6 +234,55 @@ bool Loop::isSafeToClone() const { return true; } +bool Loop::isAnnotatedParallel() const { + + BasicBlock *latch = getLoopLatch(); + if (latch == NULL) + return false; + + MDNode *desiredLoopIdMetadata = + latch->getTerminator()->getMetadata("llvm.loop.parallel"); + + if (!desiredLoopIdMetadata) + return false; + + // The loop branch contains the parallel loop metadata. In order to ensure + // that any parallel-loop-unaware optimization pass hasn't added loop-carried + // dependencies (thus converted the loop back to a sequential loop), check + // that all the memory instructions in the loop contain parallelism metadata + // that point to the same unique "loop id metadata" the loop branch does. + for (block_iterator BB = block_begin(), BE = block_end(); BB != BE; ++BB) { + for (BasicBlock::iterator II = (*BB)->begin(), EE = (*BB)->end(); + II != EE; II++) { + + if (!II->mayReadOrWriteMemory()) + continue; + + if (!II->getMetadata("llvm.mem.parallel_loop_access")) + return false; + + // The memory instruction can refer to the loop identifier metadata + // directly or indirectly through another list metadata (in case of + // nested parallel loops). The loop identifier metadata refers to + // itself so we can check both cases with the same routine. + MDNode *loopIdMD = + dyn_cast<MDNode>(II->getMetadata("llvm.mem.parallel_loop_access")); + bool loopIdMDFound = false; + for (unsigned i = 0, e = loopIdMD->getNumOperands(); i < e; ++i) { + if (loopIdMD->getOperand(i) == desiredLoopIdMetadata) { + loopIdMDFound = true; + break; + } + } + + if (!loopIdMDFound) + return false; + } + } + return true; +} + + /// hasDedicatedExits - Return true if no exit block for the loop /// has a predecessor that is outside the loop. bool Loop::hasDedicatedExits() const { diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index f88affb..0fc0550 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -385,21 +385,16 @@ ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *TD, SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { V = V->stripPointerCasts(); - - if (isa<Instruction>(V) || isa<GEPOperator>(V)) { - // If we have already seen this instruction, bail out. - if (!SeenInsts.insert(V)) + if (Instruction *I = dyn_cast<Instruction>(V)) { + // If we have already seen this instruction, bail out. 
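Loop::isAnnotatedParallel, added above, only trusts the llvm.loop.parallel annotation on the latch branch when every memory access in the loop carries llvm.mem.parallel_loop_access metadata that points back at the same self-referential loop-id node. A sketch of producing metadata in that shape with the MDNode API of this vintage (helper names are illustrative):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/IR/Instruction.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Metadata.h"

    static llvm::MDNode *createLoopID(llvm::LLVMContext &Ctx) {
      // Build a node whose operand 0 is the node itself, so the identity
      // comparison in isAnnotatedParallel can succeed.
      llvm::MDNode *Tmp =
          llvm::MDNode::getTemporary(Ctx, llvm::ArrayRef<llvm::Value *>());
      llvm::Value *Ops[] = {Tmp};
      llvm::MDNode *LoopID = llvm::MDNode::get(Ctx, Ops);
      LoopID->replaceOperandWith(0, LoopID);
      llvm::MDNode::deleteTemporary(Tmp);
      return LoopID;
    }

    static void markParallel(llvm::Instruction *LatchBranch,
                             llvm::Instruction *MemAccess) {
      llvm::MDNode *LoopID = createLoopID(LatchBranch->getContext());
      LatchBranch->setMetadata("llvm.loop.parallel", LoopID);
      MemAccess->setMetadata("llvm.mem.parallel_loop_access", LoopID);
    }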
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index f88affb..0fc0550 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -385,21 +385,16 @@ ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *TD,
 
 SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) {
   V = V->stripPointerCasts();
-
-  if (isa<Instruction>(V) || isa<GEPOperator>(V)) {
-    // If we have already seen this instruction, bail out.
-    if (!SeenInsts.insert(V))
+  if (Instruction *I = dyn_cast<Instruction>(V)) {
+    // If we have already seen this instruction, bail out. Cycles can happen in
+    // unreachable code after constant propagation.
+    if (!SeenInsts.insert(I))
       return unknown();
-
-    SizeOffsetType Ret;
     if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
-      Ret = visitGEPOperator(*GEP);
-    else
-      Ret = visit(cast<Instruction>(*V));
-    SeenInsts.erase(V);
-    return Ret;
+      return visitGEPOperator(*GEP);
+    return visit(*I);
   }
-
   if (Argument *A = dyn_cast<Argument>(V))
     return visitArgument(*A);
   if (ConstantPointerNull *P = dyn_cast<ConstantPointerNull>(V))
@@ -413,6 +408,8 @@ SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) {
   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
     if (CE->getOpcode() == Instruction::IntToPtr)
       return unknown(); // clueless
+    if (CE->getOpcode() == Instruction::GetElementPtr)
+      return visitGEPOperator(cast<GEPOperator>(*CE));
   }
 
   DEBUG(dbgs() << "ObjectSizeOffsetVisitor::compute() unhandled value: " << *V
@@ -546,21 +543,9 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitLoadInst(LoadInst&) {
   return unknown();
 }
 
-SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode &PHI) {
-  if (PHI.getNumIncomingValues() == 0)
-    return unknown();
-
-  SizeOffsetType Ret = compute(PHI.getIncomingValue(0));
-  if (!bothKnown(Ret))
-    return unknown();
-
-  // verify that all PHI incoming pointers have the same size and offset
-  for (unsigned i = 1, e = PHI.getNumIncomingValues(); i != e; ++i) {
-    SizeOffsetType EdgeData = compute(PHI.getIncomingValue(i));
-    if (!bothKnown(EdgeData) || EdgeData != Ret)
-      return unknown();
-  }
-  return Ret;
+SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode&) {
+  // too complex to analyze statically.
+  return unknown();
 }
 
 SizeOffsetType ObjectSizeOffsetVisitor::visitSelectInst(SelectInst &I) {
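The compute() rewrite keeps visited instructions in SeenInsts (the old code erased them on the way out), so any revisit, including one caused by a cycle in unreachable code, immediately returns unknown(). A minimal sketch of that guard pattern over a generic node graph (hypothetical Node type, not the LLVM classes):

    #include <cassert>
    #include <unordered_set>
    #include <vector>

    struct Node {
      std::vector<Node *> operands;
    };

    // Returns false ("unknown") when a node is reached twice, mirroring how
    // compute() bails out when SeenInsts.insert fails.
    static bool visit(Node *N, std::unordered_set<Node *> &Seen) {
      if (!Seen.insert(N).second)
        return false; // revisit: a cycle, e.g. from unreachable code
      for (Node *Op : N->operands)
        if (!visit(Op, Seen))
          return false;
      return true;
    }

    int main() {
      Node A, B;
      A.operands = {&B};
      {
        std::unordered_set<Node *> Seen;
        assert(visit(&A, Seen)); // acyclic: fully analyzable
      }
      B.operands = {&A}; // introduce a cycle: A -> B -> A
      {
        std::unordered_set<Node *> Seen;
        assert(!visit(&A, Seen)); // cycle detected, bail out
      }
      return 0;
    }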
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index eee7607..38bf5dd 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -262,7 +262,7 @@ isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc,
 
   // If we haven't already computed the base/offset of MemLoc, do so now.
   if (MemLocBase == 0)
-    MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, *TD);
+    MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, TD);
 
   unsigned Size = MemoryDependenceAnalysis::
     getLoadLoadClobberFullWidthSize(MemLocBase, MemLocOffs, MemLoc.Size,
@@ -283,11 +283,17 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
                                 const DataLayout &TD) {
   // We can only extend simple integer loads.
   if (!isa<IntegerType>(LI->getType()) || !LI->isSimple()) return 0;
+
+  // Load widening is hostile to ThreadSanitizer: it may cause false positives
+  // or make the reports more cryptic (access sizes are wrong).
+  if (LI->getParent()->getParent()->getAttributes().
+      hasAttribute(AttributeSet::FunctionIndex, Attribute::SanitizeThread))
+    return 0;
 
   // Get the base of this load.
   int64_t LIOffs = 0;
   const Value *LIBase =
-    GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, TD);
+    GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, &TD);
 
   // If the two pointers are not based on the same pointer, we can't tell that
   // they are related.
@@ -328,7 +334,7 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
     if (LIOffs+NewLoadByteSize > MemLocEnd &&
         LI->getParent()->getParent()->getAttributes().
-          hasAttribute(AttributeSet::FunctionIndex, Attribute::AddressSafety))
+          hasAttribute(AttributeSet::FunctionIndex, Attribute::SanitizeAddress))
       // We will be reading past the location accessed by the original program.
       // While this is safe in a regular build, Address Safety analysis tools
       // may start reporting false warnings. So, don't do widening.
diff --git a/lib/Analysis/ProfileDataLoaderPass.cpp b/lib/Analysis/ProfileDataLoaderPass.cpp
index 51b7f1d..2ee0093 100644
--- a/lib/Analysis/ProfileDataLoaderPass.cpp
+++ b/lib/Analysis/ProfileDataLoaderPass.cpp
@@ -177,8 +177,8 @@ bool ProfileMetadataLoaderPass::runOnModule(Module &M) {
   unsigned ReadCount = matchEdges(M, PB, Counters);
 
   if (ReadCount != Counters.size()) {
-    M.getContext().emitWarning("profile information is inconsistent "
-                               "with the current program");
+    errs() << "WARNING: profile information is inconsistent with "
+           << "the current program!\n";
   }
   NumEdgesRead = ReadCount;
diff --git a/lib/Analysis/ProfileInfoLoaderPass.cpp b/lib/Analysis/ProfileInfoLoaderPass.cpp
index 094c107..346f8d6 100644
--- a/lib/Analysis/ProfileInfoLoaderPass.cpp
+++ b/lib/Analysis/ProfileInfoLoaderPass.cpp
@@ -19,7 +19,6 @@
 #include "llvm/Analysis/ProfileInfoLoader.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CFG.h"
@@ -171,8 +170,8 @@ bool LoaderPass::runOnModule(Module &M) {
       }
     }
     if (ReadCount != Counters.size()) {
-      M.getContext().emitWarning("profile information is inconsistent "
-                                 "with the current program");
+      errs() << "WARNING: profile information is inconsistent with "
+             << "the current program!\n";
     }
     NumEdgesRead = ReadCount;
   }
@@ -219,8 +218,8 @@ bool LoaderPass::runOnModule(Module &M) {
       }
     }
     if (ReadCount != Counters.size()) {
-      M.getContext().emitWarning("profile information is inconsistent "
-                                 "with the current program");
+      errs() << "WARNING: profile information is inconsistent with "
+             << "the current program!\n";
     }
     NumEdgesRead = ReadCount;
   }
@@ -240,8 +239,8 @@ bool LoaderPass::runOnModule(Module &M) {
       BlockInformation[F][BB] = (double)Counters[ReadCount++];
     }
     if (ReadCount != Counters.size()) {
-      M.getContext().emitWarning("profile information is inconsistent "
-                                 "with the current program");
+      errs() << "WARNING: profile information is inconsistent with "
+             << "the current program!\n";
     }
   }
 
@@ -259,8 +258,8 @@ bool LoaderPass::runOnModule(Module &M) {
      FunctionInformation[F] = (double)Counters[ReadCount++];
     }
     if (ReadCount != Counters.size()) {
-      M.getContext().emitWarning("profile information is inconsistent "
-                                 "with the current program");
+      errs() << "WARNING: profile information is inconsistent with "
+             << "the current program!\n";
     }
   }
 
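The SanitizeThread bail-out in the MemoryDependenceAnalysis change above exists because widening changes which bytes a load touches. A small illustration of the failure mode (plain C++; the field layout is hypothetical): the source program below is race-free, but if the 1-byte load of f.a were widened to a 4-byte load it would also read f.b, and ThreadSanitizer would report a race the programmer never wrote.

    #include <cstdint>
    #include <thread>

    struct Fields {
      uint8_t a;      // only ever read by thread t1
      uint8_t b;      // only ever written by thread t2
      uint8_t pad[2];
    };

    Fields f;

    int main() {
      std::thread t1([] {
        volatile uint8_t x = f.a; // 1-byte load; widening it to 4 bytes
        (void)x;                  // would also read f.b and trip TSan
      });
      std::thread t2([] { f.b = 42; });
      t1.join();
      t2.join();
      return 0;
    }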
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index b87ad75..fcd7ce2 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -1523,9 +1523,8 @@ Value *SCEVExpander::expand(const SCEV *S) {
   }
 
   // Check to see if we already expanded this here.
-  std::map<std::pair<const SCEV *, Instruction *>,
-           AssertingVH<Value> >::iterator I =
-    InsertedExpressions.find(std::make_pair(S, InsertPt));
+  std::map<std::pair<const SCEV *, Instruction *>, TrackingVH<Value> >::iterator
+    I = InsertedExpressions.find(std::make_pair(S, InsertPt));
   if (I != InsertedExpressions.end())
     return I->second;
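The switch from AssertingVH to TrackingVH changes what happens when a cached expansion is replaced: an asserting handle aborts if its value goes away while still referenced, whereas a tracking handle follows the replacement. A toy analogy of the tracking behavior (a sketch of the semantics only, not LLVM's ValueHandle machinery):

    #include <cassert>

    // A toy "value" that can be replaced wholesale, and a handle that follows
    // the replacement chain instead of asserting.
    struct Value {
      Value *replacement = nullptr;
    };

    struct TrackingHandle {
      Value *v;
      // Resolve through any chain of replacements before use.
      Value *get() {
        while (v->replacement)
          v = v->replacement;
        return v;
      }
    };

    int main() {
      Value a, b;
      TrackingHandle h{&a};
      a.replacement = &b;    // stand-in for replacing 'a' with 'b' (RAUW)
      assert(h.get() == &b); // the handle now yields the replacement
      return 0;
    }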
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 63f495a..72421a0 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -9,6 +9,12 @@
 #define DEBUG_TYPE "tti"
 #include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/CallSite.h"
 #include "llvm/Support/ErrorHandling.h"
 
 using namespace llvm;
@@ -43,6 +49,49 @@ void TargetTransformInfo::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<TargetTransformInfo>();
 }
 
+unsigned TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty,
+                                               Type *OpTy) const {
+  return PrevTTI->getOperationCost(Opcode, Ty, OpTy);
+}
+
+unsigned TargetTransformInfo::getGEPCost(
+    const Value *Ptr, ArrayRef<const Value *> Operands) const {
+  return PrevTTI->getGEPCost(Ptr, Operands);
+}
+
+unsigned TargetTransformInfo::getCallCost(FunctionType *FTy,
+                                          int NumArgs) const {
+  return PrevTTI->getCallCost(FTy, NumArgs);
+}
+
+unsigned TargetTransformInfo::getCallCost(const Function *F,
+                                          int NumArgs) const {
+  return PrevTTI->getCallCost(F, NumArgs);
+}
+
+unsigned TargetTransformInfo::getCallCost(
+    const Function *F, ArrayRef<const Value *> Arguments) const {
+  return PrevTTI->getCallCost(F, Arguments);
+}
+
+unsigned TargetTransformInfo::getIntrinsicCost(
+    Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> ParamTys) const {
+  return PrevTTI->getIntrinsicCost(IID, RetTy, ParamTys);
+}
+
+unsigned TargetTransformInfo::getIntrinsicCost(
+    Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments) const {
+  return PrevTTI->getIntrinsicCost(IID, RetTy, Arguments);
+}
+
+unsigned TargetTransformInfo::getUserCost(const User *U) const {
+  return PrevTTI->getUserCost(U);
+}
+
+bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
+  return PrevTTI->isLoweredToCall(F);
+}
+
 bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
   return PrevTTI->isLegalAddImmediate(Imm);
 }
@@ -92,6 +141,14 @@ unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
   return PrevTTI->getNumberOfRegisters(Vector);
 }
 
+unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const {
+  return PrevTTI->getRegisterBitWidth(Vector);
+}
+
+unsigned TargetTransformInfo::getMaximumUnrollFactor() const {
+  return PrevTTI->getMaximumUnrollFactor();
+}
+
 unsigned TargetTransformInfo::getArithmeticInstrCost(unsigned Opcode,
                                                      Type *Ty) const {
   return PrevTTI->getArithmeticInstrCost(Opcode, Ty);
@@ -139,18 +196,25 @@ unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const {
   return PrevTTI->getNumberOfParts(Tp);
 }
 
+unsigned TargetTransformInfo::getAddressComputationCost(Type *Tp) const {
+  return PrevTTI->getAddressComputationCost(Tp);
+}
 
 namespace {
 
 struct NoTTI : ImmutablePass, TargetTransformInfo {
-  NoTTI() : ImmutablePass(ID) {
+  const DataLayout *DL;
+
+  NoTTI() : ImmutablePass(ID), DL(0) {
     initializeNoTTIPass(*PassRegistry::getPassRegistry());
   }
 
   virtual void initializePass() {
     // Note that this subclass is special, and must *not* call initializeTTI as
     // it does not chain.
+    TopTTI = this;
     PrevTTI = 0;
+    DL = getAnalysisIfAvailable<DataLayout>();
   }
 
   virtual void getAnalysisUsage(AnalysisUsage &AU) const {
@@ -168,6 +232,213 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
     return this;
   }
 
+  unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) const {
+    switch (Opcode) {
+    default:
+      // By default, just classify everything as 'basic'.
+      return TCC_Basic;
+
+    case Instruction::GetElementPtr:
+      llvm_unreachable("Use getGEPCost for GEP operations!");
+
+    case Instruction::BitCast:
+      assert(OpTy && "Cast instructions must provide the operand type");
+      if (Ty == OpTy || (Ty->isPointerTy() && OpTy->isPointerTy()))
+        // Identity and pointer-to-pointer casts are free.
+        return TCC_Free;
+
+      // Otherwise, the default basic cost is used.
+      return TCC_Basic;
+
+    case Instruction::IntToPtr:
+      // An inttoptr cast is free so long as the input is a legal integer type
+      // which doesn't contain values outside the range of a pointer.
+      if (DL && DL->isLegalInteger(OpTy->getScalarSizeInBits()) &&
+          OpTy->getScalarSizeInBits() <= DL->getPointerSizeInBits())
+        return TCC_Free;
+
+      // Otherwise it's not a no-op.
+      return TCC_Basic;
+
+    case Instruction::PtrToInt:
+      // A ptrtoint cast is free so long as the result is large enough to store
+      // the pointer, and a legal integer type.
+      if (DL && DL->isLegalInteger(OpTy->getScalarSizeInBits()) &&
+          OpTy->getScalarSizeInBits() >= DL->getPointerSizeInBits())
+        return TCC_Free;
+
+      // Otherwise it's not a no-op.
+      return TCC_Basic;
+
+    case Instruction::Trunc:
+      // trunc to a native type is free (assuming the target has compare and
+      // shift-right of the same width).
+      if (DL && DL->isLegalInteger(DL->getTypeSizeInBits(Ty)))
+        return TCC_Free;
+
+      return TCC_Basic;
+    }
+  }
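The free-cast rules in the getOperationCost switch above reduce to two comparisons against the pointer width. A small standalone sketch, assuming a hypothetical target with 64-bit pointers whose legal integer widths are 8/16/32/64 (a real implementation queries DataLayout, as the code above does):

    #include <cassert>

    static bool isLegalInteger(unsigned bits) {
      return bits == 8 || bits == 16 || bits == 32 || bits == 64;
    }

    static bool intToPtrIsFree(unsigned srcBits, unsigned ptrBits) {
      // Free when the integer is legal and can't exceed the pointer's range.
      return isLegalInteger(srcBits) && srcBits <= ptrBits;
    }

    static bool ptrToIntIsFree(unsigned dstBits, unsigned ptrBits) {
      // Free when the integer is legal and wide enough to hold the pointer.
      return isLegalInteger(dstBits) && dstBits >= ptrBits;
    }

    int main() {
      assert(intToPtrIsFree(64, 64));   // i64 -> ptr: a no-op
      assert(!intToPtrIsFree(128, 64)); // i128 isn't legal and doesn't fit
      assert(ptrToIntIsFree(64, 64));   // ptr -> i64: a no-op
      assert(!ptrToIntIsFree(32, 64));  // ptr -> i32 truncates, so not free
      return 0;
    }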
+
+  unsigned getGEPCost(const Value *Ptr,
+                      ArrayRef<const Value *> Operands) const {
+    // In the basic model, we just assume that all-constant GEPs will be folded
+    // into their uses via addressing modes.
+    for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
+      if (!isa<Constant>(Operands[Idx]))
+        return TCC_Basic;
+
+    return TCC_Free;
+  }
+
+  unsigned getCallCost(FunctionType *FTy, int NumArgs = -1) const {
+    assert(FTy && "FunctionType must be provided to this routine.");
+
+    // The target-independent implementation just measures the size of the
+    // function by approximating that each argument will take on average one
+    // instruction to prepare.
+
+    if (NumArgs < 0)
+      // Set the argument number to the number of explicit arguments in the
+      // function.
+      NumArgs = FTy->getNumParams();
+
+    return TCC_Basic * (NumArgs + 1);
+  }
+
+  unsigned getCallCost(const Function *F, int NumArgs = -1) const {
+    assert(F && "A concrete function must be provided to this routine.");
+
+    if (NumArgs < 0)
+      // Set the argument number to the number of explicit arguments in the
+      // function.
+      NumArgs = F->arg_size();
+
+    if (Intrinsic::ID IID = (Intrinsic::ID)F->getIntrinsicID()) {
+      FunctionType *FTy = F->getFunctionType();
+      SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end());
+      return TopTTI->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys);
+    }
+
+    if (!TopTTI->isLoweredToCall(F))
+      return TCC_Basic; // Give a basic cost if it will be lowered directly.
+
+    return TopTTI->getCallCost(F->getFunctionType(), NumArgs);
+  }
+
+  unsigned getCallCost(const Function *F,
+                       ArrayRef<const Value *> Arguments) const {
+    // Simply delegate to generic handling of the call.
+    // FIXME: We should use instsimplify or something else to catch calls which
+    // will constant fold with these arguments.
+    return TopTTI->getCallCost(F, Arguments.size());
+  }
+
+  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
+                            ArrayRef<Type *> ParamTys) const {
+    switch (IID) {
+    default:
+      // Intrinsics rarely (if ever) have normal argument setup constraints.
+      // Model them as having a basic instruction cost.
+      // FIXME: This is wrong for libc intrinsics.
+      return TCC_Basic;
+
+    case Intrinsic::dbg_declare:
+    case Intrinsic::dbg_value:
+    case Intrinsic::invariant_start:
+    case Intrinsic::invariant_end:
+    case Intrinsic::lifetime_start:
+    case Intrinsic::lifetime_end:
+    case Intrinsic::objectsize:
+    case Intrinsic::ptr_annotation:
+    case Intrinsic::var_annotation:
+      // These intrinsics don't actually represent code after lowering.
+      return TCC_Free;
+    }
+  }
+
+  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
+                            ArrayRef<const Value *> Arguments) const {
+    // Delegate to the generic intrinsic handling code. This mostly provides an
+    // opportunity for targets to (for example) special case the cost of
+    // certain intrinsics based on constants used as arguments.
+    SmallVector<Type *, 8> ParamTys;
+    ParamTys.reserve(Arguments.size());
+    for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx)
+      ParamTys.push_back(Arguments[Idx]->getType());
+    return TopTTI->getIntrinsicCost(IID, RetTy, ParamTys);
+  }
+
+  unsigned getUserCost(const User *U) const {
+    if (isa<PHINode>(U))
+      return TCC_Free; // Model all PHI nodes as free.
+
+    if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U))
+      // In the basic model we just assume that all-constant GEPs will be
+      // folded into their uses via addressing modes.
+      return GEP->hasAllConstantIndices() ? TCC_Free : TCC_Basic;
+
+    if (ImmutableCallSite CS = U) {
+      const Function *F = CS.getCalledFunction();
+      if (!F) {
+        // Just use the called value type.
+        Type *FTy = CS.getCalledValue()->getType()->getPointerElementType();
+        return TopTTI->getCallCost(cast<FunctionType>(FTy), CS.arg_size());
+      }
+
+      SmallVector<const Value *, 8> Arguments;
+      for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(),
+                                           AE = CS.arg_end();
+           AI != AE; ++AI)
+        Arguments.push_back(*AI);
+
+      return TopTTI->getCallCost(F, Arguments);
+    }
+
+    if (const CastInst *CI = dyn_cast<CastInst>(U)) {
+      // Result of a cmp instruction is often extended (to be used by other
+      // cmp instructions, logical or return instructions). These are usually
+      // nop on most sane targets.
+      if (isa<CmpInst>(CI->getOperand(0)))
+        return TCC_Free;
+    }
+
+    // Otherwise delegate to the fully generic implementations.
+    return getOperationCost(Operator::getOpcode(U), U->getType(),
+                            U->getNumOperands() == 1 ?
+                                U->getOperand(0)->getType() : 0);
+  }
+
+  bool isLoweredToCall(const Function *F) const {
+    // FIXME: These should almost certainly not be handled here, and instead
+    // handled with the help of TLI or the target itself. This was largely
+    // ported from existing analysis heuristics here so that such refactorings
+    // can take place in the future.
+
+    if (F->isIntrinsic())
+      return false;
+
+    if (F->hasLocalLinkage() || !F->hasName())
+      return true;
+
+    StringRef Name = F->getName();
+
+    // These will all likely lower to a single selection DAG node.
+    if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
+        Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
+        Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
+        Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
+      return false;
+
+    // These are all likely to be optimized into something smaller.
+    if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
+        Name == "exp2l" || Name == "exp2f" || Name == "floor" || Name ==
+        "floorf" || Name == "ceil" || Name == "round" || Name == "ffs" ||
+        Name == "ffsl" || Name == "abs" || Name == "labs" || Name == "llabs")
+      return false;
+
+    return true;
+  }
 
   bool isLegalAddImmediate(int64_t Imm) const {
     return false;
@@ -216,6 +487,14 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
     return 8;
   }
 
+  unsigned getRegisterBitWidth(bool Vector) const {
+    return 32;
+  }
+
+  unsigned getMaximumUnrollFactor() const {
+    return 1;
+  }
+
   unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
     return 1;
   }
@@ -259,6 +538,10 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
   unsigned getNumberOfParts(Type *Tp) const {
     return 0;
   }
+
+  unsigned getAddressComputationCost(Type *Tp) const {
+    return 0;
+  }
 };
 
 } // end anonymous namespace
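Clients of this interface sum per-user costs to estimate code size, so the cost buckets matter more than exact numbers. A standalone sketch of the scheme, including the NoTTI call heuristic of one basic instruction per argument plus one for the call itself (the enum values here are illustrative mirrors of the TCC_* constants):

    #include <cassert>

    // Illustrative mirror of the TCC_* buckets used above.
    enum TargetCostConstants {
      TCC_Free = 0,      // expected to fold away entirely
      TCC_Basic = 1,     // a single cheap instruction
      TCC_Expensive = 4  // something like a division
    };

    // The call-cost heuristic from NoTTI::getCallCost: one basic instruction
    // to prepare each argument, plus one for the call itself.
    static unsigned callCost(int numArgs) {
      return TCC_Basic * (numArgs + 1);
    }

    int main() {
      assert(callCost(0) == 1); // a no-arg call still costs the call itself
      assert(callCost(3) == 4); // three argument setups + the call
      return 0;
    }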
+ if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" || + Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" || + Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" || + Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") + return false; + + // These are all likely to be optimized into something smaller. + if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" || + Name == "exp2l" || Name == "exp2f" || Name == "floor" || Name == + "floorf" || Name == "ceil" || Name == "round" || Name == "ffs" || + Name == "ffsl" || Name == "abs" || Name == "labs" || Name == "llabs") + return false; + + return true; + } bool isLegalAddImmediate(int64_t Imm) const { return false; @@ -216,6 +487,14 @@ struct NoTTI : ImmutablePass, TargetTransformInfo { return 8; } + unsigned getRegisterBitWidth(bool Vector) const { + return 32; + } + + unsigned getMaximumUnrollFactor() const { + return 1; + } + unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const { return 1; } @@ -259,6 +538,10 @@ struct NoTTI : ImmutablePass, TargetTransformInfo { unsigned getNumberOfParts(Type *Tp) const { return 0; } + + unsigned getAddressComputationCost(Type *Tp) const { + return 0; + } }; } // end anonymous namespace diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index efb9b08..8e3994e 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -1510,7 +1510,7 @@ static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType, SmallVector<unsigned, 10> &Idxs, unsigned IdxSkip, Instruction *InsertBefore) { - llvm::StructType *STy = llvm::dyn_cast<llvm::StructType>(IndexedType); + llvm::StructType *STy = dyn_cast<llvm::StructType>(IndexedType); if (STy) { // Save the original To argument so we can modify it Value *OrigTo = To; @@ -1671,8 +1671,10 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range, /// it can be expressed as a base pointer plus a constant offset. Return the /// base and offset to the caller. Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, - const DataLayout &TD) { - unsigned BitWidth = TD.getPointerSizeInBits(); + const DataLayout *TD) { + // Without DataLayout, conservatively assume 64-bit offsets, which is + // the widest we support. + unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64; APInt ByteOffset(BitWidth, 0); while (1) { if (Ptr->getType()->isVectorTy()) @@ -1680,7 +1682,7 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, if (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) { APInt GEPOffset(BitWidth, 0); - if (!GEP->accumulateConstantOffset(TD, GEPOffset)) + if (TD && !GEP->accumulateConstantOffset(*TD, GEPOffset)) break; ByteOffset += GEPOffset; Ptr = GEP->getPointerOperand(); @@ -2012,3 +2014,19 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, return false; // Misc instructions which have effects } } + +/// isKnownNonNull - Return true if we know that the specified value is never +/// null. +bool llvm::isKnownNonNull(const Value *V) { + // Alloca never returns null, malloc might. + if (isa<AllocaInst>(V)) return true; + + // A byval argument is never null. + if (const Argument *A = dyn_cast<Argument>(V)) + return A->hasByValAttr(); + + // Global values are not null unless extern weak. + if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) + return !GV->hasExternalWeakLinkage(); + return false; +} |