diff options
| author | Chris Lattner <sabre@nondot.org> | 2008-03-28 06:45:13 +0000 | 
|---|---|---|
| committer | Chris Lattner <sabre@nondot.org> | 2008-03-28 06:45:13 +0000 | 
| commit | 49650d6fb309afe722c327c44a7f00dd57e37bdf (patch) | |
| tree | 872b855f87c656e03f7fc7be4688dd93f1b0459b /lib/Transforms | |
| parent | ced85adc8e1f686e69244edd1c2c097fa248393f (diff) | |
| download | external_llvm-49650d6fb309afe722c327c44a7f00dd57e37bdf.zip external_llvm-49650d6fb309afe722c327c44a7f00dd57e37bdf.tar.gz external_llvm-49650d6fb309afe722c327c44a7f00dd57e37bdf.tar.bz2 | |
make memset inference significantly more powerful: it can now handle 
memsets that initialize "structs of arrays" and other store sequences
that are not sequential.  This is still only enabled if you pass 
-form-memset-from-stores.  The flag is not heavily tested and I haven't
analyzed the perf regressions when -form-memset-from-stores is passed
either, but this causes no make check regressions.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@48909 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Transforms')
| -rw-r--r-- | lib/Transforms/Scalar/GVN.cpp | 266 | 
1 files changed, 184 insertions, 82 deletions
| diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index a9445dd..ba6e361 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -36,6 +36,7 @@  #include "llvm/Support/Compiler.h"  #include "llvm/Support/GetElementPtrTypeIterator.h"  #include "llvm/Target/TargetData.h" +#include <list>  using namespace llvm;  STATISTIC(NumGVNInstr, "Number of instructions deleted"); @@ -1074,11 +1075,11 @@ static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx,    return Offset;  } -/// IsPointerAtOffset - Return true if Ptr1 is exactly provably equal to Ptr2 -/// plus the specified constant offset.  For example, Ptr1 might be &A[42], and -/// Ptr2 might be &A[40] and Offset might be 8. -static bool IsPointerAtOffset(Value *Ptr1, Value *Ptr2, uint64_t Offset, -                              TargetData &TD) { +/// IsPointerOffset - Return true if Ptr1 is provably equal to Ptr2 plus a +/// constant offset, and return that constant offset.  For example, Ptr1 might +/// be &A[42], and Ptr2 might be &A[40].  In this case offset would be -8. +static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, +                            TargetData &TD) {    // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical    // base.  After that base, they may have some number of common (and    // potentially variable) indices.  After that they handle some constant @@ -1100,10 +1101,122 @@ static bool IsPointerAtOffset(Value *Ptr1, Value *Ptr2, uint64_t Offset,    int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD);    if (VariableIdxFound) return false; -  return Offset1 == Offset2+(int64_t)Offset; +  Offset = Offset2-Offset1; +  return true;  } +/// MemsetRange - Represents a range of memset'd bytes with the ByteVal value. +/// This allows us to analyze stores like: +///   store 0 -> P+1 +///   store 0 -> P+0 +///   store 0 -> P+3 +///   store 0 -> P+2 +/// which sometimes happens with stores to arrays of structs etc.  When we see +/// the first store, we make a range [1, 2).  The second store extends the range +/// to [0, 2).  The third makes a new range [2, 3).  The fourth store joins the +/// two ranges into [0, 3) which is memset'able. +namespace { +struct MemsetRange { +  // Start/End - A semi range that describes the span that this range covers. +  // The range is closed at the start and open at the end: [Start, End).   +  int64_t Start, End; + +  /// StartPtr - The getelementptr instruction that points to the start of the +  /// range. +  Value *StartPtr; +   +  /// Alignment - The known alignment of the first store. +  unsigned Alignment; +   +  /// TheStores - The actual stores that make up this range. +  SmallVector<StoreInst*, 16> TheStores; +}; +   +  +class MemsetRanges { +  /// Ranges - A sorted list of the memset ranges.  We use std::list here +  /// because each element is relatively large and expensive to copy. +  std::list<MemsetRange> Ranges; +  typedef std::list<MemsetRange>::iterator range_iterator; +  TargetData &TD; +public: +  MemsetRanges(TargetData &td) : TD(td) {} +   +  typedef std::list<MemsetRange>::const_iterator const_iterator; +  const_iterator begin() const { return Ranges.begin(); } +  const_iterator end() const { return Ranges.end(); } +   +   +  void addStore(int64_t OffsetFromFirst, StoreInst *SI); +}; +   +} + +/// addStore - Add a new store to the MemsetRanges data structure.  This adds a +/// new range for the specified store at the specified offset, merging into +/// existing ranges as appropriate. +void MemsetRanges::addStore(int64_t Start, StoreInst *SI) { +  int64_t End = Start+TD.getTypeStoreSize(SI->getOperand(0)->getType()); +   +  // Do a linear search of the ranges to see if this can be joined and/or to +  // find the insertion point in the list.  We keep the ranges sorted for +  // simplicity here.  This is a linear search of a linked list, which is ugly, +  // however the number of ranges is limited, so this won't get crazy slow. +  range_iterator I = Ranges.begin(), E = Ranges.end(); +   +  while (I != E && Start > I->End) +    ++I; +   +  // We now know that I == E, in which case we didn't find anything to merge +  // with, or that Start <= I->End.  If End < I->Start or I == E, then we need +  // to insert a new range.  Handle this now. +  if (I == E || End < I->Start) { +    MemsetRange &R = *Ranges.insert(I, MemsetRange()); +    R.Start        = Start; +    R.End          = End; +    R.StartPtr     = SI->getPointerOperand(); +    R.Alignment    = SI->getAlignment(); +    R.TheStores.push_back(SI); +    return; +  } + +  // This store overlaps with I, add it. +  I->TheStores.push_back(SI); +   +  // At this point, we may have an interval that completely contains our store. +  // If so, just add it to the interval and return. +  if (I->Start <= Start && I->End >= End) +    return; +   +  // Now we know that Start <= I->End and End >= I->Start so the range overlaps +  // but is not entirely contained within the range. +   +  // See if the range extends the start of the range.  In this case, it couldn't +  // possibly cause it to join the prior range, because otherwise we would have +  // stopped on *it*. +  if (Start < I->Start) +    I->Start = Start; +     +  // Now we know that Start <= I->End and Start >= I->Start (so the startpoint +  // is in or right at the end of I), and that End >= I->Start.  Extend I out to +  // End. +  if (End > I->End) { +    I->End = End; +    range_iterator NextI = I;; +    while (++NextI != E && End >= NextI->Start) { +      // Merge the range in. +      I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end()); +      if (NextI->End > I->End) +        I->End = NextI->End; +      Ranges.erase(NextI); +      NextI = I; +    } +  } +} + + +  /// processStore - When GVN is scanning forward over instructions, we look for  /// some other patterns to fold away.  In particular, this looks for stores to  /// neighboring locations of memory.  If it sees enough consequtive ones @@ -1125,18 +1238,15 @@ bool GVN::processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase) {    TargetData &TD = getAnalysis<TargetData>();    AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); -  // Okay, so we now have a single store that can be splatable.  Try to 'grow' -  // this store by looking for neighboring stores to the immediate left or right -  // of the store we have so far.  While we could in theory handle stores in -  // this order:  A[0], A[2], A[1] -  // in practice, right now we only worry about cases where stores are -  // consequtive in increasing or decreasing address order. -  uint64_t BytesSoFar = TD.getTypeStoreSize(SI->getOperand(0)->getType()); -  uint64_t BytesFromSI = 0; -  unsigned StartAlign = SI->getAlignment(); +  // Okay, so we now have a single store that can be splatable.  Scan to find +  // all subsequent stores of the same value to offset from the same pointer. +  // Join these together into ranges, so we can decide whether contiguous blocks +  // are stored. +  MemsetRanges Ranges(TD); +   +  // Add our first pointer. +  Ranges.addStore(0, SI);    Value *StartPtr = SI->getPointerOperand(); -  SmallVector<StoreInst*, 16> Stores; -  Stores.push_back(SI);    BasicBlock::iterator BI = SI;    for (++BI; !isa<TerminatorInst>(BI); ++BI) { @@ -1164,77 +1274,69 @@ bool GVN::processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase) {      // Check to see if this stored value is of the same byte-splattable value.      if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))        break; -     -    Value *ThisPointer = NextStore->getPointerOperand(); -    unsigned AccessSize = TD.getTypeStoreSize(SI->getOperand(0)->getType()); -     -    // If so, check to see if the store is before the current range or after it -    // in either case, extend the range, otherwise reject it. -    if (IsPointerAtOffset(ThisPointer, StartPtr, BytesSoFar, TD)) { -      // Okay, this extends the stored area on the end, just add to the bytes -      // so far and remember this store. -      BytesSoFar += AccessSize; -      Stores.push_back(NextStore); -      continue; -    } -     -    if (IsPointerAtOffset(StartPtr, ThisPointer, AccessSize, TD)) { -      // Okay, the store is before the current range.  Reset our start pointer -      // and get new alignment info etc. -      BytesSoFar  += AccessSize; -      BytesFromSI += AccessSize; -      Stores.push_back(NextStore); -      StartPtr = ThisPointer; -      StartAlign = NextStore->getAlignment(); -      continue; -    } -    // Otherwise, this store wasn't contiguous with our current range, bail out. -    break; +    // Check to see if this store is to a constant offset from the start ptr. +    int64_t Offset; +    if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, TD)) +      break; + +    Ranges.addStore(Offset, NextStore);    } -  // If we found less than 4 stores to merge, bail out, it isn't worth losing -  // type information in llvm IR to do the transformation. -  if (Stores.size() < 4)  -    return false; -   -  // Otherwise, we do want to transform this!  Create a new memset.  We put the -  // memset right after the first store that we found in this block.  This -  // ensures that the caller will increment the iterator to  the memset before -  // it deletes all the stores. -  BasicBlock::iterator InsertPt = SI; ++InsertPt; +  Function *MemSetF = 0; -  Function *F = Intrinsic::getDeclaration(SI->getParent()->getParent() +  // Now that we have full information about ranges, loop over the ranges and +  // emit memset's for anything big enough to be worthwhile. +  bool MadeChange = false; +  for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end(); +       I != E; ++I) { +    const MemsetRange &Range = *I; + +    // If we found less than 4 stores to merge, ignore the subrange: it isn't +    // worth losing type information in llvm IR to do the transformation. +    if (Range.TheStores.size() < 4) +      continue; +     +    // Otherwise, we do want to transform this!  Create a new memset.  We put +    // the memset right after the first store that we found in this block.  This +    // ensures that the caller will increment the iterator to the memset before +    // it deletes all the stores. +    BasicBlock::iterator InsertPt = SI; ++InsertPt; +   +    if (MemSetF == 0) +      MemSetF = Intrinsic::getDeclaration(SI->getParent()->getParent()                                            ->getParent(), Intrinsic::memset_i64); +     +    // StartPtr may not dominate the starting point.  Instead of using it, base +    // the destination pointer off the input to the first store in the block. +    StartPtr = SI->getPointerOperand(); +   +    // Cast the start ptr to be i8* as memset requires. +    const Type *i8Ptr = PointerType::getUnqual(Type::Int8Ty); +    if (StartPtr->getType() != i8Ptr) +      StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getNameStart(), +                                 InsertPt); +   +    // Offset the pointer if needed. +    if (Range.Start) +      StartPtr = new GetElementPtrInst(StartPtr, ConstantInt::get(Type::Int64Ty, +                                                                  Range.Start), +                                       "ptroffset", InsertPt); +   +    Value *Ops[] = { +      StartPtr, ByteVal,   // Start, value +      ConstantInt::get(Type::Int64Ty, Range.End-Range.Start),  // size +      ConstantInt::get(Type::Int32Ty, Range.Alignment)   // align +    }; +    new CallInst(MemSetF, Ops, Ops+4, "", InsertPt); +   +    // Zap all the stores. +    toErase.append(Range.TheStores.begin(), Range.TheStores.end()); +    ++NumMemSetInfer; +    MadeChange = true; +  } -  // StartPtr may not dominate the starting point.  Instead of using it, base -  // the destination pointer off the input to the first store in the block. -  StartPtr = SI->getPointerOperand(); -   -  // Cast the start ptr to be i8* as memset requires. -  const Type *i8Ptr = PointerType::getUnqual(Type::Int8Ty); -  if (StartPtr->getType() != i8Ptr) -    StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getNameStart(), -                               InsertPt); -   -  // Offset the pointer if needed. -  if (BytesFromSI) -    StartPtr = new GetElementPtrInst(StartPtr, ConstantInt::get(Type::Int64Ty, -                                                                -BytesFromSI), -                                     "ptroffset", InsertPt); -   -  Value *Ops[] = { -    StartPtr, ByteVal,   // Start, value -    ConstantInt::get(Type::Int64Ty, BytesSoFar),  // size -    ConstantInt::get(Type::Int32Ty, StartAlign)   // align -  }; -  new CallInst(F, Ops, Ops+4, "", InsertPt); -   -  // Zap all the stores. -  toErase.append(Stores.begin(), Stores.end()); -   -  ++NumMemSetInfer; -  return true; +  return MadeChange;  } | 
