| author | Chris Lattner <sabre@nondot.org> | 2008-03-22 05:37:16 +0000 | 
|---|---|---|
| committer | Chris Lattner <sabre@nondot.org> | 2008-03-22 05:37:16 +0000 | 
| commit | 60b9cb4080a1faf9ac56b19005b86130d1590ca7 (patch) | |
| tree | 18acc6a6e983aaeeb5e3e1d80b59e62b045a2b5e | |
| parent | 1be832267a50570c9c05789aeb72f93b3f76b021 (diff) | |
Implement an initial hack at a straight-line store -> memset optimization.
This fires dozens of times across SPEC and MultiSource, but I don't know
if it actually speeds stuff up.  Hopefully the testers will show something
nice :)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@48680 91177308-0d34-0410-b5e6-96231b3b80d8
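For context, here is a minimal sketch of the pattern this optimization targets, shown at the C level. It is illustrative only: the functions `before` and `after` are hypothetical and nothing below is code from this commit.

```cpp
// Illustrative sketch only -- not code from this commit.
#include <cstring>

void before(char *x, char c) {
  // A straight-line run of stores of the same byte value to adjacent
  // locations; this is the shape the new processStore logic looks for
  // (it needs "enough consecutive ones", currently 4).
  x[0] = c;
  x[1] = c;
  x[2] = c;
  x[3] = c;
}

void after(char *x, char c) {
  // Conceptually what the transformed IR does: one llvm.memset over the
  // whole range instead of four separate stores.
  std::memset(x, c, 4);
}
```

The new test (form-memset.ll, below) checks exactly this shape in LLVM IR: nineteen adjacent i8 stores of %c that should collapse into a single call to llvm.memset.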
| -rw-r--r-- | lib/Transforms/Scalar/GVN.cpp | 65 |
| -rw-r--r-- | test/Transforms/GVN/form-memset.ll | 55 |
2 files changed, 114 insertions, 6 deletions
```diff
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index f72fe8b..a3a3323 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -33,6 +33,7 @@
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Target/TargetData.h"
 using namespace llvm;
@@ -1036,11 +1037,63 @@ static Value *isBytewiseValue(Value *V) {
   return 0;
 }
 
+static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx,
+                                  bool &VariableIdxFound, TargetData &TD) {
+  // Skip over the first indices.
+  gep_type_iterator GTI = gep_type_begin(GEP);
+  for (unsigned i = 1; i != Idx; ++i, ++GTI)
+    /*skip along*/;
+
+  // Compute the offset implied by the rest of the indices.
+  int64_t Offset = 0;
+  for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
+    ConstantInt *OpC = dyn_cast<ConstantInt>(GEP->getOperand(i));
+    if (OpC == 0)
+      return VariableIdxFound = true;
+    if (OpC->isZero()) continue;  // No offset.
+
+    // Handle struct indices, which add their field offset to the pointer.
+    if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+      Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+      continue;
+    }
+
+    // Otherwise, we have a sequential type like an array or vector.  Multiply
+    // the index by the ElementSize.
+    uint64_t Size = TD.getABITypeSize(GTI.getIndexedType());
+    Offset += Size*OpC->getSExtValue();
+  }
+
+  return Offset;
+}
+
 /// IsPointerAtOffset - Return true if Ptr1 is exactly provably equal to Ptr2
 /// plus the specified constant offset.  For example, Ptr1 might be &A[42], and
 /// Ptr2 might be &A[40] and Offset might be 8.
-static bool IsPointerAtOffset(Value *Ptr1, Value *Ptr2, uint64_t Offset) {
-  return true;
+static bool IsPointerAtOffset(Value *Ptr1, Value *Ptr2, uint64_t Offset,
+                              TargetData &TD) {
+  // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical
+  // base.  After that base, they may have some number of common (and
+  // potentially variable) indices.  After that they handle some constant
+  // offset, which determines their offset from each other.  At this point, we
+  // handle no other case.
+  GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(Ptr1);
+  GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(Ptr2);
+  if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0))
+    return false;
+
+  // Skip any common indices and track the GEP types.
+  unsigned Idx = 1;
+  for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx)
+    if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx))
+      break;
+
+  bool VariableIdxFound = false;
+  int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, TD);
+  int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD);
+  if (VariableIdxFound) return false;
+
+  return Offset1 == Offset2+(int64_t)Offset;
 }
@@ -1049,7 +1102,6 @@ static bool IsPointerAtOffset(Value *Ptr1, Value *Ptr2, uint64_t Offset) {
 /// neighboring locations of memory.  If it sees enough consequtive ones
 /// (currently 4) it attempts to merge them together into a memcpy/memset.
 bool GVN::processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase) {
-  return false;
   if (SI->isVolatile()) return false;
 
   // There are two cases that are interesting for this code to handle: memcpy
@@ -1078,7 +1130,7 @@ bool GVN::processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase) {
   SmallVector<StoreInst*, 16> Stores;
   Stores.push_back(SI);
 
-  BasicBlock::iterator BI = SI; ++BI;
+  BasicBlock::iterator BI = SI;
   for (++BI; !isa<TerminatorInst>(BI); ++BI) {
     if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) {
       // If the call is readnone, ignore it, otherwise bail out.  We don't even
@@ -1110,7 +1162,7 @@ bool GVN::processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase) {
     // If so, check to see if the store is before the current range or after it
     // in either case, extend the range, otherwise reject it.
-    if (IsPointerAtOffset(ThisPointer, StartPtr, BytesSoFar)) {
+    if (IsPointerAtOffset(ThisPointer, StartPtr, BytesSoFar, TD)) {
       // Okay, this extends the stored area on the end, just add to the bytes
       // so far and remember this store.
       BytesSoFar += AccessSize;
@@ -1118,7 +1170,7 @@ bool GVN::processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase) {
       continue;
     }
 
-    if (IsPointerAtOffset(StartPtr, ThisPointer, AccessSize)) {
+    if (IsPointerAtOffset(StartPtr, ThisPointer, AccessSize, TD)) {
      // Okay, the store is before the current range.  Reset our start pointer
       // and get new alignment info etc.
       BytesSoFar  += AccessSize;
@@ -1170,6 +1222,7 @@ bool GVN::processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase) {
   };
   new CallInst(F, Ops, Ops+4, "", InsertPt);
 
+  // Zap all the stores.
   toErase.append(Stores.begin(), Stores.end());
   ++NumMemSetInfer;
diff --git a/test/Transforms/GVN/form-memset.ll b/test/Transforms/GVN/form-memset.ll
new file mode 100644
index 0000000..a026cde
--- /dev/null
+++ b/test/Transforms/GVN/form-memset.ll
@@ -0,0 +1,55 @@
+; RUN: llvm-as < %s | opt -gvn | llvm-dis | not grep store
+; RUN: llvm-as < %s | opt -gvn | llvm-dis | grep llvm.memset
+
+; All the stores in this example should be merged into a single memset.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin8"
+
+define void @foo(i8 signext  %c) nounwind  {
+entry:
+	%x = alloca [19 x i8]		; <[19 x i8]*> [#uses=20]
+	%tmp = getelementptr [19 x i8]* %x, i32 0, i32 0		; <i8*> [#uses=1]
+	store i8 %c, i8* %tmp, align 1
+	%tmp5 = getelementptr [19 x i8]* %x, i32 0, i32 1		; <i8*> [#uses=1]
+	store i8 %c, i8* %tmp5, align 1
+	%tmp9 = getelementptr [19 x i8]* %x, i32 0, i32 2		; <i8*> [#uses=1]
+	store i8 %c, i8* %tmp9, align 1
+	%tmp13 = getelementptr [19 x i8]* %x, i32 0, i32 3		; <i8*> [#uses=1]
+	store i8 %c, i8* %tmp13, align 1
+	%tmp17 = getelementptr [19 x i8]* %x, i32 0, i32 4		; <i8*> [#uses=1]
+	store i8 %c, i8* %tmp17, align 1
+	%tmp21 = getelementptr [19 x i8]* %x, i32 0, i32 5		; <i8*> [#uses=1]
+	store i8 %c, i8* %tmp21, align 1
+	%tmp25 = getelementptr [19 x i8]* %x, i32 0, i32 6		; <i8*> [#uses=1]
+	store i8 %c, i8* %tmp25, align 1
+	%tmp29 = getelementptr [19 x i8]* %x, i32 0, i32 7		; <i8*> [#uses=1]
+	store i8 %c, i8* %tmp29, align 1
+	%tmp33 = getelementptr [19 x i8]* %x, i32 0, i32 8		; <i8*> [#uses=1]
+	store i8 %c, i8* %tmp33, align 1
+	%tmp37 = getelementptr [19 x i8]* %x, i32 0, i32 9		; <i8*> [#uses=1]
+	store i8 %c, i8* %tmp37, align 1
+	%tmp41 = getelementptr [19 x i8]* %x, i32 0, i32 10		; <i8*> [#uses=1]
+	store i8 %c, i8* %tmp41, align 1
+	%tmp45 = getelementptr [19 x i8]* %x, i32 0, i32 11		; <i8*> [#uses=1]
+	store i8 %c, i8* %tmp45, align 1
+	%tmp49 = getelementptr [19 x i8]* %x, i32 0, i32 12		; <i8*> [#uses=1]
+	store i8 %c, i8* %tmp49, align 1
+	%tmp53 = getelementptr [19 x i8]* %x, i32 0, i32 13		; <i8*> [#uses=1]
+	store i8 %c, i8* %tmp53, align 1
+	%tmp57 = getelementptr [19 x i8]* %x, i32 0, i32 14		; <i8*> [#uses=1]
+	store i8 %c, i8* %tmp57, align 1
+	%tmp61 = getelementptr [19 x i8]* %x, i32 0, i32 15		; <i8*> [#uses=1]
+	store i8 %c, i8* %tmp61, align 1
+	%tmp65 = getelementptr [19 x i8]* %x, i32 0, i32 16		; <i8*> [#uses=1]
+	store i8 %c, i8* %tmp65, align 1
+	%tmp69 = getelementptr [19 x i8]* %x, i32 0, i32 17		; <i8*> [#uses=1]
+	store i8 %c, i8* %tmp69, align 1
+	%tmp73 = getelementptr [19 x i8]* %x, i32 0, i32 18		; <i8*> [#uses=1]
+	store i8 %c, i8* %tmp73, align 1
+	%tmp76 = call i32 (...)* @bar( [19 x i8]* %x ) nounwind 		; <i32> [#uses=0]
+	ret void
+}
+
+declare i32 @bar(...)
```
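To make the pointer arithmetic in the patch easier to follow, here is a simplified standalone sketch of the check IsPointerAtOffset performs under its stated restriction (both pointers are GEPs off the same base): skip the indices the two pointers share, fold the remaining constant indices into byte offsets, and compare. All names here (`isPointerAtOffset`, `offsetFromIndices`) are hypothetical and the sketch does not use the LLVM API; in the real code a variable index aborts the comparison (`VariableIdxFound`), and struct field offsets come from TargetData, which this sketch replaces with a flat per-index element-size table.

```cpp
// Simplified, standalone sketch -- assumed names, not the LLVM API.
#include <cstdint>
#include <cstdio>
#include <vector>

// Fold the indices from position Start onward into a byte offset, scaling
// each index by the byte size of the type it steps over.  This mirrors the
// sequential-type case in GetOffsetFromIndex.
static int64_t offsetFromIndices(const std::vector<int64_t> &Indices,
                                 const std::vector<uint64_t> &ElemSizes,
                                 size_t Start) {
  int64_t Offset = 0;
  for (size_t i = Start; i < Indices.size(); ++i)
    Offset += static_cast<int64_t>(ElemSizes[i]) * Indices[i];
  return Offset;
}

// Returns true if the pointer described by Idx1 equals the pointer described
// by Idx2 plus Offset bytes, assuming both index lists hang off the same base.
static bool isPointerAtOffset(const std::vector<int64_t> &Idx1,
                              const std::vector<int64_t> &Idx2,
                              const std::vector<uint64_t> &ElemSizes,
                              int64_t Offset) {
  // Skip any leading indices the two pointers have in common.
  size_t i = 0;
  while (i < Idx1.size() && i < Idx2.size() && Idx1[i] == Idx2[i])
    ++i;
  // Compare the byte offsets implied by the remaining constant indices.
  return offsetFromIndices(Idx1, ElemSizes, i) ==
         offsetFromIndices(Idx2, ElemSizes, i) + Offset;
}

int main() {
  // The example from the doc comment: &A[42] vs &A[40] with 4-byte elements
  // differ by exactly 8 bytes.
  std::vector<uint64_t> Sizes = {4};
  std::printf("%d\n", isPointerAtOffset({42}, {40}, Sizes, 8)); // prints 1
}
```

In processStore this check runs in both directions: once to see whether a later store lands exactly at the end of the range accumulated so far (extending it forward), and once to see whether it lands exactly AccessSize bytes before the current start (extending it backward).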
