aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp')
-rw-r--r--lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp469
1 files changed, 343 insertions, 126 deletions
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index f3ac44c..b9eb986 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -11,12 +11,13 @@
//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -268,9 +269,8 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// is only subsequently read.
SmallVector<Instruction *, 4> ToDelete;
if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
- unsigned SourceAlign = getOrEnforceKnownAlignment(Copy->getSource(),
- AI.getAlignment(),
- DL, AT, &AI, DT);
+ unsigned SourceAlign = getOrEnforceKnownAlignment(
+ Copy->getSource(), AI.getAlignment(), DL, AC, &AI, DT);
if (AI.getAlignment() <= SourceAlign) {
DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
@@ -310,6 +310,7 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
LoadInst *NewLoad = IC.Builder->CreateAlignedLoad(
IC.Builder->CreateBitCast(Ptr, NewTy->getPointerTo(AS)),
LI.getAlignment(), LI.getName());
+ MDBuilder MDB(NewLoad->getContext());
for (const auto &MDPair : MD) {
unsigned ID = MDPair.first;
MDNode *N = MDPair.second;
@@ -331,20 +332,86 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
case LLVMContext::MD_noalias:
case LLVMContext::MD_nontemporal:
case LLVMContext::MD_mem_parallel_loop_access:
- case LLVMContext::MD_nonnull:
// All of these directly apply.
NewLoad->setMetadata(ID, N);
break;
+ case LLVMContext::MD_nonnull:
+ // This only directly applies if the new type is also a pointer.
+ if (NewTy->isPointerTy()) {
+ NewLoad->setMetadata(ID, N);
+ break;
+ }
+ // If it's integral now, translate it to !range metadata.
+ if (NewTy->isIntegerTy()) {
+ auto *ITy = cast<IntegerType>(NewTy);
+ auto *NullInt = ConstantExpr::getPtrToInt(
+ ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy);
+ auto *NonNullInt =
+ ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1));
+ NewLoad->setMetadata(LLVMContext::MD_range,
+ MDB.createRange(NonNullInt, NullInt));
+ }
+ break;
+
case LLVMContext::MD_range:
// FIXME: It would be nice to propagate this in some way, but the type
- // conversions make it hard.
+ // conversions make it hard. If the new type is a pointer, we could
+ // translate it to !nonnull metadata.
break;
}
}
return NewLoad;
}
+/// \brief Combine a store to a new type.
+///
+/// Returns the newly created store instruction.
+static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value *V) {
+ Value *Ptr = SI.getPointerOperand();
+ unsigned AS = SI.getPointerAddressSpace();
+ SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
+ SI.getAllMetadata(MD);
+
+ StoreInst *NewStore = IC.Builder->CreateAlignedStore(
+ V, IC.Builder->CreateBitCast(Ptr, V->getType()->getPointerTo(AS)),
+ SI.getAlignment());
+ for (const auto &MDPair : MD) {
+ unsigned ID = MDPair.first;
+ MDNode *N = MDPair.second;
+ // Note, essentially every kind of metadata should be preserved here! This
+ // routine is supposed to clone a store instruction changing *only its
+ // type*. The only metadata it makes sense to drop is metadata which is
+ // invalidated when the pointer type changes. This should essentially
+ // never be the case in LLVM, but we explicitly switch over only known
+ // metadata to be conservatively correct. If you are adding metadata to
+ // LLVM which pertains to stores, you almost certainly want to add it
+ // here.
+ switch (ID) {
+ case LLVMContext::MD_dbg:
+ case LLVMContext::MD_tbaa:
+ case LLVMContext::MD_prof:
+ case LLVMContext::MD_fpmath:
+ case LLVMContext::MD_tbaa_struct:
+ case LLVMContext::MD_alias_scope:
+ case LLVMContext::MD_noalias:
+ case LLVMContext::MD_nontemporal:
+ case LLVMContext::MD_mem_parallel_loop_access:
+ // All of these directly apply.
+ NewStore->setMetadata(ID, N);
+ break;
+
+ case LLVMContext::MD_invariant_load:
+ case LLVMContext::MD_nonnull:
+ case LLVMContext::MD_range:
+ // These don't apply for stores.
+ break;
+ }
+ }
+
+ return NewStore;
+}
+
/// \brief Combine loads to match the type of value their uses after looking
/// through intervening bitcasts.
///
@@ -371,6 +438,35 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
if (LI.use_empty())
return nullptr;
+ Type *Ty = LI.getType();
+
+ // Try to canonicalize loads which are only ever stored to operate over
+ // integers instead of any other type. We only do this when the loaded type
+ // is sized and has a size exactly the same as its store size and the store
+ // size is a legal integer type.
+ const DataLayout *DL = IC.getDataLayout();
+ if (!Ty->isIntegerTy() && Ty->isSized() && DL &&
+ DL->isLegalInteger(DL->getTypeStoreSizeInBits(Ty)) &&
+ DL->getTypeStoreSizeInBits(Ty) == DL->getTypeSizeInBits(Ty)) {
+ if (std::all_of(LI.user_begin(), LI.user_end(), [&LI](User *U) {
+ auto *SI = dyn_cast<StoreInst>(U);
+ return SI && SI->getPointerOperand() != &LI;
+ })) {
+ LoadInst *NewLoad = combineLoadToNewType(
+ IC, LI,
+ Type::getIntNTy(LI.getContext(), DL->getTypeStoreSizeInBits(Ty)));
+ // Replace all the stores with stores of the newly loaded value.
+ for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) {
+ auto *SI = cast<StoreInst>(*UI++);
+ IC.Builder->SetInsertPoint(SI);
+ combineStoreToNewValue(IC, *SI, NewLoad);
+ IC.EraseInstFromFunction(*SI);
+ }
+ assert(LI.use_empty() && "Failed to remove all users of the load!");
+ // Return the old load so the combiner can delete it safely.
+ return &LI;
+ }
+ }
// Fold away bit casts of the loaded value by loading the desired type.
if (LI.hasOneUse())
@@ -386,6 +482,181 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
return nullptr;
}
+// If we can determine that all possible objects pointed to by the provided
+// pointer value are, not only dereferenceable, but also definitively less than
+// or equal to the provided maximum size, then return true. Otherwise, return
+// false (constant global values and allocas fall into this category).
+//
+// FIXME: This should probably live in ValueTracking (or similar).
+static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
+ const DataLayout *DL) {
+ SmallPtrSet<Value *, 4> Visited;
+ SmallVector<Value *, 4> Worklist(1, V);
+
+ do {
+ Value *P = Worklist.pop_back_val();
+ P = P->stripPointerCasts();
+
+ if (!Visited.insert(P).second)
+ continue;
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
+ Worklist.push_back(SI->getTrueValue());
+ Worklist.push_back(SI->getFalseValue());
+ continue;
+ }
+
+ if (PHINode *PN = dyn_cast<PHINode>(P)) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ Worklist.push_back(PN->getIncomingValue(i));
+ continue;
+ }
+
+ if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) {
+ if (GA->mayBeOverridden())
+ return false;
+ Worklist.push_back(GA->getAliasee());
+ continue;
+ }
+
+ // If we know how big this object is, and it is less than MaxSize, continue
+ // searching. Otherwise, return false.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) {
+ if (!AI->getAllocatedType()->isSized())
+ return false;
+
+ ConstantInt *CS = dyn_cast<ConstantInt>(AI->getArraySize());
+ if (!CS)
+ return false;
+
+ uint64_t TypeSize = DL->getTypeAllocSize(AI->getAllocatedType());
+ // Make sure that, even if the multiplication below would wrap as an
+ // uint64_t, we still do the right thing.
+ if ((CS->getValue().zextOrSelf(128)*APInt(128, TypeSize)).ugt(MaxSize))
+ return false;
+ continue;
+ }
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
+ if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
+ return false;
+
+ uint64_t InitSize = DL->getTypeAllocSize(GV->getType()->getElementType());
+ if (InitSize > MaxSize)
+ return false;
+ continue;
+ }
+
+ return false;
+ } while (!Worklist.empty());
+
+ return true;
+}
+
+// If we're indexing into an object of a known size, and the outer index is
+// not a constant, but having any value but zero would lead to undefined
+// behavior, replace it with zero.
+//
+// For example, if we have:
+// @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
+// ...
+// %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x
+// ... = load i32* %arrayidx, align 4
+// Then we know that we can replace %x in the GEP with i64 0.
+//
+// FIXME: We could fold any GEP index to zero that would cause UB if it were
+// not zero. Currently, we only handle the first such index. Also, we could
+// also search through non-zero constant indices if we kept track of the
+// offsets those indices implied.
+static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,
+ Instruction *MemI, unsigned &Idx) {
+ const DataLayout *DL = IC.getDataLayout();
+ if (GEPI->getNumOperands() < 2 || !DL)
+ return false;
+
+ // Find the first non-zero index of a GEP. If all indices are zero, return
+ // one past the last index.
+ auto FirstNZIdx = [](const GetElementPtrInst *GEPI) {
+ unsigned I = 1;
+ for (unsigned IE = GEPI->getNumOperands(); I != IE; ++I) {
+ Value *V = GEPI->getOperand(I);
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
+ if (CI->isZero())
+ continue;
+
+ break;
+ }
+
+ return I;
+ };
+
+ // Skip through initial 'zero' indices, and find the corresponding pointer
+ // type. See if the next index is not a constant.
+ Idx = FirstNZIdx(GEPI);
+ if (Idx == GEPI->getNumOperands())
+ return false;
+ if (isa<Constant>(GEPI->getOperand(Idx)))
+ return false;
+
+ SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
+ Type *AllocTy =
+ GetElementPtrInst::getIndexedType(GEPI->getOperand(0)->getType(), Ops);
+ if (!AllocTy || !AllocTy->isSized())
+ return false;
+ uint64_t TyAllocSize = DL->getTypeAllocSize(AllocTy);
+
+ // If there are more indices after the one we might replace with a zero, make
+ // sure they're all non-negative. If any of them are negative, the overall
+ // address being computed might be before the base address determined by the
+ // first non-zero index.
+ auto IsAllNonNegative = [&]() {
+ for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; ++i) {
+ bool KnownNonNegative, KnownNegative;
+ IC.ComputeSignBit(GEPI->getOperand(i), KnownNonNegative,
+ KnownNegative, 0, MemI);
+ if (KnownNonNegative)
+ continue;
+ return false;
+ }
+
+ return true;
+ };
+
+ // FIXME: If the GEP is not inbounds, and there are extra indices after the
+ // one we'll replace, those could cause the address computation to wrap
+ // (rendering the IsAllNonNegative() check below insufficient). We can do
+ // better, ignoring zero indicies (and other indicies we can prove small
+ // enough not to wrap).
+ if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
+ return false;
+
+ // Note that isObjectSizeLessThanOrEq will return true only if the pointer is
+ // also known to be dereferenceable.
+ return isObjectSizeLessThanOrEq(GEPI->getOperand(0), TyAllocSize, DL) &&
+ IsAllNonNegative();
+}
+
+// If we're indexing into an object with a variable index for the memory
+// access, but the object has only one element, we can assume that the index
+// will always be zero. If we replace the GEP, return it.
+template <typename T>
+static Instruction *replaceGEPIdxWithZero(InstCombiner &IC, Value *Ptr,
+ T &MemI) {
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
+ unsigned Idx;
+ if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
+ Instruction *NewGEPI = GEPI->clone();
+ NewGEPI->setOperand(Idx,
+ ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
+ NewGEPI->insertBefore(GEPI);
+ MemI.setOperand(MemI.getPointerOperandIndex(), NewGEPI);
+ return NewGEPI;
+ }
+ }
+
+ return nullptr;
+}
+
Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
Value *Op = LI.getOperand(0);
@@ -395,9 +666,8 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
// Attempt to improve the alignment.
if (DL) {
- unsigned KnownAlign =
- getOrEnforceKnownAlignment(Op, DL->getPrefTypeAlignment(LI.getType()),
- DL, AT, &LI, DT);
+ unsigned KnownAlign = getOrEnforceKnownAlignment(
+ Op, DL->getPrefTypeAlignment(LI.getType()), DL, AC, &LI, DT);
unsigned LoadAlign = LI.getAlignment();
unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign :
DL->getABITypeAlignment(LI.getType());
@@ -408,6 +678,12 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
LI.setAlignment(EffectiveLoadAlign);
}
+ // Replace GEP indices if possible.
+ if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) {
+ Worklist.Add(NewGEPI);
+ return &LI;
+ }
+
// None of the following transforms are legal for volatile/atomic loads.
// FIXME: Some of it is okay for atomic loads; needs refactoring.
if (!LI.isSimple()) return nullptr;
@@ -418,7 +694,8 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
BasicBlock::iterator BBI = &LI;
if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6))
return ReplaceInstUsesWith(
- LI, Builder->CreateBitCast(AvailableVal, LI.getType()));
+ LI, Builder->CreateBitOrPointerCast(AvailableVal, LI.getType(),
+ LI.getName() + ".cast"));
// load(gep null, ...) -> unreachable
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
@@ -473,119 +750,61 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
}
// load (select (cond, null, P)) -> load P
- if (Constant *C = dyn_cast<Constant>(SI->getOperand(1)))
- if (C->isNullValue()) {
- LI.setOperand(0, SI->getOperand(2));
- return &LI;
- }
+ if (isa<ConstantPointerNull>(SI->getOperand(1)) &&
+ LI.getPointerAddressSpace() == 0) {
+ LI.setOperand(0, SI->getOperand(2));
+ return &LI;
+ }
// load (select (cond, P, null)) -> load P
- if (Constant *C = dyn_cast<Constant>(SI->getOperand(2)))
- if (C->isNullValue()) {
- LI.setOperand(0, SI->getOperand(1));
- return &LI;
- }
+ if (isa<ConstantPointerNull>(SI->getOperand(2)) &&
+ LI.getPointerAddressSpace() == 0) {
+ LI.setOperand(0, SI->getOperand(1));
+ return &LI;
+ }
}
}
return nullptr;
}
-/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P
-/// when possible. This makes it generally easy to do alias analysis and/or
-/// SROA/mem2reg of the memory object.
-static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
- User *CI = cast<User>(SI.getOperand(1));
- Value *CastOp = CI->getOperand(0);
-
- Type *DestPTy = CI->getType()->getPointerElementType();
- PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
- if (!SrcTy) return nullptr;
-
- Type *SrcPTy = SrcTy->getElementType();
-
- if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy())
- return nullptr;
-
- /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
- /// to its first element. This allows us to handle things like:
- /// store i32 xxx, (bitcast {foo*, float}* %P to i32*)
- /// on 32-bit hosts.
- SmallVector<Value*, 4> NewGEPIndices;
-
- // If the source is an array, the code below will not succeed. Check to
- // see if a trivial 'gep P, 0, 0' will help matters. Only do this for
- // constants.
- if (SrcPTy->isArrayTy() || SrcPTy->isStructTy()) {
- // Index through pointer.
- Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext()));
- NewGEPIndices.push_back(Zero);
-
- while (1) {
- if (StructType *STy = dyn_cast<StructType>(SrcPTy)) {
- if (!STy->getNumElements()) /* Struct can be empty {} */
- break;
- NewGEPIndices.push_back(Zero);
- SrcPTy = STy->getElementType(0);
- } else if (ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) {
- NewGEPIndices.push_back(Zero);
- SrcPTy = ATy->getElementType();
- } else {
- break;
- }
- }
-
- SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
- }
-
- if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy())
- return nullptr;
-
- // If the pointers point into different address spaces don't do the
- // transformation.
- if (SrcTy->getAddressSpace() != CI->getType()->getPointerAddressSpace())
- return nullptr;
-
- // If the pointers point to values of different sizes don't do the
- // transformation.
- if (!IC.getDataLayout() ||
- IC.getDataLayout()->getTypeSizeInBits(SrcPTy) !=
- IC.getDataLayout()->getTypeSizeInBits(DestPTy))
- return nullptr;
+/// \brief Combine stores to match the type of value being stored.
+///
+/// The core idea here is that the memory does not have any intrinsic type and
+/// where we can we should match the type of a store to the type of value being
+/// stored.
+///
+/// However, this routine must never change the width of a store or the number of
+/// stores as that would introduce a semantic change. This combine is expected to
+/// be a semantic no-op which just allows stores to more closely model the types
+/// of their incoming values.
+///
+/// Currently, we also refuse to change the precise type used for an atomic or
+/// volatile store. This is debatable, and might be reasonable to change later.
+/// However, it is risky in case some backend or other part of LLVM is relying
+/// on the exact type stored to select appropriate atomic operations.
+///
+/// \returns true if the store was successfully combined away. This indicates
+/// the caller must erase the store instruction. We have to let the caller erase
+/// the store instruction sas otherwise there is no way to signal whether it was
+/// combined or not: IC.EraseInstFromFunction returns a null pointer.
+static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
+ // FIXME: We could probably with some care handle both volatile and atomic
+ // stores here but it isn't clear that this is important.
+ if (!SI.isSimple())
+ return false;
- // If the pointers point to pointers to different address spaces don't do the
- // transformation. It is not safe to introduce an addrspacecast instruction in
- // this case since, depending on the target, addrspacecast may not be a no-op
- // cast.
- if (SrcPTy->isPointerTy() && DestPTy->isPointerTy() &&
- SrcPTy->getPointerAddressSpace() != DestPTy->getPointerAddressSpace())
- return nullptr;
+ Value *V = SI.getValueOperand();
- // Okay, we are casting from one integer or pointer type to another of
- // the same size. Instead of casting the pointer before
- // the store, cast the value to be stored.
- Value *NewCast;
- Instruction::CastOps opcode = Instruction::BitCast;
- Type* CastSrcTy = DestPTy;
- Type* CastDstTy = SrcPTy;
- if (CastDstTy->isPointerTy()) {
- if (CastSrcTy->isIntegerTy())
- opcode = Instruction::IntToPtr;
- } else if (CastDstTy->isIntegerTy()) {
- if (CastSrcTy->isPointerTy())
- opcode = Instruction::PtrToInt;
+ // Fold away bit casts of the stored value by storing the original type.
+ if (auto *BC = dyn_cast<BitCastInst>(V)) {
+ V = BC->getOperand(0);
+ combineStoreToNewValue(IC, SI, V);
+ return true;
}
- // SIOp0 is a pointer to aggregate and this is a store to the first field,
- // emit a GEP to index into its first field.
- if (!NewGEPIndices.empty())
- CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices);
-
- Value *SIOp0 = SI.getOperand(0);
- NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
- SIOp0->getName()+".c");
- SI.setOperand(0, NewCast);
- SI.setOperand(1, CastOp);
- return &SI;
+ // FIXME: We should also canonicalize loads of vectors when their elements are
+ // cast to other types.
+ return false;
}
/// equivalentAddressValues - Test if A and B will obviously have the same
@@ -621,11 +840,14 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
Value *Val = SI.getOperand(0);
Value *Ptr = SI.getOperand(1);
+ // Try to canonicalize the stored type.
+ if (combineStoreToValueType(*this, SI))
+ return EraseInstFromFunction(SI);
+
// Attempt to improve the alignment.
if (DL) {
- unsigned KnownAlign =
- getOrEnforceKnownAlignment(Ptr, DL->getPrefTypeAlignment(Val->getType()),
- DL, AT, &SI, DT);
+ unsigned KnownAlign = getOrEnforceKnownAlignment(
+ Ptr, DL->getPrefTypeAlignment(Val->getType()), DL, AC, &SI, DT);
unsigned StoreAlign = SI.getAlignment();
unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign :
DL->getABITypeAlignment(Val->getType());
@@ -636,6 +858,12 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
SI.setAlignment(EffectiveStoreAlign);
}
+ // Replace GEP indices if possible.
+ if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) {
+ Worklist.Add(NewGEPI);
+ return &SI;
+ }
+
// Don't hack volatile/atomic stores.
// FIXME: Some bits are legal for atomic stores; needs refactoring.
if (!SI.isSimple()) return nullptr;
@@ -712,17 +940,6 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (isa<UndefValue>(Val))
return EraseInstFromFunction(SI);
- // If the pointer destination is a cast, see if we can fold the cast into the
- // source instead.
- if (isa<CastInst>(Ptr))
- if (Instruction *Res = InstCombineStoreToCast(*this, SI))
- return Res;
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
- if (CE->isCast())
- if (Instruction *Res = InstCombineStoreToCast(*this, SI))
- return Res;
-
-
// If this store is the last instruction in the basic block (possibly
// excepting debug info instructions), and if the block ends with an
// unconditional branch, try to move it to the successor block.