author    Quentin Colombet <qcolombet@apple.com>  2013-10-11 18:17:17 +0000
committer Quentin Colombet <qcolombet@apple.com>  2013-10-11 18:17:17 +0000
commit    4351741a3b36bfe1ac1b385334fc5fa6f6ef5a11 (patch)
tree      bc275e8936f4de416301e8b5f0233739b326867f /lib/CodeGen
parent    1dfe206062ee43d60d2535bddb0c0b629037e7d8 (diff)
[DAGCombiner] Revert load slicing (r192471) until I figure out why it fails on Ubuntu.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192474 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/CodeGen')
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp  576
1 file changed, 2 insertions(+), 574 deletions(-)
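
For context, the combine being reverted split a wide load that is consumed only through trunc or trunc(lshr) into independent narrow loads. A minimal C++ sketch of the kind of source pattern it targeted (function name and values are hypothetical, not from the patch):

  #include <cstdint>

  // After SROA-style splitting, the DAG for this function contains
  // trunc(load i64) and trunc(srl(load i64, 32)); the reverted combine
  // rewrote those uses into two independent i32 loads.
  uint32_t sumHalves(const uint64_t *P) {
    uint64_t V = *P;                              // Origin = load i64
    uint32_t Lo = static_cast<uint32_t>(V);       // Inst = trunc
    uint32_t Hi = static_cast<uint32_t>(V >> 32); // Inst = trunc(srl 32)
    return Lo + Hi;
  }
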
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8d6eab7..72e001a 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -35,7 +35,6 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
using namespace llvm;
@@ -45,7 +44,6 @@ STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
-STATISTIC(SlicedLoads, "Number of load sliced");
namespace {
static cl::opt<bool>
@@ -56,14 +54,6 @@ namespace {
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
cl::desc("Include global information in alias analysis"));
- /// Hidden option to stress test load slicing, i.e., when this option
- /// is enabled, load slicing bypasses most of its profitability guards.
- static cl::opt<bool>
- StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
- cl::desc("Bypass the profitability model of load "
- "slicing"),
- cl::init(false));
-
//------------------------------ DAGCombiner ---------------------------------//
class DAGCombiner {
@@ -73,7 +63,6 @@ namespace {
CodeGenOpt::Level OptLevel;
bool LegalOperations;
bool LegalTypes;
- bool ForCodeSize;
// Worklist of all of the nodes that need to be simplified.
//
@@ -156,7 +145,6 @@ namespace {
bool CombineToPreIndexedLoadStore(SDNode *N);
bool CombineToPostIndexedLoadStore(SDNode *N);
- bool SliceUpLoad(SDNode *N);
void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
@@ -328,15 +316,8 @@ namespace {
public:
DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
- : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
- OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
- AttributeSet FnAttrs =
- DAG.getMachineFunction().getFunction()->getAttributes();
- ForCodeSize =
- FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize) ||
- FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
- }
+ : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
+ OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {}
/// Run - runs the dag combiner on all nodes in the work list
void Run(CombineLevel AtLevel);
@@ -7598,562 +7579,9 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
return SDValue(N, 0);
- // Try to slice up N into more direct loads if the slices are mapped to
- // different register banks or pairing can take place.
- if (SliceUpLoad(N))
- return SDValue(N, 0);
-
return SDValue();
}
-namespace {
-/// \brief Helper structure used to slice a load into smaller loads.
-/// Basically a slice is obtained from the following sequence:
-/// Origin = load Ty1, Base
-/// Shift = srl Ty1 Origin, CstTy Amount
-/// Inst = trunc Shift to Ty2
-///
-/// Then, it will be rewritten into:
-/// Slice = load SliceTy, Base + SliceOffset
-/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
-///
-/// SliceTy is deduced from the number of bits that are actually used to
-/// build Inst.
-struct LoadedSlice {
- /// \brief Helper structure used to compute the cost of a slice.
- struct Cost {
- /// Are we optimizing for code size.
- bool ForCodeSize;
- /// Various costs.
- unsigned Loads;
- unsigned Truncates;
- unsigned CrossRegisterBanksCopies;
- unsigned ZExts;
- unsigned Shift;
-
- Cost(bool ForCodeSize = false)
- : ForCodeSize(ForCodeSize), Loads(0), Truncates(0),
- CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {}
-
- /// \brief Get the cost of one isolated slice.
- Cost(const LoadedSlice &LS, bool ForCodeSize = false)
- : ForCodeSize(ForCodeSize), Loads(1), Truncates(0),
- CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {
- EVT TruncType = LS.Inst->getValueType(0);
- EVT LoadedType = LS.getLoadedType();
- if (TruncType != LoadedType &&
- !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
- ZExts = 1;
- }
-
- /// \brief Account for slicing gain in the current cost.
- /// Slicing provides a few gains, like removing a shift or a
- /// truncate. This method allows the cost of the original
- /// load to be grown by the gain from this slice.
- void addSliceGain(const LoadedSlice &LS) {
- // Each slice saves a truncate.
- const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
- if (!TLI.isTruncateFree(LS.Inst->getValueType(0),
- LS.Inst->getOperand(0).getValueType()))
- ++Truncates;
- // If there is a shift amount, this slice gets rid of it.
- if (LS.Shift)
- ++Shift;
- // If this slice can merge a cross register bank copy, account for it.
- if (LS.canMergeExpensiveCrossRegisterBankCopy())
- ++CrossRegisterBanksCopies;
- }
-
- Cost &operator+=(const Cost &RHS) {
- Loads += RHS.Loads;
- Truncates += RHS.Truncates;
- CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
- ZExts += RHS.ZExts;
- Shift += RHS.Shift;
- return *this;
- }
-
- bool operator==(const Cost &RHS) const {
- return Loads == RHS.Loads && Truncates == RHS.Truncates &&
- CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
- ZExts == RHS.ZExts && Shift == RHS.Shift;
- }
-
- bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
-
- bool operator<(const Cost &RHS) const {
- // Assume cross-register-bank copies are as expensive as loads.
- // FIXME: Do we want some more target hooks?
- unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
- unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
- // Unless we are optimizing for code size, consider the
- // expensive operation first.
- if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
- return ExpensiveOpsLHS < ExpensiveOpsRHS;
- return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
- (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
- }
-
- bool operator>(const Cost &RHS) const { return RHS < *this; }
-
- bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
-
- bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
- };
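
To see how this ordering behaves, here is a standalone re-implementation mirroring the comparison above, so it can be checked in isolation (MiniCost and the sample values are hypothetical):

  // MiniCost mirrors Cost::operator< for illustration only.
  struct MiniCost {
    bool ForCodeSize;
    unsigned Loads, Truncates, CrossRegisterBanksCopies, ZExts, Shift;
    bool operator<(const MiniCost &RHS) const {
      unsigned ExpensiveLHS = Loads + CrossRegisterBanksCopies;
      unsigned ExpensiveRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
      if (!ForCodeSize && ExpensiveLHS != ExpensiveRHS)
        return ExpensiveLHS < ExpensiveRHS;
      return Truncates + ZExts + Shift + ExpensiveLHS <
             RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveRHS;
    }
  };
  // With A = {Loads: 2} and B = {Loads: 1, Truncates: 1, Shift: 1}:
  // optimizing for speed, B < A (fewer expensive operations wins);
  // optimizing for size, A < B (fewer total operations wins, 2 vs. 3).
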
- // The last instruction that represents the slice. This should be a
- // truncate instruction.
- SDNode *Inst;
- // The original load instruction.
- LoadSDNode *Origin;
- // The right shift amount in bits from the original load.
- unsigned Shift;
- // The DAG from which Origin came.
- // This is used to get some contextual information about legal types, etc.
- SelectionDAG *DAG;
-
- LoadedSlice(SDNode *Inst = NULL, LoadSDNode *Origin = NULL,
- unsigned Shift = 0, SelectionDAG *DAG = NULL)
- : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
-
- LoadedSlice(const LoadedSlice &LS)
- : Inst(LS.Inst), Origin(LS.Origin), Shift(LS.Shift), DAG(LS.DAG) {}
-
- /// \brief Get the bits used in a chunk of bits \p BitWidth large.
- /// \return Result is \p BitWidth bits wide, with used bits set to 1 and
- /// unused bits set to 0.
- APInt getUsedBits() const {
- // Reproduce the trunc(lshr) sequence:
- // - Start from the truncated value.
- // - Zero extend to the desired bit width.
- // - Shift left.
- assert(Origin && "No original load to compare against.");
- unsigned BitWidth = Origin->getValueSizeInBits(0);
- assert(Inst && "This slice is not bound to an instruction");
- assert(Inst->getValueSizeInBits(0) <= BitWidth &&
- "Extracted slice is bigger than the whole type!");
- APInt UsedBits(Inst->getValueSizeInBits(0), 0);
- UsedBits.setAllBits();
- UsedBits = UsedBits.zext(BitWidth);
- UsedBits <<= Shift;
- return UsedBits;
- }
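
For a concrete feel of the mask this computes, here is a standalone sketch using APInt directly, assuming an i64 original load and an i32 slice extracted at Shift = 32 (all values illustrative):

  #include "llvm/ADT/APInt.h"

  // Reproduces the getUsedBits() steps for Origin = load i64,
  // Inst = trunc to i32, Shift = 32; illustrative only.
  llvm::APInt usedBitsExample() {
    llvm::APInt UsedBits(32, 0);  // Width of the truncated value.
    UsedBits.setAllBits();        // 0xFFFFFFFF
    UsedBits = UsedBits.zext(64); // 0x00000000FFFFFFFF
    UsedBits <<= 32;              // 0xFFFFFFFF00000000: the high half.
    return UsedBits;
  }
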
-
- /// \brief Get the size of the slice to be loaded in bytes.
- unsigned getLoadedSize() const {
- unsigned SliceSize = getUsedBits().countPopulation();
- assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
- return SliceSize / 8;
- }
-
- /// \brief Get the type that will be loaded for this slice.
- /// Note: This may not be the final type for the slice.
- EVT getLoadedType() const {
- assert(DAG && "Missing context");
- LLVMContext &Ctxt = *DAG->getContext();
- return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
- }
-
- /// \brief Get the alignment of the load used for this slice.
- unsigned getAlignment() const {
- unsigned Alignment = Origin->getAlignment();
- unsigned Offset = getOffsetFromBase();
- if (Offset != 0)
- Alignment = MinAlign(Alignment, Alignment + Offset);
- return Alignment;
- }
-
- /// \brief Check if this slice can be rewritten with legal operations.
- bool isLegal() const {
- // An invalid slice is not legal.
- if (!Origin || !Inst || !DAG)
- return false;
-
- // Offsets are for indexed loads only; we do not handle that.
- if (Origin->getOffset().getOpcode() != ISD::UNDEF)
- return false;
-
- const TargetLowering &TLI = DAG->getTargetLoweringInfo();
-
- // Check that the type is legal.
- EVT SliceType = getLoadedType();
- if (!TLI.isTypeLegal(SliceType))
- return false;
-
- // Check that the load is legal for this type.
- if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
- return false;
-
- // Check that the offset can be computed.
- // 1. Check its type.
- EVT PtrType = Origin->getBasePtr().getValueType();
- if (PtrType == MVT::Untyped || PtrType.isExtended())
- return false;
-
- // 2. Check that it fits in the immediate.
- if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
- return false;
-
- // 3. Check that the computation is legal.
- if (!TLI.isOperationLegal(ISD::ADD, PtrType))
- return false;
-
- // Check that the zext is legal if it needs one.
- EVT TruncateType = Inst->getValueType(0);
- if (TruncateType != SliceType &&
- !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
- return false;
-
- return true;
- }
-
- /// \brief Get the offset in bytes of this slice in the original chunk of
- /// bits.
- /// \pre DAG != NULL.
- uint64_t getOffsetFromBase() const {
- assert(DAG && "Missing context.");
- bool IsBigEndian =
- DAG->getTargetLoweringInfo().getDataLayout()->isBigEndian();
- assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
- uint64_t Offset = Shift / 8;
- unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
- assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
- "The size of the original loaded type is not a multiple of a"
- " byte.");
- // If Offset is bigger than TySizeInBytes, it means we are loading all
- // zeros. This should have been optimized away earlier in the process.
- assert(TySizeInBytes > Offset &&
- "Invalid shift amount for given loaded size");
- if (IsBigEndian)
- Offset = TySizeInBytes - Offset - getLoadedSize();
- return Offset;
- }
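
A worked instance of this computation, as a standalone sketch with explicit parameters (assuming an i64 load sliced at Shift = 32 into a 4-byte slice; names and numbers are illustrative):

  #include <cstdint>

  // Mirrors getOffsetFromBase() for illustration.
  uint64_t offsetFromBase(uint64_t Shift, unsigned TySizeInBytes,
                          unsigned LoadedSizeInBytes, bool IsBigEndian) {
    uint64_t Offset = Shift / 8; // 32 / 8 == 4
    if (IsBigEndian)             // Big-endian: 8 - 4 - 4 == 0
      Offset = TySizeInBytes - Offset - LoadedSizeInBytes;
    return Offset;               // Little-endian: high half at byte 4.
  }
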
-
- /// \brief Generate the sequence of instructions to load the slice
- /// represented by this object and redirect the uses of this slice to
- /// this new sequence of instructions.
- /// \pre this->Inst && this->Origin are valid Instructions and this
- /// object passed the legal check: LoadedSlice::isLegal returned true.
- /// \return The last instruction of the sequence used to load the slice.
- SDValue loadSlice() const {
- assert(Inst && Origin && "Unable to replace a non-existing slice.");
- const SDValue &OldBaseAddr = Origin->getBasePtr();
- SDValue BaseAddr = OldBaseAddr;
- // Get the offset in that chunk of bytes w.r.t. the endianness.
- int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
- assert(Offset >= 0 && "Offset too big to fit in int64_t!");
- if (Offset) {
- // BaseAddr = BaseAddr + Offset.
- EVT ArithType = BaseAddr.getValueType();
- BaseAddr = DAG->getNode(ISD::ADD, SDLoc(Origin), ArithType, BaseAddr,
- DAG->getConstant(Offset, ArithType));
- }
-
- // Create the type of the loaded slice according to its size.
- EVT SliceType = getLoadedType();
-
- // Create the load for the slice.
- SDValue LastInst = DAG->getLoad(
- SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
- Origin->getPointerInfo().getWithOffset(Offset), Origin->isVolatile(),
- Origin->isNonTemporal(), Origin->isInvariant(), getAlignment());
- // If the final type is not the same as the loaded type, this means that
- // we have to pad with zero. Create a zero extend for that.
- EVT FinalType = Inst->getValueType(0);
- if (SliceType != FinalType)
- LastInst =
- DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
- return LastInst;
- }
-
- /// \brief Check if this slice can be merged with an expensive cross register
- /// bank copy. E.g.,
- /// i = load i32
- /// f = bitcast i32 i to float
- bool canMergeExpensiveCrossRegisterBankCopy() const {
- if (!Inst || !Inst->hasOneUse())
- return false;
- SDNode *Use = *Inst->use_begin();
- if (Use->getOpcode() != ISD::BITCAST)
- return false;
- assert(DAG && "Missing context");
- const TargetLowering &TLI = DAG->getTargetLoweringInfo();
- EVT ResVT = Use->getValueType(0);
- const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
- const TargetRegisterClass *ArgRC =
- TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
- if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
- return false;
-
- // At this point, we know that we perform a cross-register-bank copy.
- // Check if it is expensive.
- const TargetRegisterInfo *TRI = TLI.getTargetMachine().getRegisterInfo();
- // Assume bitcasts are cheap, unless the register classes do not
- // share a common subclass.
- if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
- return false;
-
- // Check if it will be merged with the load.
- // 1. Check the alignment constraint.
- unsigned RequiredAlignment = TLI.getDataLayout()->getABITypeAlignment(
- ResVT.getTypeForEVT(*DAG->getContext()));
-
- if (RequiredAlignment > getAlignment())
- return false;
-
- // 2. Check that the load is a legal operation for that type.
- if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
- return false;
-
- // 3. Check that we do not have a zext in the way.
- if (Inst->getValueType(0) != getLoadedType())
- return false;
-
- return true;
- }
-};
-}
-
-/// \brief Sorts LoadedSlices according to their offsets.
-struct LoadedSliceSorter {
- bool operator()(const LoadedSlice &LHS, const LoadedSlice &RHS) {
- assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
- return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
- }
-};
-
-/// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
-/// \p UsedBits looks like 0..0 1..1 0..0.
-static bool areUsedBitsDense(const APInt &UsedBits) {
- // If all the bits are one, this is dense!
- if (UsedBits.isAllOnesValue())
- return true;
-
- // Get rid of the unused bits on the right.
- APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
- // Get rid of the unused bits on the left.
- if (NarrowedUsedBits.countLeadingZeros())
- NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
- // Check that the chunk of bits is completely used.
- return NarrowedUsedBits.isAllOnesValue();
-}
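
As a standalone illustration of this density check (same steps, hypothetical inputs):

  #include "llvm/ADT/APInt.h"

  // Same logic as areUsedBitsDense, for illustration:
  // 0x00FFFF00 -> lshr(8) = 0x0000FFFF -> trunc(16) = 0xFFFF -> dense.
  // 0x00FF0F00 -> lshr(8) = 0x0000FF0F -> trunc(16) = 0xFF0F -> not dense.
  bool isDenseExample(const llvm::APInt &UsedBits) {
    if (UsedBits.isAllOnesValue())
      return true;
    llvm::APInt Narrowed = UsedBits.lshr(UsedBits.countTrailingZeros());
    if (Narrowed.countLeadingZeros())
      Narrowed = Narrowed.trunc(Narrowed.getActiveBits());
    return Narrowed.isAllOnesValue();
  }
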
-
-/// \brief Check whether or not \p First and \p Second are next to each other
-/// in memory. This means that there is no hole between the bits loaded
-/// by \p First and the bits loaded by \p Second.
-static bool areSlicesNextToEachOther(const LoadedSlice &First,
- const LoadedSlice &Second) {
- assert(First.Origin == Second.Origin && First.Origin &&
- "Unable to match different memory origins.");
- APInt UsedBits = First.getUsedBits();
- assert((UsedBits & Second.getUsedBits()) == 0 &&
- "Slices are not supposed to overlap.");
- UsedBits |= Second.getUsedBits();
- return areUsedBitsDense(UsedBits);
-}
-
-/// \brief Adjust the \p GlobalLSCost according to the target
- /// pairing capabilities and the layout of the slices.
- /// \pre \p GlobalLSCost should account for at least as many loads as
- /// there are in the slices in \p LoadedSlices.
-static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
- LoadedSlice::Cost &GlobalLSCost) {
- unsigned NumberOfSlices = LoadedSlices.size();
- // If there are fewer than 2 elements, no pairing is possible.
- if (NumberOfSlices < 2)
- return;
-
- // Sort the slices so that elements that are likely to be next to each
- // other in memory are next to each other in the list.
- std::sort(LoadedSlices.begin(), LoadedSlices.end(), LoadedSliceSorter());
- const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
- // First (resp. Second) is the first (resp. second) candidate to be
- // placed in a paired load.
- const LoadedSlice *First = NULL;
- const LoadedSlice *Second = NULL;
- for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
- // Set the beginning of the pair.
- First = Second) {
-
- Second = &LoadedSlices[CurrSlice];
-
- // If First is NULL, it means we start a new pair.
- // Get to the next slice.
- if (!First)
- continue;
-
- EVT LoadedType = First->getLoadedType();
-
- // If the types of the slices are different, we cannot pair them.
- if (LoadedType != Second->getLoadedType())
- continue;
-
- // Check if the target supplies paired loads for this type.
- unsigned RequiredAlignment = 0;
- if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
- // Move to the next pair; this type is hopeless.
- Second = NULL;
- continue;
- }
- // Check if we meet the alignment requirement.
- if (RequiredAlignment > First->getAlignment())
- continue;
-
- // Check that both loads are next to each other in memory.
- if (!areSlicesNextToEachOther(*First, *Second))
- continue;
-
- assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
- --GlobalLSCost.Loads;
- // Move to the next pair.
- Second = NULL;
- }
-}
-
-/// \brief Check the profitability of all involved LoadedSlice.
- /// Currently, it is considered profitable if there are exactly two
-/// involved slices (1) which are (2) next to each other in memory, and
-/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
-///
-/// Note: The order of the elements in \p LoadedSlices may be modified, but not
-/// the elements themselves.
-///
- /// FIXME: When the cost model is mature enough, we can relax
-/// constraints (1) and (2).
-static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
- const APInt &UsedBits, bool ForCodeSize) {
- unsigned NumberOfSlices = LoadedSlices.size();
- if (StressLoadSlicing)
- return NumberOfSlices > 1;
-
- // Check (1).
- if (NumberOfSlices != 2)
- return false;
-
- // Check (2).
- if (!areUsedBitsDense(UsedBits))
- return false;
-
- // Check (3).
- LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
- // The original code has one big load.
- OrigCost.Loads = 1;
- for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
- const LoadedSlice &LS = LoadedSlices[CurrSlice];
- // Accumulate the cost of all the slices.
- LoadedSlice::Cost SliceCost(LS, ForCodeSize);
- GlobalSlicingCost += SliceCost;
-
- // Account as cost in the original configuration the gain obtained
- // with the current slices.
- OrigCost.addSliceGain(LS);
- }
-
- // If the target supports paired load, adjust the cost accordingly.
- adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
- return OrigCost > GlobalSlicingCost;
-}
-
-/// \brief If the given load, \p LI, is used only by trunc or trunc(lshr)
- /// operations, split it into the various pieces being extracted.
-///
-/// This sort of thing is introduced by SROA.
-/// This slicing takes care not to insert overlapping loads.
-/// \pre LI is a simple load (i.e., not an atomic or volatile load).
-bool DAGCombiner::SliceUpLoad(SDNode *N) {
- if (Level < AfterLegalizeDAG)
- return false;
-
- LoadSDNode *LD = cast<LoadSDNode>(N);
- if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
- !LD->getValueType(0).isInteger())
- return false;
-
- // Keep track of already used bits to detect overlapping values.
- // In that case, we will just abort the transformation.
- APInt UsedBits(LD->getValueSizeInBits(0), 0);
-
- SmallVector<LoadedSlice, 4> LoadedSlices;
-
- // Check if this load is used as several smaller chunks of bits.
- // Basically, look for uses in trunc or trunc(lshr) and record a new chain
- // of computation for each trunc.
- for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
- UI != UIEnd; ++UI) {
- // Skip the uses of the chain.
- if (UI.getUse().getResNo() != 0)
- continue;
-
- SDNode *User = *UI;
- unsigned Shift = 0;
-
- // Check if this is a trunc(lshr).
- if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
- isa<ConstantSDNode>(User->getOperand(1))) {
- Shift = cast<ConstantSDNode>(User->getOperand(1))->getZExtValue();
- User = *User->use_begin();
- }
-
- // At this point, User is a TRUNCATE iff we encountered trunc or
- // trunc(lshr).
- if (User->getOpcode() != ISD::TRUNCATE)
- return false;
-
- // The width of the type must be a power of 2 and at least 8 bits.
- // Otherwise the load cannot be represented in LLVM IR.
- // Moreover, if we shifted by a non-multiple of 8 bits, the slice
- // will span several bytes. We do not support that.
- unsigned Width = User->getValueSizeInBits(0);
- if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
- return false;
-
- // Build the slice for this chain of computations.
- LoadedSlice LS(User, LD, Shift, &DAG);
- APInt CurrentUsedBits = LS.getUsedBits();
-
- // Check if this slice overlaps with another.
- if ((CurrentUsedBits & UsedBits) != 0)
- return false;
- // Update the bits used globally.
- UsedBits |= CurrentUsedBits;
-
- // Check if the new slice would be legal.
- if (!LS.isLegal())
- return false;
-
- // Record the slice.
- LoadedSlices.push_back(LS);
- }
-
- // Abort slicing if it does not seem to be profitable.
- if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
- return false;
-
- ++SlicedLoads;
-
- // Rewrite each chain to use an independent load.
- // By construction, each chain can be represented by a unique load.
-
- // Prepare the arguments for the new token factor for all the slices.
- SmallVector<SDValue, 8> ArgChains;
- for (SmallVectorImpl<LoadedSlice>::const_iterator
- LSIt = LoadedSlices.begin(),
- LSItEnd = LoadedSlices.end();
- LSIt != LSItEnd; ++LSIt) {
- SDValue SliceInst = LSIt->loadSlice();
- CombineTo(LSIt->Inst, SliceInst, true);
- if (SliceInst.getNode()->getOpcode() != ISD::LOAD)
- SliceInst = SliceInst.getOperand(0);
- assert(SliceInst->getOpcode() == ISD::LOAD &&
- "It takes more than a zext to get to the loaded slice!!");
- ArgChains.push_back(SliceInst.getValue(1));
- }
-
- SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
- &ArgChains[0], ArgChains.size());
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
- return true;
-}
-
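
End to end, on the hypothetical sumHalves pattern sketched near the top, the removed transformation was equivalent to rewriting the function with two independent narrow loads, whose chains the TokenFactor above then merges:

  #include <cstdint>

  // Post-slicing equivalent of sumHalves on a little-endian target
  // (hypothetical; slice offsets follow getOffsetFromBase).
  uint32_t sumHalvesSliced(const uint32_t *P) {
    uint32_t Lo = P[0]; // Slice at byte offset 0.
    uint32_t Hi = P[1]; // Slice at byte offset 4.
    return Lo + Hi;
  }
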
/// CheckForMaskedLoad - Check to see if V is (and load (ptr), imm), where the
/// load is having specific bytes cleared out. If so, return the byte size
/// being masked out and the shift amount.