aboutsummaryrefslogtreecommitdiffstats
path: root/lib/CodeGen/CodeGenPrepare.cpp
diff options
context:
space:
mode:
authorStephen Hines <srhines@google.com>2015-04-01 18:49:24 +0000
committerGerrit Code Review <noreply-gerritcodereview@google.com>2015-04-01 18:49:26 +0000
commit3fa16bd6062e23bcdb82ed4dd965674792e6b761 (patch)
tree9348fc507292f7e8715d22d64ce5a32131b4f875 /lib/CodeGen/CodeGenPrepare.cpp
parentbeed47390a60f6f0c77532b3d3f76bb47ef49423 (diff)
parentebe69fe11e48d322045d5949c83283927a0d790b (diff)
downloadexternal_llvm-3fa16bd6062e23bcdb82ed4dd965674792e6b761.zip
external_llvm-3fa16bd6062e23bcdb82ed4dd965674792e6b761.tar.gz
external_llvm-3fa16bd6062e23bcdb82ed4dd965674792e6b761.tar.bz2
Merge "Update aosp/master LLVM for rebase to r230699."
Diffstat (limited to 'lib/CodeGen/CodeGenPrepare.cpp')
-rw-r--r--lib/CodeGen/CodeGenPrepare.cpp1139
1 files changed, 992 insertions, 147 deletions
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index 8d20848..c0d7dca 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
@@ -30,20 +31,22 @@
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Statepoint.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/ValueMap.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -70,6 +73,10 @@ static cl::opt<bool> DisableBranchOpts(
"disable-cgp-branch-opts", cl::Hidden, cl::init(false),
cl::desc("Disable branch optimizations in CodeGenPrepare"));
+static cl::opt<bool>
+ DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
+ cl::desc("Disable GC optimizations in CodeGenPrepare"));
+
static cl::opt<bool> DisableSelectToBranch(
"disable-cgp-select2branch", cl::Hidden, cl::init(false),
cl::desc("Disable select to branch conversion."));
@@ -90,6 +97,16 @@ static cl::opt<bool> StressStoreExtract(
"stress-cgp-store-extract", cl::Hidden, cl::init(false),
cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
+static cl::opt<bool> DisableExtLdPromotion(
+ "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
+ cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
+ "CodeGenPrepare"));
+
+static cl::opt<bool> StressExtLdPromotion(
+ "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
+ cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
+ "optimization in CodeGenPrepare"));
+
namespace {
typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
struct TypeIsSExt {
@@ -98,6 +115,7 @@ struct TypeIsSExt {
TypeIsSExt(Type *Ty, bool IsSExt) : Ty(Ty), IsSExt(IsSExt) {}
};
typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
+class TypePromotionTransaction;
class CodeGenPrepare : public FunctionPass {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
@@ -143,8 +161,8 @@ typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<TargetLibraryInfo>();
- AU.addRequired<TargetTransformInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
}
private:
@@ -152,12 +170,12 @@ typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
bool EliminateMostlyEmptyBlocks(Function &F);
bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
void EliminateMostlyEmptyBlock(BasicBlock *BB);
- bool OptimizeBlock(BasicBlock &BB);
- bool OptimizeInst(Instruction *I);
+ bool OptimizeBlock(BasicBlock &BB, bool& ModifiedDT);
+ bool OptimizeInst(Instruction *I, bool& ModifiedDT);
bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy);
bool OptimizeInlineAsmInst(CallInst *CS);
- bool OptimizeCallInst(CallInst *CI);
- bool MoveExtToFormExtLoad(Instruction *I);
+ bool OptimizeCallInst(CallInst *CI, bool& ModifiedDT);
+ bool MoveExtToFormExtLoad(Instruction *&I);
bool OptimizeExtUses(Instruction *I);
bool OptimizeSelectInst(SelectInst *SI);
bool OptimizeShuffleVectorInst(ShuffleVectorInst *SI);
@@ -165,6 +183,12 @@ typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
bool DupRetToEnableTailCallOpts(BasicBlock *BB);
bool PlaceDbgValues(Function &F);
bool sinkAndCmp(Function &F);
+ bool ExtLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI,
+ Instruction *&Inst,
+ const SmallVectorImpl<Instruction *> &Exts,
+ unsigned CreatedInst);
+ bool splitBranchCondition(Function &F);
+ bool simplifyOffsetableRelocate(Instruction &I);
};
}
@@ -187,14 +211,13 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
ModifiedDT = false;
if (TM)
- TLI = TM->getSubtargetImpl()->getTargetLowering();
- TLInfo = &getAnalysis<TargetLibraryInfo>();
- TTI = &getAnalysis<TargetTransformInfo>();
+ TLI = TM->getSubtargetImpl(F)->getTargetLowering();
+ TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DT = DTWP ? &DTWP->getDomTree() : nullptr;
- OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize);
+ OptSize = F.hasFnAttribute(Attribute::OptimizeForSize);
/// This optimization identifies DIV instructions that can be
/// profitably bypassed and carried out with a shorter, faster divide.
@@ -218,15 +241,23 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// into a single target instruction, push the mask and compare into branch
// users. Do this before OptimizeBlock -> OptimizeInst ->
// OptimizeCmpExpression, which perturbs the pattern being searched for.
- if (!DisableBranchOpts)
+ if (!DisableBranchOpts) {
EverMadeChange |= sinkAndCmp(F);
+ EverMadeChange |= splitBranchCondition(F);
+ }
bool MadeChange = true;
while (MadeChange) {
MadeChange = false;
for (Function::iterator I = F.begin(); I != F.end(); ) {
BasicBlock *BB = I++;
- MadeChange |= OptimizeBlock(*BB);
+ bool ModifiedDTOnIteration = false;
+ MadeChange |= OptimizeBlock(*BB, ModifiedDTOnIteration);
+
+ // Restart BB iteration if the dominator tree of the Function was changed
+ ModifiedDT |= ModifiedDTOnIteration;
+ if (ModifiedDTOnIteration)
+ break;
}
EverMadeChange |= MadeChange;
}
@@ -236,9 +267,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
if (!DisableBranchOpts) {
MadeChange = false;
SmallPtrSet<BasicBlock*, 8> WorkList;
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
- MadeChange |= ConstantFoldTerminator(BB, true);
+ for (BasicBlock &BB : F) {
+ SmallVector<BasicBlock *, 2> Successors(succ_begin(&BB), succ_end(&BB));
+ MadeChange |= ConstantFoldTerminator(&BB, true);
if (!MadeChange) continue;
for (SmallVectorImpl<BasicBlock*>::iterator
@@ -272,6 +303,16 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
EverMadeChange |= MadeChange;
}
+ if (!DisableGCOpts) {
+ SmallVector<Instruction *, 2> Statepoints;
+ for (BasicBlock &BB : F)
+ for (Instruction &I : BB)
+ if (isStatepoint(I))
+ Statepoints.push_back(&I);
+ for (auto &I : Statepoints)
+ EverMadeChange |= simplifyOffsetableRelocate(*I);
+ }
+
if (ModifiedDT && DT)
DT->recalculate(F);
@@ -300,7 +341,7 @@ bool CodeGenPrepare::EliminateFallThrough(Function &F) {
// Remember if SinglePred was the entry block of the function.
// If so, we will need to move BB back to the entry position.
bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
- MergeBasicBlockIntoOnlyPred(BB, this);
+ MergeBasicBlockIntoOnlyPred(BB, DT);
if (isEntry && BB != &BB->getParent()->getEntryBlock())
BB->moveBefore(&BB->getParent()->getEntryBlock());
@@ -440,7 +481,7 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
// Remember if SinglePred was the entry block of the function. If so, we
// will need to move BB back to the entry position.
bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
- MergeBasicBlockIntoOnlyPred(DestBB, this);
+ MergeBasicBlockIntoOnlyPred(DestBB, DT);
if (isEntry && BB != &BB->getParent()->getEntryBlock())
BB->moveBefore(&BB->getParent()->getEntryBlock());
@@ -495,6 +536,144 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
}
+// Computes a map of base pointer relocation instructions to corresponding
+// derived pointer relocation instructions given a vector of all relocate calls
+static void computeBaseDerivedRelocateMap(
+ const SmallVectorImpl<User *> &AllRelocateCalls,
+ DenseMap<IntrinsicInst *, SmallVector<IntrinsicInst *, 2>> &
+ RelocateInstMap) {
+ // Collect information in two maps: one primarily for locating the base object
+ // while filling the second map; the second map is the final structure holding
+ // a mapping between Base and corresponding Derived relocate calls
+ DenseMap<std::pair<unsigned, unsigned>, IntrinsicInst *> RelocateIdxMap;
+ for (auto &U : AllRelocateCalls) {
+ GCRelocateOperands ThisRelocate(U);
+ IntrinsicInst *I = cast<IntrinsicInst>(U);
+ auto K = std::make_pair(ThisRelocate.basePtrIndex(),
+ ThisRelocate.derivedPtrIndex());
+ RelocateIdxMap.insert(std::make_pair(K, I));
+ }
+ for (auto &Item : RelocateIdxMap) {
+ std::pair<unsigned, unsigned> Key = Item.first;
+ if (Key.first == Key.second)
+ // Base relocation: nothing to insert
+ continue;
+
+ IntrinsicInst *I = Item.second;
+ auto BaseKey = std::make_pair(Key.first, Key.first);
+ IntrinsicInst *Base = RelocateIdxMap[BaseKey];
+ if (!Base)
+ // TODO: We might want to insert a new base object relocate and gep off
+ // that, if there are enough derived object relocates.
+ continue;
+ RelocateInstMap[Base].push_back(I);
+ }
+}
+
+// Accepts a GEP and extracts the operands into a vector provided they're all
+// small integer constants
+static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
+ SmallVectorImpl<Value *> &OffsetV) {
+ for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
+ // Only accept small constant integer operands
+ auto Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
+ if (!Op || Op->getZExtValue() > 20)
+ return false;
+ }
+
+ for (unsigned i = 1; i < GEP->getNumOperands(); i++)
+ OffsetV.push_back(GEP->getOperand(i));
+ return true;
+}
+
+// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
+// replace, computes a replacement, and affects it.
+static bool
+simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase,
+ const SmallVectorImpl<IntrinsicInst *> &Targets) {
+ bool MadeChange = false;
+ for (auto &ToReplace : Targets) {
+ GCRelocateOperands MasterRelocate(RelocatedBase);
+ GCRelocateOperands ThisRelocate(ToReplace);
+
+ assert(ThisRelocate.basePtrIndex() == MasterRelocate.basePtrIndex() &&
+ "Not relocating a derived object of the original base object");
+ if (ThisRelocate.basePtrIndex() == ThisRelocate.derivedPtrIndex()) {
+ // A duplicate relocate call. TODO: coalesce duplicates.
+ continue;
+ }
+
+ Value *Base = ThisRelocate.basePtr();
+ auto Derived = dyn_cast<GetElementPtrInst>(ThisRelocate.derivedPtr());
+ if (!Derived || Derived->getPointerOperand() != Base)
+ continue;
+
+ SmallVector<Value *, 2> OffsetV;
+ if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
+ continue;
+
+ // Create a Builder and replace the target callsite with a gep
+ IRBuilder<> Builder(ToReplace);
+ Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
+ Value *Replacement =
+ Builder.CreateGEP(RelocatedBase, makeArrayRef(OffsetV));
+ Instruction *ReplacementInst = cast<Instruction>(Replacement);
+ ReplacementInst->removeFromParent();
+ ReplacementInst->insertAfter(RelocatedBase);
+ Replacement->takeName(ToReplace);
+ ToReplace->replaceAllUsesWith(Replacement);
+ ToReplace->eraseFromParent();
+
+ MadeChange = true;
+ }
+ return MadeChange;
+}
+
+// Turns this:
+//
+// %base = ...
+// %ptr = gep %base + 15
+// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
+// %base' = relocate(%tok, i32 4, i32 4)
+// %ptr' = relocate(%tok, i32 4, i32 5)
+// %val = load %ptr'
+//
+// into this:
+//
+// %base = ...
+// %ptr = gep %base + 15
+// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
+// %base' = gc.relocate(%tok, i32 4, i32 4)
+// %ptr' = gep %base' + 15
+// %val = load %ptr'
+bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
+ bool MadeChange = false;
+ SmallVector<User *, 2> AllRelocateCalls;
+
+ for (auto *U : I.users())
+ if (isGCRelocate(dyn_cast<Instruction>(U)))
+ // Collect all the relocate calls associated with a statepoint
+ AllRelocateCalls.push_back(U);
+
+ // We need atleast one base pointer relocation + one derived pointer
+ // relocation to mangle
+ if (AllRelocateCalls.size() < 2)
+ return false;
+
+ // RelocateInstMap is a mapping from the base relocate instruction to the
+ // corresponding derived relocate instructions
+ DenseMap<IntrinsicInst *, SmallVector<IntrinsicInst *, 2>> RelocateInstMap;
+ computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
+ if (RelocateInstMap.empty())
+ return false;
+
+ for (auto &Item : RelocateInstMap)
+ // Item.first is the RelocatedBase to offset against
+ // Item.second is the vector of Targets to replace
+ MadeChange = simplifyRelocatesOffABase(Item.first, Item.second);
+ return MadeChange;
+}
+
/// SinkCast - Sink the specified cast instruction into its user blocks
static bool SinkCast(CastInst *CI) {
BasicBlock *DefBB = CI->getParent();
@@ -822,23 +1001,211 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
return MadeChange;
}
-namespace {
-class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls {
-protected:
- void replaceCall(Value *With) override {
- CI->replaceAllUsesWith(With);
- CI->eraseFromParent();
+// ScalarizeMaskedLoad() translates masked load intrinsic, like
+// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
+// <16 x i1> %mask, <16 x i32> %passthru)
+// to a chain of basic blocks, whith loading element one-by-one if
+// the appropriate mask bit is set
+//
+// %1 = bitcast i8* %addr to i32*
+// %2 = extractelement <16 x i1> %mask, i32 0
+// %3 = icmp eq i1 %2, true
+// br i1 %3, label %cond.load, label %else
+//
+//cond.load: ; preds = %0
+// %4 = getelementptr i32* %1, i32 0
+// %5 = load i32* %4
+// %6 = insertelement <16 x i32> undef, i32 %5, i32 0
+// br label %else
+//
+//else: ; preds = %0, %cond.load
+// %res.phi.else = phi <16 x i32> [ %6, %cond.load ], [ undef, %0 ]
+// %7 = extractelement <16 x i1> %mask, i32 1
+// %8 = icmp eq i1 %7, true
+// br i1 %8, label %cond.load1, label %else2
+//
+//cond.load1: ; preds = %else
+// %9 = getelementptr i32* %1, i32 1
+// %10 = load i32* %9
+// %11 = insertelement <16 x i32> %res.phi.else, i32 %10, i32 1
+// br label %else2
+//
+//else2: ; preds = %else, %cond.load1
+// %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
+// %12 = extractelement <16 x i1> %mask, i32 2
+// %13 = icmp eq i1 %12, true
+// br i1 %13, label %cond.load4, label %else5
+//
+static void ScalarizeMaskedLoad(CallInst *CI) {
+ Value *Ptr = CI->getArgOperand(0);
+ Value *Src0 = CI->getArgOperand(3);
+ Value *Mask = CI->getArgOperand(2);
+ VectorType *VecType = dyn_cast<VectorType>(CI->getType());
+ Type *EltTy = VecType->getElementType();
+
+ assert(VecType && "Unexpected return type of masked load intrinsic");
+
+ IRBuilder<> Builder(CI->getContext());
+ Instruction *InsertPt = CI;
+ BasicBlock *IfBlock = CI->getParent();
+ BasicBlock *CondBlock = nullptr;
+ BasicBlock *PrevIfBlock = CI->getParent();
+ Builder.SetInsertPoint(InsertPt);
+
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+ // Bitcast %addr fron i8* to EltTy*
+ Type *NewPtrType =
+ EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
+ Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
+ Value *UndefVal = UndefValue::get(VecType);
+
+ // The result vector
+ Value *VResult = UndefVal;
+
+ PHINode *Phi = nullptr;
+ Value *PrevPhi = UndefVal;
+
+ unsigned VectorWidth = VecType->getNumElements();
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+
+ // Fill the "else" block, created in the previous iteration
+ //
+ // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
+ // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
+ // %to_load = icmp eq i1 %mask_1, true
+ // br i1 %to_load, label %cond.load, label %else
+ //
+ if (Idx > 0) {
+ Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
+ Phi->addIncoming(VResult, CondBlock);
+ Phi->addIncoming(PrevPhi, PrevIfBlock);
+ PrevPhi = Phi;
+ VResult = Phi;
+ }
+
+ Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
+ ConstantInt::get(Predicate->getType(), 1));
+
+ // Create "cond" block
+ //
+ // %EltAddr = getelementptr i32* %1, i32 0
+ // %Elt = load i32* %EltAddr
+ // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
+ //
+ CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
+ Builder.SetInsertPoint(InsertPt);
+
+ Value* Gep = Builder.CreateInBoundsGEP(FirstEltPtr, Builder.getInt32(Idx));
+ LoadInst* Load = Builder.CreateLoad(Gep, false);
+ VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
+
+ // Create "else" block, fill it in the next iteration
+ BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
+ Builder.SetInsertPoint(InsertPt);
+ Instruction *OldBr = IfBlock->getTerminator();
+ BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+ OldBr->eraseFromParent();
+ PrevIfBlock = IfBlock;
+ IfBlock = NewIfBlock;
}
- bool isFoldable(unsigned SizeCIOp, unsigned, bool) const override {
- if (ConstantInt *SizeCI =
- dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp)))
- return SizeCI->isAllOnesValue();
- return false;
+
+ Phi = Builder.CreatePHI(VecType, 2, "res.phi.select");
+ Phi->addIncoming(VResult, CondBlock);
+ Phi->addIncoming(PrevPhi, PrevIfBlock);
+ Value *NewI = Builder.CreateSelect(Mask, Phi, Src0);
+ CI->replaceAllUsesWith(NewI);
+ CI->eraseFromParent();
+}
+
+// ScalarizeMaskedStore() translates masked store intrinsic, like
+// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
+// <16 x i1> %mask)
+// to a chain of basic blocks, that stores element one-by-one if
+// the appropriate mask bit is set
+//
+// %1 = bitcast i8* %addr to i32*
+// %2 = extractelement <16 x i1> %mask, i32 0
+// %3 = icmp eq i1 %2, true
+// br i1 %3, label %cond.store, label %else
+//
+// cond.store: ; preds = %0
+// %4 = extractelement <16 x i32> %val, i32 0
+// %5 = getelementptr i32* %1, i32 0
+// store i32 %4, i32* %5
+// br label %else
+//
+// else: ; preds = %0, %cond.store
+// %6 = extractelement <16 x i1> %mask, i32 1
+// %7 = icmp eq i1 %6, true
+// br i1 %7, label %cond.store1, label %else2
+//
+// cond.store1: ; preds = %else
+// %8 = extractelement <16 x i32> %val, i32 1
+// %9 = getelementptr i32* %1, i32 1
+// store i32 %8, i32* %9
+// br label %else2
+// . . .
+static void ScalarizeMaskedStore(CallInst *CI) {
+ Value *Ptr = CI->getArgOperand(1);
+ Value *Src = CI->getArgOperand(0);
+ Value *Mask = CI->getArgOperand(3);
+
+ VectorType *VecType = dyn_cast<VectorType>(Src->getType());
+ Type *EltTy = VecType->getElementType();
+
+ assert(VecType && "Unexpected data type in masked store intrinsic");
+
+ IRBuilder<> Builder(CI->getContext());
+ Instruction *InsertPt = CI;
+ BasicBlock *IfBlock = CI->getParent();
+ Builder.SetInsertPoint(InsertPt);
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+ // Bitcast %addr fron i8* to EltTy*
+ Type *NewPtrType =
+ EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
+ Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
+
+ unsigned VectorWidth = VecType->getNumElements();
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+
+ // Fill the "else" block, created in the previous iteration
+ //
+ // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
+ // %to_store = icmp eq i1 %mask_1, true
+ // br i1 %to_load, label %cond.store, label %else
+ //
+ Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
+ ConstantInt::get(Predicate->getType(), 1));
+
+ // Create "cond" block
+ //
+ // %OneElt = extractelement <16 x i32> %Src, i32 Idx
+ // %EltAddr = getelementptr i32* %1, i32 0
+ // %store i32 %OneElt, i32* %EltAddr
+ //
+ BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
+ Builder.SetInsertPoint(InsertPt);
+
+ Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
+ Value* Gep = Builder.CreateInBoundsGEP(FirstEltPtr, Builder.getInt32(Idx));
+ Builder.CreateStore(OneElt, Gep);
+
+ // Create "else" block, fill it in the next iteration
+ BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
+ Builder.SetInsertPoint(InsertPt);
+ Instruction *OldBr = IfBlock->getTerminator();
+ BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+ OldBr->eraseFromParent();
+ IfBlock = NewIfBlock;
}
-};
-} // end anonymous namespace
+ CI->eraseFromParent();
+}
-bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
+bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
BasicBlock *BB = CI->getParent();
// Lower inline assembly if we can.
@@ -858,38 +1225,60 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
return true;
}
- // Lower all uses of llvm.objectsize.*
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
- if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
- bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
- Type *ReturnTy = CI->getType();
- Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
-
- // Substituting this can cause recursive simplifications, which can
- // invalidate our iterator. Use a WeakVH to hold onto it in case this
- // happens.
- WeakVH IterHandle(CurInstIterator);
+ if (II) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::objectsize: {
+ // Lower all uses of llvm.objectsize.*
+ bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
+ Type *ReturnTy = CI->getType();
+ Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
+
+ // Substituting this can cause recursive simplifications, which can
+ // invalidate our iterator. Use a WeakVH to hold onto it in case this
+ // happens.
+ WeakVH IterHandle(CurInstIterator);
+
+ replaceAndRecursivelySimplify(CI, RetVal,
+ TLI ? TLI->getDataLayout() : nullptr,
+ TLInfo, ModifiedDT ? nullptr : DT);
- replaceAndRecursivelySimplify(CI, RetVal,
- TLI ? TLI->getDataLayout() : nullptr,
- TLInfo, ModifiedDT ? nullptr : DT);
-
- // If the iterator instruction was recursively deleted, start over at the
- // start of the block.
- if (IterHandle != CurInstIterator) {
- CurInstIterator = BB->begin();
- SunkAddrs.clear();
+ // If the iterator instruction was recursively deleted, start over at the
+ // start of the block.
+ if (IterHandle != CurInstIterator) {
+ CurInstIterator = BB->begin();
+ SunkAddrs.clear();
+ }
+ return true;
+ }
+ case Intrinsic::masked_load: {
+ // Scalarize unsupported vector masked load
+ if (!TTI->isLegalMaskedLoad(CI->getType(), 1)) {
+ ScalarizeMaskedLoad(CI);
+ ModifiedDT = true;
+ return true;
+ }
+ return false;
+ }
+ case Intrinsic::masked_store: {
+ if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType(), 1)) {
+ ScalarizeMaskedStore(CI);
+ ModifiedDT = true;
+ return true;
+ }
+ return false;
+ }
}
- return true;
- }
- if (II && TLI) {
- SmallVector<Value*, 2> PtrOps;
- Type *AccessTy;
- if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy))
- while (!PtrOps.empty())
- if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy))
- return true;
+ if (TLI) {
+ SmallVector<Value*, 2> PtrOps;
+ Type *AccessTy;
+ if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy))
+ while (!PtrOps.empty())
+ if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy))
+ return true;
+ }
}
// From here on out we're working with named functions.
@@ -901,10 +1290,15 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
// Lower all default uses of _chk calls. This is very similar
// to what InstCombineCalls does, but here we are only lowering calls
- // that have the default "don't know" as the objectsize. Anything else
- // should be left alone.
- CodeGenPrepareFortifiedLibCalls Simplifier;
- return Simplifier.fold(CI, TD, TLInfo);
+ // to fortified library functions (e.g. __memcpy_chk) that have the default
+ // "don't know" as the objectsize. Anything else should be left alone.
+ FortifiedLibCallSimplifier Simplifier(TD, TLInfo, true);
+ if (Value *V = Simplifier.optimizeCall(CI)) {
+ CI->replaceAllUsesWith(V);
+ CI->eraseFromParent();
+ return true;
+ }
+ return false;
}
/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
@@ -1561,6 +1955,7 @@ void TypePromotionTransaction::rollback(
/// This encapsulates the logic for matching the target-legal addressing modes.
class AddressingModeMatcher {
SmallVectorImpl<Instruction*> &AddrModeInsts;
+ const TargetMachine &TM;
const TargetLowering &TLI;
/// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
@@ -1584,13 +1979,15 @@ class AddressingModeMatcher {
/// always returns true.
bool IgnoreProfitability;
- AddressingModeMatcher(SmallVectorImpl<Instruction*> &AMI,
- const TargetLowering &T, Type *AT,
- Instruction *MI, ExtAddrMode &AM,
- const SetOfInstrs &InsertedTruncs,
+ AddressingModeMatcher(SmallVectorImpl<Instruction *> &AMI,
+ const TargetMachine &TM, Type *AT, Instruction *MI,
+ ExtAddrMode &AM, const SetOfInstrs &InsertedTruncs,
InstrToOrigTy &PromotedInsts,
TypePromotionTransaction &TPT)
- : AddrModeInsts(AMI), TLI(T), AccessTy(AT), MemoryInst(MI), AddrMode(AM),
+ : AddrModeInsts(AMI), TM(TM),
+ TLI(*TM.getSubtargetImpl(*MI->getParent()->getParent())
+ ->getTargetLowering()),
+ AccessTy(AT), MemoryInst(MI), AddrMode(AM),
InsertedTruncs(InsertedTruncs), PromotedInsts(PromotedInsts), TPT(TPT) {
IgnoreProfitability = false;
}
@@ -1607,13 +2004,13 @@ public:
static ExtAddrMode Match(Value *V, Type *AccessTy,
Instruction *MemoryInst,
SmallVectorImpl<Instruction*> &AddrModeInsts,
- const TargetLowering &TLI,
+ const TargetMachine &TM,
const SetOfInstrs &InsertedTruncs,
InstrToOrigTy &PromotedInsts,
TypePromotionTransaction &TPT) {
ExtAddrMode Result;
- bool Success = AddressingModeMatcher(AddrModeInsts, TLI, AccessTy,
+ bool Success = AddressingModeMatcher(AddrModeInsts, TM, AccessTy,
MemoryInst, Result, InsertedTruncs,
PromotedInsts, TPT).MatchAddr(V, 0);
(void)Success; assert(Success && "Couldn't select *anything*?");
@@ -1718,6 +2115,23 @@ static bool MightBeFoldableInst(Instruction *I) {
}
}
+/// \brief Check whether or not \p Val is a legal instruction for \p TLI.
+/// \note \p Val is assumed to be the product of some type promotion.
+/// Therefore if \p Val has an undefined state in \p TLI, this is assumed
+/// to be legal, as the non-promoted value would have had the same state.
+static bool isPromotedInstructionLegal(const TargetLowering &TLI, Value *Val) {
+ Instruction *PromotedInst = dyn_cast<Instruction>(Val);
+ if (!PromotedInst)
+ return false;
+ int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
+ // If the ISDOpcode is undefined, it was undefined before the promotion.
+ if (!ISDOpcode)
+ return true;
+ // Otherwise, check if the promoted instruction is legal or not.
+ return TLI.isOperationLegalOrCustom(
+ ISDOpcode, TLI.getValueType(PromotedInst->getType()));
+}
+
/// \brief Hepler class to perform type promotion.
class TypePromotionHelper {
/// \brief Utility function to check whether or not a sign or zero extension
@@ -1747,46 +2161,59 @@ class TypePromotionHelper {
/// \p PromotedInsts maps the instructions to their type before promotion.
/// \p CreatedInsts[out] contains how many non-free instructions have been
/// created to promote the operand of Ext.
+ /// Newly added extensions are inserted in \p Exts.
+ /// Newly added truncates are inserted in \p Truncs.
/// Should never be called directly.
/// \return The promoted value which is used instead of Ext.
- static Value *promoteOperandForTruncAndAnyExt(Instruction *Ext,
- TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts,
- unsigned &CreatedInsts);
+ static Value *promoteOperandForTruncAndAnyExt(
+ Instruction *Ext, TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs);
/// \brief Utility function to promote the operand of \p Ext when this
/// operand is promotable and is not a supported trunc or sext.
/// \p PromotedInsts maps the instructions to their type before promotion.
/// \p CreatedInsts[out] contains how many non-free instructions have been
/// created to promote the operand of Ext.
+ /// Newly added extensions are inserted in \p Exts.
+ /// Newly added truncates are inserted in \p Truncs.
/// Should never be called directly.
/// \return The promoted value which is used instead of Ext.
- static Value *promoteOperandForOther(Instruction *Ext,
- TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts,
- unsigned &CreatedInsts, bool IsSExt);
+ static Value *
+ promoteOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs, bool IsSExt);
/// \see promoteOperandForOther.
- static Value *signExtendOperandForOther(Instruction *Ext,
- TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts,
- unsigned &CreatedInsts) {
- return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, true);
+ static Value *
+ signExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts,
+ unsigned &CreatedInsts,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs) {
+ return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts,
+ Truncs, true);
}
/// \see promoteOperandForOther.
- static Value *zeroExtendOperandForOther(Instruction *Ext,
- TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts,
- unsigned &CreatedInsts) {
- return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, false);
+ static Value *
+ zeroExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts,
+ unsigned &CreatedInsts,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs) {
+ return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts,
+ Truncs, false);
}
public:
/// Type for the utility function that promotes the operand of Ext.
typedef Value *(*Action)(Instruction *Ext, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts,
- unsigned &CreatedInsts);
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs);
/// \brief Given a sign/zero extend instruction \p Ext, return the approriate
/// action to promote the operand of \p Ext instead of using Ext.
/// \return NULL if no promotable action is possible with the current
@@ -1805,6 +2232,12 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
Type *ConsideredExtType,
const InstrToOrigTy &PromotedInsts,
bool IsSExt) {
+ // The promotion helper does not know how to deal with vector types yet.
+ // To be able to fix that, we would need to fix the places where we
+ // statically extend, e.g., constants and such.
+ if (Inst->getType()->isVectorTy())
+ return false;
+
// We can always get through zext.
if (isa<ZExtInst>(Inst))
return true;
@@ -1830,8 +2263,9 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
// Check if we can use this operand in the extension.
// If the type is larger than the result type of the extension,
// we cannot.
- if (OpndVal->getType()->getIntegerBitWidth() >
- ConsideredExtType->getIntegerBitWidth())
+ if (!OpndVal->getType()->isIntegerTy() ||
+ OpndVal->getType()->getIntegerBitWidth() >
+ ConsideredExtType->getIntegerBitWidth())
return false;
// If the operand of the truncate is not an instruction, we will not have
@@ -1896,7 +2330,9 @@ TypePromotionHelper::Action TypePromotionHelper::getAction(
Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
llvm::Instruction *SExt, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts) {
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs) {
// By construction, the operand of SExt is an instruction. Otherwise we cannot
// get through it and this method should not be called.
Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
@@ -1922,8 +2358,11 @@ Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
// Check if the extension is still needed.
Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
- if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType())
+ if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
+ if (ExtInst && Exts)
+ Exts->push_back(ExtInst);
return ExtVal;
+ }
// At this point we have: ext ty opnd to ty.
// Reassign the uses of ExtInst to the opnd and remove ExtInst.
@@ -1934,7 +2373,9 @@ Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
Value *TypePromotionHelper::promoteOperandForOther(
Instruction *Ext, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, bool IsSExt) {
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs, bool IsSExt) {
// By construction, the operand of Ext is an instruction. Otherwise we cannot
// get through it and this method should not be called.
Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
@@ -1949,6 +2390,8 @@ Value *TypePromotionHelper::promoteOperandForOther(
ITrunc->removeFromParent();
// Insert it just after the definition.
ITrunc->insertAfter(ExtOpnd);
+ if (Truncs)
+ Truncs->push_back(ITrunc);
}
TPT.replaceAllUsesWith(ExtOpnd, Trunc);
@@ -2004,12 +2447,17 @@ Value *TypePromotionHelper::promoteOperandForOther(
if (!ExtForOpnd) {
// If yes, create a new one.
DEBUG(dbgs() << "More operands to ext\n");
- ExtForOpnd =
- cast<Instruction>(IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType())
- : TPT.createZExt(Ext, Opnd, Ext->getType()));
+ Value *ValForExtOpnd = IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType())
+ : TPT.createZExt(Ext, Opnd, Ext->getType());
+ if (!isa<Instruction>(ValForExtOpnd)) {
+ TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
+ continue;
+ }
+ ExtForOpnd = cast<Instruction>(ValForExtOpnd);
++CreatedInsts;
}
-
+ if (Exts)
+ Exts->push_back(ExtForOpnd);
TPT.setOperand(ExtForOpnd, 0, Opnd);
// Move the sign extension before the insertion point.
@@ -2047,16 +2495,7 @@ AddressingModeMatcher::IsPromotionProfitable(unsigned MatchedSize,
// The promotion is neutral but it may help folding the sign extension in
// loads for instance.
// Check that we did not create an illegal instruction.
- Instruction *PromotedInst = dyn_cast<Instruction>(PromotedOperand);
- if (!PromotedInst)
- return false;
- int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
- // If the ISDOpcode is undefined, it was undefined before the promotion.
- if (!ISDOpcode)
- return true;
- // Otherwise, check if the promoted instruction is legal or not.
- return TLI.isOperationLegalOrCustom(
- ISDOpcode, TLI.getValueType(PromotedInst->getType()));
+ return isPromotedInstructionLegal(TLI, PromotedOperand);
}
/// MatchOperationAddr - Given an instruction or constant expr, see if we can
@@ -2250,7 +2689,8 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
unsigned CreatedInsts = 0;
- Value *PromotedOperand = TPH(Ext, TPT, PromotedInsts, CreatedInsts);
+ Value *PromotedOperand =
+ TPH(Ext, TPT, PromotedInsts, CreatedInsts, nullptr, nullptr);
// SExt has been moved away.
// Thus either it will be rematched later in the recursive calls or it is
// gone. Anyway, we must not fold it into the addressing mode at this point.
@@ -2374,13 +2814,17 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
/// inline asm call are due to memory operands. If so, return true, otherwise
/// return false.
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
- const TargetLowering &TLI) {
- TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI));
+ const TargetMachine &TM) {
+ const Function *F = CI->getParent()->getParent();
+ const TargetLowering *TLI = TM.getSubtargetImpl(*F)->getTargetLowering();
+ const TargetRegisterInfo *TRI = TM.getSubtargetImpl(*F)->getRegisterInfo();
+ TargetLowering::AsmOperandInfoVector TargetConstraints =
+ TLI->ParseConstraints(TRI, ImmutableCallSite(CI));
for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
// Compute the constraint code and ConstraintType to use.
- TLI.ComputeConstraintToUse(OpInfo, SDValue());
+ TLI->ComputeConstraintToUse(OpInfo, SDValue());
// If this asm operand is our Value*, and if it isn't an indirect memory
// operand, we can't fold it!
@@ -2396,10 +2840,10 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
/// FindAllMemoryUses - Recursively walk all the uses of I until we find a
/// memory use. If we find an obviously non-foldable instruction, return true.
/// Add the ultimately found memory instructions to MemoryUses.
-static bool FindAllMemoryUses(Instruction *I,
- SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses,
- SmallPtrSetImpl<Instruction*> &ConsideredInsts,
- const TargetLowering &TLI) {
+static bool FindAllMemoryUses(
+ Instruction *I,
+ SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses,
+ SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetMachine &TM) {
// If we already considered this instruction, we're done.
if (!ConsideredInsts.insert(I).second)
return false;
@@ -2429,12 +2873,12 @@ static bool FindAllMemoryUses(Instruction *I,
if (!IA) return true;
// If this is a memory operand, we're cool, otherwise bail out.
- if (!IsOperandAMemoryOperand(CI, IA, I, TLI))
+ if (!IsOperandAMemoryOperand(CI, IA, I, TM))
return true;
continue;
}
- if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI))
+ if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TM))
return true;
}
@@ -2522,7 +2966,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
// uses.
SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
SmallPtrSet<Instruction*, 16> ConsideredInsts;
- if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI))
+ if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TM))
return false; // Has a non-memory, non-foldable use!
// Now that we know that all uses of this instruction are part of a chain of
@@ -2547,7 +2991,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
ExtAddrMode Result;
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
- AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy,
+ AddressingModeMatcher Matcher(MatchedAddrModeInsts, TM, AddressAccessTy,
MemoryInst, Result, InsertedTruncs,
PromotedInsts, TPT);
Matcher.IgnoreProfitability = true;
@@ -2630,7 +3074,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// For non-PHIs, determine the addressing mode being computed.
SmallVector<Instruction*, 16> NewAddrModeInsts;
ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
- V, AccessTy, MemoryInst, NewAddrModeInsts, *TLI, InsertedTruncsSet,
+ V, AccessTy, MemoryInst, NewAddrModeInsts, *TM, InsertedTruncsSet,
PromotedInsts, TPT);
// This check is broken into two cases with very similar code to avoid using
@@ -2705,8 +3149,10 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
<< *MemoryInst << "\n");
if (SunkAddr->getType() != Addr->getType())
SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
- } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() &&
- TM && TM->getSubtarget<TargetSubtargetInfo>().useAA())) {
+ } else if (AddrSinkUsingGEPs ||
+ (!AddrSinkUsingGEPs.getNumOccurrences() && TM &&
+ TM->getSubtargetImpl(*MemoryInst->getParent()->getParent())
+ ->useAA())) {
// By default, we use the GEP-based method when AA is used later. This
// prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
@@ -2929,8 +3375,10 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
bool MadeChange = false;
+ const TargetRegisterInfo *TRI =
+ TM->getSubtargetImpl(*CS->getParent()->getParent())->getRegisterInfo();
TargetLowering::AsmOperandInfoVector
- TargetConstraints = TLI->ParseConstraints(CS);
+ TargetConstraints = TLI->ParseConstraints(TRI, CS);
unsigned ArgNo = 0;
for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
@@ -2949,26 +3397,186 @@ bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
return MadeChange;
}
+/// \brief Check if all the uses of \p Inst are equivalent (or free) zero or
+/// sign extensions.
+static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) {
+ assert(!Inst->use_empty() && "Input must have at least one use");
+ const Instruction *FirstUser = cast<Instruction>(*Inst->user_begin());
+ bool IsSExt = isa<SExtInst>(FirstUser);
+ Type *ExtTy = FirstUser->getType();
+ for (const User *U : Inst->users()) {
+ const Instruction *UI = cast<Instruction>(U);
+ if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
+ return false;
+ Type *CurTy = UI->getType();
+ // Same input and output types: Same instruction after CSE.
+ if (CurTy == ExtTy)
+ continue;
+
+ // If IsSExt is true, we are in this situation:
+ // a = Inst
+ // b = sext ty1 a to ty2
+ // c = sext ty1 a to ty3
+ // Assuming ty2 is shorter than ty3, this could be turned into:
+ // a = Inst
+ // b = sext ty1 a to ty2
+ // c = sext ty2 b to ty3
+ // However, the last sext is not free.
+ if (IsSExt)
+ return false;
+
+ // This is a ZExt, maybe this is free to extend from one type to another.
+ // In that case, we would not account for a different use.
+ Type *NarrowTy;
+ Type *LargeTy;
+ if (ExtTy->getScalarType()->getIntegerBitWidth() >
+ CurTy->getScalarType()->getIntegerBitWidth()) {
+ NarrowTy = CurTy;
+ LargeTy = ExtTy;
+ } else {
+ NarrowTy = ExtTy;
+ LargeTy = CurTy;
+ }
+
+ if (!TLI.isZExtFree(NarrowTy, LargeTy))
+ return false;
+ }
+ // All uses are the same or can be derived from one another for free.
+ return true;
+}
+
+/// \brief Try to form ExtLd by promoting \p Exts until they reach a
+/// load instruction.
+/// If an ext(load) can be formed, it is returned via \p LI for the load
+/// and \p Inst for the extension.
+/// Otherwise LI == nullptr and Inst == nullptr.
+/// When some promotion happened, \p TPT contains the proper state to
+/// revert them.
+///
+/// \return true when promoting was necessary to expose the ext(load)
+/// opportunity, false otherwise.
+///
+/// Example:
+/// \code
+/// %ld = load i32* %addr
+/// %add = add nuw i32 %ld, 4
+/// %zext = zext i32 %add to i64
+/// \endcode
+/// =>
+/// \code
+/// %ld = load i32* %addr
+/// %zext = zext i32 %ld to i64
+/// %add = add nuw i64 %zext, 4
+/// \encode
+/// Thanks to the promotion, we can match zext(load i32*) to i64.
+bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT,
+ LoadInst *&LI, Instruction *&Inst,
+ const SmallVectorImpl<Instruction *> &Exts,
+ unsigned CreatedInsts = 0) {
+ // Iterate over all the extensions to see if one form an ext(load).
+ for (auto I : Exts) {
+ // Check if we directly have ext(load).
+ if ((LI = dyn_cast<LoadInst>(I->getOperand(0)))) {
+ Inst = I;
+ // No promotion happened here.
+ return false;
+ }
+ // Check whether or not we want to do any promotion.
+ if (!TLI || !TLI->enableExtLdPromotion() || DisableExtLdPromotion)
+ continue;
+ // Get the action to perform the promotion.
+ TypePromotionHelper::Action TPH = TypePromotionHelper::getAction(
+ I, InsertedTruncsSet, *TLI, PromotedInsts);
+ // Check if we can promote.
+ if (!TPH)
+ continue;
+ // Save the current state.
+ TypePromotionTransaction::ConstRestorationPt LastKnownGood =
+ TPT.getRestorationPoint();
+ SmallVector<Instruction *, 4> NewExts;
+ unsigned NewCreatedInsts = 0;
+ // Promote.
+ Value *PromotedVal =
+ TPH(I, TPT, PromotedInsts, NewCreatedInsts, &NewExts, nullptr);
+ assert(PromotedVal &&
+ "TypePromotionHelper should have filtered out those cases");
+
+ // We would be able to merge only one extension in a load.
+ // Therefore, if we have more than 1 new extension we heuristically
+ // cut this search path, because it means we degrade the code quality.
+ // With exactly 2, the transformation is neutral, because we will merge
+ // one extension but leave one. However, we optimistically keep going,
+ // because the new extension may be removed too.
+ unsigned TotalCreatedInsts = CreatedInsts + NewCreatedInsts;
+ if (!StressExtLdPromotion &&
+ (TotalCreatedInsts > 1 ||
+ !isPromotedInstructionLegal(*TLI, PromotedVal))) {
+ // The promotion is not profitable, rollback to the previous state.
+ TPT.rollback(LastKnownGood);
+ continue;
+ }
+ // The promotion is profitable.
+ // Check if it exposes an ext(load).
+ (void)ExtLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInsts);
+ if (LI && (StressExtLdPromotion || NewCreatedInsts == 0 ||
+ // If we have created a new extension, i.e., now we have two
+ // extensions. We must make sure one of them is merged with
+ // the load, otherwise we may degrade the code quality.
+ (LI->hasOneUse() || hasSameExtUse(LI, *TLI))))
+ // Promotion happened.
+ return true;
+ // If this does not help to expose an ext(load) then, rollback.
+ TPT.rollback(LastKnownGood);
+ }
+ // None of the extension can form an ext(load).
+ LI = nullptr;
+ Inst = nullptr;
+ return false;
+}
+
/// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
/// basic block as the load, unless conditions are unfavorable. This allows
/// SelectionDAG to fold the extend into the load.
+/// \p I[in/out] the extension may be modified during the process if some
+/// promotions apply.
///
-bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
+bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *&I) {
+ // Try to promote a chain of computation if it allows to form
+ // an extended load.
+ TypePromotionTransaction TPT;
+ TypePromotionTransaction::ConstRestorationPt LastKnownGood =
+ TPT.getRestorationPoint();
+ SmallVector<Instruction *, 1> Exts;
+ Exts.push_back(I);
// Look for a load being extended.
- LoadInst *LI = dyn_cast<LoadInst>(I->getOperand(0));
- if (!LI) return false;
+ LoadInst *LI = nullptr;
+ Instruction *OldExt = I;
+ bool HasPromoted = ExtLdPromotion(TPT, LI, I, Exts);
+ if (!LI || !I) {
+ assert(!HasPromoted && !LI && "If we did not match any load instruction "
+ "the code must remain the same");
+ I = OldExt;
+ return false;
+ }
// If they're already in the same block, there's nothing to do.
- if (LI->getParent() == I->getParent())
+ // Make the cheap checks first if we did not promote.
+ // If we promoted, we need to check if it is indeed profitable.
+ if (!HasPromoted && LI->getParent() == I->getParent())
return false;
+ EVT VT = TLI->getValueType(I->getType());
+ EVT LoadVT = TLI->getValueType(LI->getType());
+
// If the load has other users and the truncate is not free, this probably
// isn't worthwhile.
- if (!LI->hasOneUse() &&
- TLI && (TLI->isTypeLegal(TLI->getValueType(LI->getType())) ||
- !TLI->isTypeLegal(TLI->getValueType(I->getType()))) &&
- !TLI->isTruncateFree(I->getType(), LI->getType()))
+ if (!LI->hasOneUse() && TLI &&
+ (TLI->isTypeLegal(LoadVT) || !TLI->isTypeLegal(VT)) &&
+ !TLI->isTruncateFree(I->getType(), LI->getType())) {
+ I = OldExt;
+ TPT.rollback(LastKnownGood);
return false;
+ }
// Check whether the target supports casts folded into loads.
unsigned LType;
@@ -2978,11 +3586,15 @@ bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
assert(isa<SExtInst>(I) && "Unexpected ext type!");
LType = ISD::SEXTLOAD;
}
- if (TLI && !TLI->isLoadExtLegal(LType, TLI->getValueType(LI->getType())))
+ if (TLI && !TLI->isLoadExtLegal(LType, VT, LoadVT)) {
+ I = OldExt;
+ TPT.rollback(LastKnownGood);
return false;
+ }
// Move the extend into the same block as the load, so that SelectionDAG
// can fold it.
+ TPT.commit();
I->removeFromParent();
I->insertAfter(LI);
++NumExtsMoved;
@@ -3512,7 +4124,8 @@ void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
isa<UndefValue>(Val) ||
canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()));
} else
- assert(0 && "Did you modified shouldPromote and forgot to update this?");
+ llvm_unreachable("Did you modified shouldPromote and forgot to update "
+ "this?");
ToBePromoted->setOperand(U.getOperandNo(), NewVal);
}
Transition->removeFromParent();
@@ -3575,7 +4188,7 @@ bool CodeGenPrepare::OptimizeExtractElementInst(Instruction *Inst) {
return false;
}
-bool CodeGenPrepare::OptimizeInst(Instruction *I) {
+bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) {
if (PHINode *P = dyn_cast<PHINode>(I)) {
// It is possible for very late stage optimizations (such as SimplifyCFG)
// to introduce PHI nodes too late to be cleaned up. If we detect such a
@@ -3654,14 +4267,14 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
GEPI->replaceAllUsesWith(NC);
GEPI->eraseFromParent();
++NumGEPsElim;
- OptimizeInst(NC);
+ OptimizeInst(NC, ModifiedDT);
return true;
}
return false;
}
if (CallInst *CI = dyn_cast<CallInst>(I))
- return OptimizeCallInst(CI);
+ return OptimizeCallInst(CI, ModifiedDT);
if (SelectInst *SI = dyn_cast<SelectInst>(I))
return OptimizeSelectInst(SI);
@@ -3678,14 +4291,16 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
// In this pass we look for GEP and cast instructions that are used
// across basic blocks and rewrite them to improve basic-block-at-a-time
// selection.
-bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
+bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB, bool& ModifiedDT) {
SunkAddrs.clear();
bool MadeChange = false;
CurInstIterator = BB.begin();
- while (CurInstIterator != BB.end())
- MadeChange |= OptimizeInst(CurInstIterator++);
-
+ while (CurInstIterator != BB.end()) {
+ MadeChange |= OptimizeInst(CurInstIterator++, ModifiedDT);
+ if (ModifiedDT)
+ return true;
+ }
MadeChange |= DupRetToEnableTailCallOpts(&BB);
return MadeChange;
@@ -3696,10 +4311,10 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
// find a node corresponding to the value.
bool CodeGenPrepare::PlaceDbgValues(Function &F) {
bool MadeChange = false;
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ for (BasicBlock &BB : F) {
Instruction *PrevNonDbgInst = nullptr;
- for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
- Instruction *Insn = BI; ++BI;
+ for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
+ Instruction *Insn = BI++;
DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
// Leave dbg.values that refer to an alloca alone. These
// instrinsics describe the address of a variable (= the alloca)
@@ -3793,3 +4408,233 @@ bool CodeGenPrepare::sinkAndCmp(Function &F) {
}
return MadeChange;
}
+
+/// \brief Retrieve the probabilities of a conditional branch. Returns true on
+/// success, or returns false if no or invalid metadata was found.
+static bool extractBranchMetadata(BranchInst *BI,
+ uint64_t &ProbTrue, uint64_t &ProbFalse) {
+ assert(BI->isConditional() &&
+ "Looking for probabilities on unconditional branch?");
+ auto *ProfileData = BI->getMetadata(LLVMContext::MD_prof);
+ if (!ProfileData || ProfileData->getNumOperands() != 3)
+ return false;
+
+ const auto *CITrue =
+ mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(1));
+ const auto *CIFalse =
+ mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(2));
+ if (!CITrue || !CIFalse)
+ return false;
+
+ ProbTrue = CITrue->getValue().getZExtValue();
+ ProbFalse = CIFalse->getValue().getZExtValue();
+
+ return true;
+}
+
+/// \brief Scale down both weights to fit into uint32_t.
+static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
+ uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
+ uint32_t Scale = (NewMax / UINT32_MAX) + 1;
+ NewTrue = NewTrue / Scale;
+ NewFalse = NewFalse / Scale;
+}
+
+/// \brief Some targets prefer to split a conditional branch like:
+/// \code
+/// %0 = icmp ne i32 %a, 0
+/// %1 = icmp ne i32 %b, 0
+/// %or.cond = or i1 %0, %1
+/// br i1 %or.cond, label %TrueBB, label %FalseBB
+/// \endcode
+/// into multiple branch instructions like:
+/// \code
+/// bb1:
+/// %0 = icmp ne i32 %a, 0
+/// br i1 %0, label %TrueBB, label %bb2
+/// bb2:
+/// %1 = icmp ne i32 %b, 0
+/// br i1 %1, label %TrueBB, label %FalseBB
+/// \endcode
+/// This usually allows instruction selection to do even further optimizations
+/// and combine the compare with the branch instruction. Currently this is
+/// applied for targets which have "cheap" jump instructions.
+///
+/// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
+///
+bool CodeGenPrepare::splitBranchCondition(Function &F) {
+ if (!TM || TM->Options.EnableFastISel != true ||
+ !TLI || TLI->isJumpExpensive())
+ return false;
+
+ bool MadeChange = false;
+ for (auto &BB : F) {
+ // Does this BB end with the following?
+ // %cond1 = icmp|fcmp|binary instruction ...
+ // %cond2 = icmp|fcmp|binary instruction ...
+ // %cond.or = or|and i1 %cond1, cond2
+ // br i1 %cond.or label %dest1, label %dest2"
+ BinaryOperator *LogicOp;
+ BasicBlock *TBB, *FBB;
+ if (!match(BB.getTerminator(), m_Br(m_OneUse(m_BinOp(LogicOp)), TBB, FBB)))
+ continue;
+
+ unsigned Opc;
+ Value *Cond1, *Cond2;
+ if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)),
+ m_OneUse(m_Value(Cond2)))))
+ Opc = Instruction::And;
+ else if (match(LogicOp, m_Or(m_OneUse(m_Value(Cond1)),
+ m_OneUse(m_Value(Cond2)))))
+ Opc = Instruction::Or;
+ else
+ continue;
+
+ if (!match(Cond1, m_CombineOr(m_Cmp(), m_BinOp())) ||
+ !match(Cond2, m_CombineOr(m_Cmp(), m_BinOp())) )
+ continue;
+
+ DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
+
+ // Create a new BB.
+ auto *InsertBefore = std::next(Function::iterator(BB))
+ .getNodePtrUnchecked();
+ auto TmpBB = BasicBlock::Create(BB.getContext(),
+ BB.getName() + ".cond.split",
+ BB.getParent(), InsertBefore);
+
+ // Update original basic block by using the first condition directly by the
+ // branch instruction and removing the no longer needed and/or instruction.
+ auto *Br1 = cast<BranchInst>(BB.getTerminator());
+ Br1->setCondition(Cond1);
+ LogicOp->eraseFromParent();
+
+ // Depending on the conditon we have to either replace the true or the false
+ // successor of the original branch instruction.
+ if (Opc == Instruction::And)
+ Br1->setSuccessor(0, TmpBB);
+ else
+ Br1->setSuccessor(1, TmpBB);
+
+ // Fill in the new basic block.
+ auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB);
+ if (auto *I = dyn_cast<Instruction>(Cond2)) {
+ I->removeFromParent();
+ I->insertBefore(Br2);
+ }
+
+ // Update PHI nodes in both successors. The original BB needs to be
+ // replaced in one succesor's PHI nodes, because the branch comes now from
+ // the newly generated BB (NewBB). In the other successor we need to add one
+ // incoming edge to the PHI nodes, because both branch instructions target
+ // now the same successor. Depending on the original branch condition
+ // (and/or) we have to swap the successors (TrueDest, FalseDest), so that
+ // we perfrom the correct update for the PHI nodes.
+ // This doesn't change the successor order of the just created branch
+ // instruction (or any other instruction).
+ if (Opc == Instruction::Or)
+ std::swap(TBB, FBB);
+
+ // Replace the old BB with the new BB.
+ for (auto &I : *TBB) {
+ PHINode *PN = dyn_cast<PHINode>(&I);
+ if (!PN)
+ break;
+ int i;
+ while ((i = PN->getBasicBlockIndex(&BB)) >= 0)
+ PN->setIncomingBlock(i, TmpBB);
+ }
+
+ // Add another incoming edge form the new BB.
+ for (auto &I : *FBB) {
+ PHINode *PN = dyn_cast<PHINode>(&I);
+ if (!PN)
+ break;
+ auto *Val = PN->getIncomingValueForBlock(&BB);
+ PN->addIncoming(Val, TmpBB);
+ }
+
+ // Update the branch weights (from SelectionDAGBuilder::
+ // FindMergedConditions).
+ if (Opc == Instruction::Or) {
+ // Codegen X | Y as:
+ // BB1:
+ // jmp_if_X TBB
+ // jmp TmpBB
+ // TmpBB:
+ // jmp_if_Y TBB
+ // jmp FBB
+ //
+
+ // We have flexibility in setting Prob for BB1 and Prob for NewBB.
+ // The requirement is that
+ // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
+ // = TrueProb for orignal BB.
+ // Assuming the orignal weights are A and B, one choice is to set BB1's
+ // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
+ // assumes that
+ // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
+ // Another choice is to assume TrueProb for BB1 equals to TrueProb for
+ // TmpBB, but the math is more complicated.
+ uint64_t TrueWeight, FalseWeight;
+ if (extractBranchMetadata(Br1, TrueWeight, FalseWeight)) {
+ uint64_t NewTrueWeight = TrueWeight;
+ uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
+ scaleWeights(NewTrueWeight, NewFalseWeight);
+ Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
+ .createBranchWeights(TrueWeight, FalseWeight));
+
+ NewTrueWeight = TrueWeight;
+ NewFalseWeight = 2 * FalseWeight;
+ scaleWeights(NewTrueWeight, NewFalseWeight);
+ Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
+ .createBranchWeights(TrueWeight, FalseWeight));
+ }
+ } else {
+ // Codegen X & Y as:
+ // BB1:
+ // jmp_if_X TmpBB
+ // jmp FBB
+ // TmpBB:
+ // jmp_if_Y TBB
+ // jmp FBB
+ //
+ // This requires creation of TmpBB after CurBB.
+
+ // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
+ // The requirement is that
+ // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
+ // = FalseProb for orignal BB.
+ // Assuming the orignal weights are A and B, one choice is to set BB1's
+ // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
+ // assumes that
+ // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
+ uint64_t TrueWeight, FalseWeight;
+ if (extractBranchMetadata(Br1, TrueWeight, FalseWeight)) {
+ uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
+ uint64_t NewFalseWeight = FalseWeight;
+ scaleWeights(NewTrueWeight, NewFalseWeight);
+ Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
+ .createBranchWeights(TrueWeight, FalseWeight));
+
+ NewTrueWeight = 2 * TrueWeight;
+ NewFalseWeight = FalseWeight;
+ scaleWeights(NewTrueWeight, NewFalseWeight);
+ Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
+ .createBranchWeights(TrueWeight, FalseWeight));
+ }
+ }
+
+ // Request DOM Tree update.
+ // Note: No point in getting fancy here, since the DT info is never
+ // available to CodeGenPrepare and the existing update code is broken
+ // anyways.
+ ModifiedDT = true;
+
+ MadeChange = true;
+
+ DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
+ TmpBB->dump());
+ }
+ return MadeChange;
+}