aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp94
-rw-r--r--test/Transforms/SimplifyCFG/speculate-store.ll108
2 files changed, 198 insertions, 4 deletions
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 6de602e..052ad85 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -59,6 +59,10 @@ static cl::opt<bool>
SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
cl::desc("Sink common instructions down to the end block"));
+static cl::opt<bool>
+HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
+ cl::desc("Hoist conditional stores if an unconditional store preceeds"));
+
STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables");
STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block");
@@ -1332,6 +1336,66 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
return Changed;
}
+/// \brief Determine if we can hoist sink a sole store instruction out of a
+/// conditional block.
+///
+/// We are looking for code like the following:
+/// BrBB:
+/// store i32 %add, i32* %arrayidx2
+/// ... // No other stores or function calls (we could be calling a memory
+/// ... // function).
+/// %cmp = icmp ult %x, %y
+/// br i1 %cmp, label %EndBB, label %ThenBB
+/// ThenBB:
+/// store i32 %add5, i32* %arrayidx2
+/// br label EndBB
+/// EndBB:
+/// ...
+/// We are going to transform this into:
+/// BrBB:
+/// store i32 %add, i32* %arrayidx2
+/// ... //
+/// %cmp = icmp ult %x, %y
+/// %add.add5 = select i1 %cmp, i32 %add, %add5
+/// store i32 %add.add5, i32* %arrayidx2
+/// ...
+///
+/// \return The pointer to the value of the previous store if the store can be
+/// hoisted into the predecessor block. 0 otherwise.
+Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
+ BasicBlock *StoreBB, BasicBlock *EndBB) {
+ StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
+ if (!StoreToHoist)
+ return 0;
+
+ // Volatile or atomic.
+ if (!StoreToHoist->isSimple())
+ return 0;
+
+ Value *StorePtr = StoreToHoist->getPointerOperand();
+
+ // Look for a store to the same pointer in BrBB.
+ unsigned MaxNumInstToLookAt = 10;
+ for (BasicBlock::reverse_iterator RI = BrBB->rbegin(),
+ RE = BrBB->rend(); RI != RE && (--MaxNumInstToLookAt); ++RI) {
+ Instruction *CurI = &*RI;
+
+ // Could be calling an instruction that effects memory like free().
+ if (CurI->mayHaveSideEffects() && !isa<StoreInst>(CurI))
+ return 0;
+
+ StoreInst *SI = dyn_cast<StoreInst>(CurI);
+ // Found the previous store make sure it stores to the same location.
+ if (SI && SI->getPointerOperand() == StorePtr)
+ // Found the previous store, return its value operand.
+ return SI->getValueOperand();
+ else if (SI)
+ return 0; // Unknown store.
+ }
+
+ return 0;
+}
+
/// \brief Speculate a conditional basic block flattening the CFG.
///
/// Note that this is a very risky transform currently. Speculating
@@ -1395,6 +1459,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
unsigned SpeculationCost = 0;
+ Value *SpeculatedStoreValue = 0;
+ StoreInst *SpeculatedStore = 0;
for (BasicBlock::iterator BBI = ThenBB->begin(),
BBE = llvm::prior(ThenBB->end());
BBI != BBE; ++BBI) {
@@ -1410,13 +1476,21 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
return false;
// Don't hoist the instruction if it's unsafe or expensive.
- if (!isSafeToSpeculativelyExecute(I))
+ if (!isSafeToSpeculativelyExecute(I) &&
+ !(HoistCondStores &&
+ (SpeculatedStoreValue = isSafeToSpeculateStore(I, BB, ThenBB,
+ EndBB))))
return false;
- if (ComputeSpeculationCost(I) > PHINodeFoldingThreshold)
+ if (!SpeculatedStoreValue &&
+ ComputeSpeculationCost(I) > PHINodeFoldingThreshold)
return false;
+ // Store the store speculation candidate.
+ if (SpeculatedStoreValue)
+ SpeculatedStore = cast<StoreInst>(I);
+
// Do not hoist the instruction if any of its operands are defined but not
- // used in this BB. The transformation will prevent the operand from
+ // used in BB. The transformation will prevent the operand from
// being sunk into the use block.
for (User::op_iterator i = I->op_begin(), e = I->op_end();
i != e; ++i) {
@@ -1473,12 +1547,24 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
// If there are no PHIs to process, bail early. This helps ensure idempotence
// as well.
- if (!HaveRewritablePHIs)
+ if (!HaveRewritablePHIs && !(HoistCondStores && SpeculatedStoreValue))
return false;
// If we get here, we can hoist the instruction and if-convert.
DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
+ // Insert a select of the value of the speculated store.
+ if (SpeculatedStoreValue) {
+ IRBuilder<true, NoFolder> Builder(BI);
+ Value *TrueV = SpeculatedStore->getValueOperand();
+ Value *FalseV = SpeculatedStoreValue;
+ if (Invert)
+ std::swap(TrueV, FalseV);
+ Value *S = Builder.CreateSelect(BrCond, TrueV, FalseV, TrueV->getName() +
+ "." + FalseV->getName());
+ SpeculatedStore->setOperand(0, S);
+ }
+
// Hoist the instructions.
BB->getInstList().splice(BI, ThenBB->getInstList(), ThenBB->begin(),
llvm::prior(ThenBB->end()));
diff --git a/test/Transforms/SimplifyCFG/speculate-store.ll b/test/Transforms/SimplifyCFG/speculate-store.ll
new file mode 100644
index 0000000..8d7fe79
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/speculate-store.ll
@@ -0,0 +1,108 @@
+; RUN: opt -simplifycfg -S < %s | FileCheck %s
+
+define void @ifconvertstore(i32 %m, i32* %A, i32* %B, i32 %C, i32 %D) {
+entry:
+ %arrayidx = getelementptr inbounds i32* %B, i64 0
+ %0 = load i32* %arrayidx, align 4
+ %add = add nsw i32 %0, %C
+ %arrayidx2 = getelementptr inbounds i32* %A, i64 0
+
+; First store to the location.
+ store i32 %add, i32* %arrayidx2, align 4
+ %arrayidx4 = getelementptr inbounds i32* %B, i64 1
+ %1 = load i32* %arrayidx4, align 4
+ %add5 = add nsw i32 %1, %D
+ %cmp6 = icmp sgt i32 %add5, %C
+ br i1 %cmp6, label %if.then, label %ret.end
+
+; Make sure we speculate stores like the following one. It is cheap compared to
+; a mispredicated branch.
+; CHECK: @ifconvertstore
+; CHECK: %add5.add = select i1 %cmp6, i32 %add5, i32 %add
+; CHECK: store i32 %add5.add, i32* %arrayidx2, align 4
+if.then:
+ store i32 %add5, i32* %arrayidx2, align 4
+ br label %ret.end
+
+ret.end:
+ ret void
+}
+
+define void @noifconvertstore1(i32 %m, i32* %A, i32* %B, i32 %C, i32 %D) {
+entry:
+ %arrayidx = getelementptr inbounds i32* %B, i64 0
+ %0 = load i32* %arrayidx, align 4
+ %add = add nsw i32 %0, %C
+ %arrayidx2 = getelementptr inbounds i32* %A, i64 0
+
+; Store to a different location.
+ store i32 %add, i32* %arrayidx, align 4
+ %arrayidx4 = getelementptr inbounds i32* %B, i64 1
+ %1 = load i32* %arrayidx4, align 4
+ %add5 = add nsw i32 %1, %D
+ %cmp6 = icmp sgt i32 %add5, %C
+ br i1 %cmp6, label %if.then, label %ret.end
+
+; CHECK: @noifconvertstore1
+; CHECK-NOT: select
+if.then:
+ store i32 %add5, i32* %arrayidx2, align 4
+ br label %ret.end
+
+ret.end:
+ ret void
+}
+
+declare void @unknown_fun()
+
+define void @noifconvertstore2(i32 %m, i32* %A, i32* %B, i32 %C, i32 %D) {
+entry:
+ %arrayidx = getelementptr inbounds i32* %B, i64 0
+ %0 = load i32* %arrayidx, align 4
+ %add = add nsw i32 %0, %C
+ %arrayidx2 = getelementptr inbounds i32* %A, i64 0
+
+; First store to the location.
+ store i32 %add, i32* %arrayidx2, align 4
+ call void @unknown_fun()
+ %arrayidx4 = getelementptr inbounds i32* %B, i64 1
+ %1 = load i32* %arrayidx4, align 4
+ %add5 = add nsw i32 %1, %D
+ %cmp6 = icmp sgt i32 %add5, %C
+ br i1 %cmp6, label %if.then, label %ret.end
+
+; CHECK: @noifconvertstore2
+; CHECK-NOT: select
+if.then:
+ store i32 %add5, i32* %arrayidx2, align 4
+ br label %ret.end
+
+ret.end:
+ ret void
+}
+
+define void @noifconvertstore_volatile(i32 %m, i32* %A, i32* %B, i32 %C, i32 %D) {
+entry:
+ %arrayidx = getelementptr inbounds i32* %B, i64 0
+ %0 = load i32* %arrayidx, align 4
+ %add = add nsw i32 %0, %C
+ %arrayidx2 = getelementptr inbounds i32* %A, i64 0
+
+; First store to the location.
+ store i32 %add, i32* %arrayidx2, align 4
+ %arrayidx4 = getelementptr inbounds i32* %B, i64 1
+ %1 = load i32* %arrayidx4, align 4
+ %add5 = add nsw i32 %1, %D
+ %cmp6 = icmp sgt i32 %add5, %C
+ br i1 %cmp6, label %if.then, label %ret.end
+
+; Make sure we don't speculate volatile stores.
+; CHECK: @noifconvertstore_volatile
+; CHECK-NOT: select
+if.then:
+ store volatile i32 %add5, i32* %arrayidx2, align 4
+ br label %ret.end
+
+ret.end:
+ ret void
+}