aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lib/CodeGen/LiveIntervalAnalysis.cpp11
-rw-r--r--lib/CodeGen/RegAllocLinearScan.cpp245
-rw-r--r--test/CodeGen/X86/2008-08-05-SpillerBug.ll2
-rw-r--r--test/CodeGen/X86/2008-10-16-SpillerBug.ll2
-rw-r--r--test/CodeGen/X86/2009-04-20-LinearScanOpt.ll121
5 files changed, 312 insertions, 69 deletions
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 7c8041b..f6a1c48 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -1734,14 +1734,6 @@ LiveIntervals::handleSpilledImpDefs(const LiveInterval &li, VirtRegMap &vrm,
}
}
-namespace {
- struct LISorter {
- bool operator()(LiveInterval* A, LiveInterval* B) {
- return A->beginNumber() < B->beginNumber();
- }
- };
-}
-
std::vector<LiveInterval*> LiveIntervals::
addIntervalsForSpillsFast(const LiveInterval &li,
const MachineLoopInfo *loopInfo,
@@ -1838,9 +1830,6 @@ addIntervalsForSpillsFast(const LiveInterval &li,
RI = mri_->reg_begin(li.reg);
}
- // Clients expect the new intervals to be returned in sorted order.
- std::sort(added.begin(), added.end(), LISorter());
-
return added;
}
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
index 26af53e..ba2d3aa 100644
--- a/lib/CodeGen/RegAllocLinearScan.cpp
+++ b/lib/CodeGen/RegAllocLinearScan.cpp
@@ -45,6 +45,7 @@ using namespace llvm;
STATISTIC(NumIters , "Number of iterations performed");
STATISTIC(NumBacktracks, "Number of times we had to backtrack");
STATISTIC(NumCoalesce, "Number of copies coalesced");
+STATISTIC(NumDowngrade, "Number of registers downgraded");
static cl::opt<bool>
NewHeuristic("new-spilling-heuristic",
@@ -74,6 +75,19 @@ namespace {
EquivalenceClasses<const TargetRegisterClass*> RelatedRegClasses;
DenseMap<unsigned, const TargetRegisterClass*> OneClassForEachPhysReg;
+ // NextReloadMap - For each register in the map, it maps to the another
+ // register which is defined by a reload from the same stack slot and
+ // both reloads are in the same basic block.
+ DenseMap<unsigned, unsigned> NextReloadMap;
+
+ // DowngradedRegs - A set of registers which are being "downgraded", i.e.
+ // un-favored for allocation.
+ SmallSet<unsigned, 8> DowngradedRegs;
+
+ // DowngradeMap - A map from virtual registers to physical registers being
+ // downgraded for the virtual registers.
+ DenseMap<unsigned, unsigned> DowngradeMap;
+
MachineFunction* mf_;
MachineRegisterInfo* mri_;
const TargetMachine* tm_;
@@ -151,6 +165,16 @@ namespace {
/// ones to the active list.
void processInactiveIntervals(unsigned CurPoint);
+ /// hasNextReloadInterval - Return the next liveinterval that's being
+ /// defined by a reload from the same SS as the specified one.
+ LiveInterval *hasNextReloadInterval(LiveInterval *cur);
+
+ /// DowngradeRegister - Downgrade a register for allocation.
+ void DowngradeRegister(LiveInterval *li, unsigned Reg);
+
+ /// UpgradeRegister - Upgrade a register for allocation.
+ void UpgradeRegister(unsigned Reg);
+
/// assignRegOrStackSlotAtInterval - assign a register if one
/// is available, or spill.
void assignRegOrStackSlotAtInterval(LiveInterval* cur);
@@ -184,6 +208,10 @@ namespace {
/// getFreePhysReg - return a free physical register for this virtual
/// register interval if we have one, otherwise return 0.
unsigned getFreePhysReg(LiveInterval* cur);
+ unsigned getFreePhysReg(const TargetRegisterClass *RC,
+ unsigned MaxInactiveCount,
+ SmallVector<unsigned, 256> &inactiveCounts,
+ bool SkipDGRegs);
/// assignVirt2StackSlot - assigns this virtual register to a
/// stack slot. returns the stack slot
@@ -211,17 +239,15 @@ static RegisterPass<RALinScan>
X("linearscan-regalloc", "Linear Scan Register Allocator");
void RALinScan::ComputeRelatedRegClasses() {
- const TargetRegisterInfo &TRI = *tri_;
-
// First pass, add all reg classes to the union, and determine at least one
// reg class that each register is in.
bool HasAliases = false;
- for (TargetRegisterInfo::regclass_iterator RCI = TRI.regclass_begin(),
- E = TRI.regclass_end(); RCI != E; ++RCI) {
+ for (TargetRegisterInfo::regclass_iterator RCI = tri_->regclass_begin(),
+ E = tri_->regclass_end(); RCI != E; ++RCI) {
RelatedRegClasses.insert(*RCI);
for (TargetRegisterClass::iterator I = (*RCI)->begin(), E = (*RCI)->end();
I != E; ++I) {
- HasAliases = HasAliases || *TRI.getAliasSet(*I) != 0;
+ HasAliases = HasAliases || *tri_->getAliasSet(*I) != 0;
const TargetRegisterClass *&PRC = OneClassForEachPhysReg[*I];
if (PRC) {
@@ -241,7 +267,7 @@ void RALinScan::ComputeRelatedRegClasses() {
for (DenseMap<unsigned, const TargetRegisterClass*>::iterator
I = OneClassForEachPhysReg.begin(), E = OneClassForEachPhysReg.end();
I != E; ++I)
- for (const unsigned *AS = TRI.getAliasSet(I->first); *AS; ++AS)
+ for (const unsigned *AS = tri_->getAliasSet(I->first); *AS; ++AS)
RelatedRegClasses.unionSets(I->second, OneClassForEachPhysReg[*AS]);
}
@@ -326,6 +352,9 @@ bool RALinScan::runOnMachineFunction(MachineFunction &fn) {
active_.clear();
inactive_.clear();
handled_.clear();
+ NextReloadMap.clear();
+ DowngradedRegs.clear();
+ DowngradeMap.clear();
return true;
}
@@ -525,6 +554,9 @@ void RALinScan::updateSpillWeights(std::vector<float> &Weights,
SmallSet<unsigned, 4> Processed;
SmallSet<unsigned, 4> SuperAdded;
SmallVector<unsigned, 4> Supers;
+ // Unfavor downgraded registers for spilling.
+ if (DowngradedRegs.count(reg))
+ weight *= 2.0f;
Weights[reg] += weight;
Processed.insert(reg);
for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as) {
@@ -692,6 +724,43 @@ static bool weightsAreClose(float w1, float w2) {
return (diff / w2) <= 0.05f; // Within 5%.
}
+LiveInterval *RALinScan::hasNextReloadInterval(LiveInterval *cur) {
+ DenseMap<unsigned, unsigned>::iterator I = NextReloadMap.find(cur->reg);
+ if (I == NextReloadMap.end())
+ return 0;
+ return &li_->getInterval(I->second);
+}
+
+void RALinScan::DowngradeRegister(LiveInterval *li, unsigned Reg) {
+ bool isNew = DowngradedRegs.insert(Reg);
+ isNew = isNew; // Silence compiler warning.
+ assert(isNew && "Multiple reloads holding the same register?");
+ DowngradeMap.insert(std::make_pair(li->reg, Reg));
+ for (const unsigned *AS = tri_->getAliasSet(Reg); *AS; ++AS) {
+ isNew = DowngradedRegs.insert(*AS);
+ isNew = isNew; // Silence compiler warning.
+ assert(isNew && "Multiple reloads holding the same register?");
+ DowngradeMap.insert(std::make_pair(li->reg, *AS));
+ }
+ ++NumDowngrade;
+}
+
+void RALinScan::UpgradeRegister(unsigned Reg) {
+ if (Reg) {
+ DowngradedRegs.erase(Reg);
+ for (const unsigned *AS = tri_->getAliasSet(Reg); *AS; ++AS)
+ DowngradedRegs.erase(*AS);
+ }
+}
+
+namespace {
+ struct LISorter {
+ bool operator()(LiveInterval* A, LiveInterval* B) {
+ return A->beginNumber() < B->beginNumber();
+ }
+ };
+}
+
/// assignRegOrStackSlotAtInterval - assign a register if one is available, or
/// spill.
void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
@@ -836,6 +905,15 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
prt_->addRegUse(physReg);
active_.push_back(std::make_pair(cur, cur->begin()));
handled_.push_back(cur);
+
+ // "Upgrade" the physical register since it has been allocated.
+ UpgradeRegister(physReg);
+ if (LiveInterval *NextReloadLI = hasNextReloadInterval(cur)) {
+ // "Downgrade" physReg to try to keep physReg from being allocated until
+ // the next reload from the same SS is allocated.
+ NextReloadLI->preference = physReg;
+ DowngradeRegister(cur, physReg);
+ }
return;
}
DOUT << "no free registers\n";
@@ -896,9 +974,10 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
if (cur->weight == HUGE_VALF ||
li_->getApproximateInstructionCount(*cur) == 0) {
// Spill a physical register around defs and uses.
- if (li_->spillPhysRegAroundRegDefsUses(*cur, minReg, *vrm_))
+ if (li_->spillPhysRegAroundRegDefsUses(*cur, minReg, *vrm_)) {
+ DowngradedRegs.clear();
assignRegOrStackSlotAtInterval(cur);
- else {
+ } else {
cerr << "Ran out of registers during register allocation!\n";
exit(1);
}
@@ -919,7 +998,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
DOUT << tri_->getName(RegsWeights[i].first)
<< " (" << RegsWeights[i].second << ")\n");
- // if the current has the minimum weight, we need to spill it and
+ // If the current has the minimum weight, we need to spill it and
// add any added intervals back to unhandled, and restart
// linearscan.
if (cur->weight != HUGE_VALF && cur->weight <= minWeight) {
@@ -928,12 +1007,13 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
SmallVector<LiveInterval*, 8> spillIs;
std::vector<LiveInterval*> added =
li_->addIntervalsForSpills(*cur, spillIs, loopInfo, *vrm_, SSWeight);
+ std::sort(added.begin(), added.end(), LISorter());
addStackInterval(cur, ls_, li_, SSWeight, *vrm_);
if (added.empty())
return; // Early exit if all spills were folded.
- // Merge added with unhandled. Note that we know that
- // addIntervalsForSpills returns intervals sorted by their starting
+ // Merge added with unhandled. Note that we have already sorted
+ // intervals returned by addIntervalsForSpills by their starting
// point.
for (unsigned i = 0, e = added.size(); i != e; ++i)
unhandled_.push(added[i]);
@@ -942,7 +1022,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
++NumBacktracks;
- // push the current interval back to unhandled since we are going
+ // Push the current interval back to unhandled since we are going
// to re-run at least this iteration. Since we didn't modify it it
// should go back right in the front of the list
unhandled_.push(cur);
@@ -1020,10 +1100,15 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
unhandled_.push(i);
}
- // It interval has a preference, it must be defined by a copy. Clear the
- // preference now since the source interval allocation may have been undone
- // as well.
- i->preference = 0;
+ DenseMap<unsigned, unsigned>::iterator ii = DowngradeMap.find(i->reg);
+ if (ii == DowngradeMap.end())
+ // It interval has a preference, it must be defined by a copy. Clear the
+ // preference now since the source interval allocation may have been
+ // undone as well.
+ i->preference = 0;
+ else {
+ UpgradeRegister(ii->second);
+ }
}
// Rewind the iterators in the active, inactive, and fixed lists back to the
@@ -1032,7 +1117,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
RevertVectorIteratorsTo(inactive_, earliestStart);
RevertVectorIteratorsTo(fixed_, earliestStart);
- // scan the rest and undo each interval that expired after t and
+ // Scan the rest and undo each interval that expired after t and
// insert it in active (the next iteration of the algorithm will
// put it in inactive if required)
for (unsigned i = 0, e = handled_.size(); i != e; ++i) {
@@ -1046,9 +1131,87 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
}
}
- // merge added with unhandled
- for (unsigned i = 0, e = added.size(); i != e; ++i)
- unhandled_.push(added[i]);
+ // Merge added with unhandled.
+ // This also update the NextReloadMap. That is, it adds mapping from a
+ // register defined by a reload from SS to the next reload from SS in the
+ // same basic block.
+ MachineBasicBlock *LastReloadMBB = 0;
+ LiveInterval *LastReload = 0;
+ int LastReloadSS = VirtRegMap::NO_STACK_SLOT;
+ std::sort(added.begin(), added.end(), LISorter());
+ for (unsigned i = 0, e = added.size(); i != e; ++i) {
+ LiveInterval *ReloadLi = added[i];
+ if (ReloadLi->weight == HUGE_VALF &&
+ li_->getApproximateInstructionCount(*ReloadLi) == 0) {
+ unsigned ReloadIdx = ReloadLi->beginNumber();
+ MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx);
+ int ReloadSS = vrm_->getStackSlot(ReloadLi->reg);
+ if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) {
+ // Last reload of same SS is in the same MBB. We want to try to
+ // allocate both reloads the same register and make sure the reg
+ // isn't clobbered in between if at all possible.
+ assert(LastReload->beginNumber() < ReloadIdx);
+ NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg));
+ }
+ LastReloadMBB = ReloadMBB;
+ LastReload = ReloadLi;
+ LastReloadSS = ReloadSS;
+ }
+ unhandled_.push(ReloadLi);
+ }
+}
+
+unsigned RALinScan::getFreePhysReg(const TargetRegisterClass *RC,
+ unsigned MaxInactiveCount,
+ SmallVector<unsigned, 256> &inactiveCounts,
+ bool SkipDGRegs) {
+ unsigned FreeReg = 0;
+ unsigned FreeRegInactiveCount = 0;
+
+ TargetRegisterClass::iterator I = RC->allocation_order_begin(*mf_);
+ TargetRegisterClass::iterator E = RC->allocation_order_end(*mf_);
+ assert(I != E && "No allocatable register in this register class!");
+
+ // Scan for the first available register.
+ for (; I != E; ++I) {
+ unsigned Reg = *I;
+ // Ignore "downgraded" registers.
+ if (SkipDGRegs && DowngradedRegs.count(Reg))
+ continue;
+ if (prt_->isRegAvail(Reg)) {
+ FreeReg = Reg;
+ if (FreeReg < inactiveCounts.size())
+ FreeRegInactiveCount = inactiveCounts[FreeReg];
+ else
+ FreeRegInactiveCount = 0;
+ break;
+ }
+ }
+
+ // If there are no free regs, or if this reg has the max inactive count,
+ // return this register.
+ if (FreeReg == 0 || FreeRegInactiveCount == MaxInactiveCount)
+ return FreeReg;
+
+ // Continue scanning the registers, looking for the one with the highest
+ // inactive count. Alkis found that this reduced register pressure very
+ // slightly on X86 (in rev 1.94 of this file), though this should probably be
+ // reevaluated now.
+ for (; I != E; ++I) {
+ unsigned Reg = *I;
+ // Ignore "downgraded" registers.
+ if (SkipDGRegs && DowngradedRegs.count(Reg))
+ continue;
+ if (prt_->isRegAvail(Reg) && Reg < inactiveCounts.size() &&
+ FreeRegInactiveCount < inactiveCounts[Reg]) {
+ FreeReg = Reg;
+ FreeRegInactiveCount = inactiveCounts[Reg];
+ if (FreeRegInactiveCount == MaxInactiveCount)
+ break; // We found the one with the max inactive count.
+ }
+ }
+
+ return FreeReg;
}
/// getFreePhysReg - return a free physical register for this virtual register
@@ -1078,9 +1241,6 @@ unsigned RALinScan::getFreePhysReg(LiveInterval *cur) {
}
}
- unsigned FreeReg = 0;
- unsigned FreeRegInactiveCount = 0;
-
// If copy coalescer has assigned a "preferred" register, check if it's
// available first.
if (cur->preference) {
@@ -1094,40 +1254,13 @@ unsigned RALinScan::getFreePhysReg(LiveInterval *cur) {
<< tri_->getName(cur->preference) << "\n";
}
- // Scan for the first available register.
- TargetRegisterClass::iterator I = RC->allocation_order_begin(*mf_);
- TargetRegisterClass::iterator E = RC->allocation_order_end(*mf_);
- assert(I != E && "No allocatable register in this register class!");
- for (; I != E; ++I)
- if (prt_->isRegAvail(*I)) {
- FreeReg = *I;
- if (FreeReg < inactiveCounts.size())
- FreeRegInactiveCount = inactiveCounts[FreeReg];
- else
- FreeRegInactiveCount = 0;
- break;
- }
-
- // If there are no free regs, or if this reg has the max inactive count,
- // return this register.
- if (FreeReg == 0 || FreeRegInactiveCount == MaxInactiveCount) return FreeReg;
-
- // Continue scanning the registers, looking for the one with the highest
- // inactive count. Alkis found that this reduced register pressure very
- // slightly on X86 (in rev 1.94 of this file), though this should probably be
- // reevaluated now.
- for (; I != E; ++I) {
- unsigned Reg = *I;
- if (prt_->isRegAvail(Reg) && Reg < inactiveCounts.size() &&
- FreeRegInactiveCount < inactiveCounts[Reg]) {
- FreeReg = Reg;
- FreeRegInactiveCount = inactiveCounts[Reg];
- if (FreeRegInactiveCount == MaxInactiveCount)
- break; // We found the one with the max inactive count.
- }
+ if (!DowngradedRegs.empty()) {
+ unsigned FreeReg = getFreePhysReg(RC, MaxInactiveCount, inactiveCounts,
+ true);
+ if (FreeReg)
+ return FreeReg;
}
-
- return FreeReg;
+ return getFreePhysReg(RC, MaxInactiveCount, inactiveCounts, false);
}
FunctionPass* llvm::createLinearScanRegisterAllocator() {
diff --git a/test/CodeGen/X86/2008-08-05-SpillerBug.ll b/test/CodeGen/X86/2008-08-05-SpillerBug.ll
index 5ae802d..2ebbe6e 100644
--- a/test/CodeGen/X86/2008-08-05-SpillerBug.ll
+++ b/test/CodeGen/X86/2008-08-05-SpillerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -disable-fp-elim -stats |& grep asm-printer | grep 57
+; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -disable-fp-elim -stats |& grep asm-printer | grep 56
; PR2568
@g_3 = external global i16 ; <i16*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-10-16-SpillerBug.ll b/test/CodeGen/X86/2008-10-16-SpillerBug.ll
index 5caad4f..4318f1d 100644
--- a/test/CodeGen/X86/2008-10-16-SpillerBug.ll
+++ b/test/CodeGen/X86/2008-10-16-SpillerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin | grep {andl.*7.*ecx}
+; RUN: llvm-as < %s | llc -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin | grep {andl.*7.*edx}
%struct.XXDActiveTextureTargets = type { i64, i64, i64, i64, i64, i64 }
%struct.XXDAlphaTest = type { float, i16, i8, i8 }
diff --git a/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll b/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll
new file mode 100644
index 0000000..985eb21
--- /dev/null
+++ b/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll
@@ -0,0 +1,121 @@
+; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of registers downgraded}
+; rdar://6802189
+
+; Test if linearscan is unfavoring registers for allocation to allow more reuse
+; of reloads from stack slots.
+
+ %struct.SHA_CTX = type { i32, i32, i32, i32, i32, i32, i32, [16 x i32], i32 }
+
+define fastcc void @sha1_block_data_order(%struct.SHA_CTX* nocapture %c, i8* %p, i64 %num) nounwind {
+entry:
+ br label %bb
+
+bb: ; preds = %bb, %entry
+ %asmtmp511 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 0) nounwind ; <i32> [#uses=3]
+ %asmtmp513 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 0) nounwind ; <i32> [#uses=2]
+ %asmtmp516 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 0) nounwind ; <i32> [#uses=1]
+ %asmtmp517 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 0) nounwind ; <i32> [#uses=2]
+ %0 = xor i32 0, %asmtmp513 ; <i32> [#uses=0]
+ %1 = add i32 0, %asmtmp517 ; <i32> [#uses=1]
+ %2 = add i32 %1, 0 ; <i32> [#uses=1]
+ %3 = add i32 %2, 0 ; <i32> [#uses=1]
+ %asmtmp519 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 0) nounwind ; <i32> [#uses=1]
+ %4 = xor i32 0, %asmtmp511 ; <i32> [#uses=1]
+ %asmtmp520 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 %4) nounwind ; <i32> [#uses=2]
+ %5 = xor i32 0, %asmtmp516 ; <i32> [#uses=1]
+ %6 = xor i32 %5, %asmtmp519 ; <i32> [#uses=1]
+ %7 = add i32 %asmtmp513, -899497514 ; <i32> [#uses=1]
+ %8 = add i32 %7, %asmtmp520 ; <i32> [#uses=1]
+ %9 = add i32 %8, %6 ; <i32> [#uses=1]
+ %10 = add i32 %9, 0 ; <i32> [#uses=1]
+ %asmtmp523 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 0) nounwind ; <i32> [#uses=1]
+ %asmtmp525 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 %3) nounwind ; <i32> [#uses=2]
+ %11 = xor i32 0, %asmtmp525 ; <i32> [#uses=1]
+ %12 = add i32 0, %11 ; <i32> [#uses=1]
+ %13 = add i32 %12, 0 ; <i32> [#uses=2]
+ %asmtmp528 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 %10) nounwind ; <i32> [#uses=1]
+ %14 = xor i32 0, %asmtmp520 ; <i32> [#uses=1]
+ %asmtmp529 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 %14) nounwind ; <i32> [#uses=1]
+ %asmtmp530 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 5, i32 %13) nounwind ; <i32> [#uses=1]
+ %15 = add i32 0, %asmtmp530 ; <i32> [#uses=1]
+ %16 = xor i32 0, %asmtmp523 ; <i32> [#uses=1]
+ %asmtmp532 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 %16) nounwind ; <i32> [#uses=2]
+ %asmtmp533 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 5, i32 %15) nounwind ; <i32> [#uses=1]
+ %17 = xor i32 %13, %asmtmp528 ; <i32> [#uses=1]
+ %18 = xor i32 %17, 0 ; <i32> [#uses=1]
+ %19 = add i32 %asmtmp525, -899497514 ; <i32> [#uses=1]
+ %20 = add i32 %19, %asmtmp532 ; <i32> [#uses=1]
+ %21 = add i32 %20, %18 ; <i32> [#uses=1]
+ %22 = add i32 %21, %asmtmp533 ; <i32> [#uses=1]
+ %23 = xor i32 0, %asmtmp511 ; <i32> [#uses=1]
+ %24 = xor i32 %23, 0 ; <i32> [#uses=1]
+ %asmtmp535 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 %24) nounwind ; <i32> [#uses=3]
+ %25 = add i32 0, %asmtmp535 ; <i32> [#uses=1]
+ %26 = add i32 %25, 0 ; <i32> [#uses=1]
+ %27 = add i32 %26, 0 ; <i32> [#uses=1]
+ %28 = xor i32 0, %asmtmp529 ; <i32> [#uses=0]
+ %29 = xor i32 %22, 0 ; <i32> [#uses=1]
+ %30 = xor i32 %29, 0 ; <i32> [#uses=1]
+ %31 = add i32 0, %30 ; <i32> [#uses=1]
+ %32 = add i32 %31, 0 ; <i32> [#uses=3]
+ %asmtmp541 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 0) nounwind ; <i32> [#uses=2]
+ %asmtmp542 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 5, i32 %32) nounwind ; <i32> [#uses=1]
+ %33 = add i32 0, %asmtmp541 ; <i32> [#uses=1]
+ %34 = add i32 %33, 0 ; <i32> [#uses=1]
+ %35 = add i32 %34, %asmtmp542 ; <i32> [#uses=1]
+ %asmtmp543 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 %27) nounwind ; <i32> [#uses=2]
+ %36 = xor i32 0, %asmtmp535 ; <i32> [#uses=0]
+ %37 = xor i32 %32, 0 ; <i32> [#uses=1]
+ %38 = xor i32 %37, %asmtmp543 ; <i32> [#uses=1]
+ %39 = add i32 0, %38 ; <i32> [#uses=1]
+ %40 = add i32 %39, 0 ; <i32> [#uses=2]
+ %asmtmp546 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 %32) nounwind ; <i32> [#uses=1]
+ %asmtmp547 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 0) nounwind ; <i32> [#uses=2]
+ %41 = add i32 0, -899497514 ; <i32> [#uses=1]
+ %42 = add i32 %41, %asmtmp547 ; <i32> [#uses=1]
+ %43 = add i32 %42, 0 ; <i32> [#uses=1]
+ %44 = add i32 %43, 0 ; <i32> [#uses=3]
+ %asmtmp549 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 %35) nounwind ; <i32> [#uses=2]
+ %45 = xor i32 0, %asmtmp541 ; <i32> [#uses=1]
+ %asmtmp550 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 %45) nounwind ; <i32> [#uses=2]
+ %asmtmp551 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 5, i32 %44) nounwind ; <i32> [#uses=1]
+ %46 = xor i32 %40, %asmtmp546 ; <i32> [#uses=1]
+ %47 = xor i32 %46, %asmtmp549 ; <i32> [#uses=1]
+ %48 = add i32 %asmtmp543, -899497514 ; <i32> [#uses=1]
+ %49 = add i32 %48, %asmtmp550 ; <i32> [#uses=1]
+ %50 = add i32 %49, %47 ; <i32> [#uses=1]
+ %51 = add i32 %50, %asmtmp551 ; <i32> [#uses=1]
+ %asmtmp552 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 %40) nounwind ; <i32> [#uses=2]
+ %52 = xor i32 %44, %asmtmp549 ; <i32> [#uses=1]
+ %53 = xor i32 %52, %asmtmp552 ; <i32> [#uses=1]
+ %54 = add i32 0, %53 ; <i32> [#uses=1]
+ %55 = add i32 %54, 0 ; <i32> [#uses=2]
+ %asmtmp555 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 %44) nounwind ; <i32> [#uses=2]
+ %56 = xor i32 0, %asmtmp532 ; <i32> [#uses=1]
+ %57 = xor i32 %56, %asmtmp547 ; <i32> [#uses=1]
+ %asmtmp556 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 %57) nounwind ; <i32> [#uses=1]
+ %58 = add i32 0, %asmtmp556 ; <i32> [#uses=1]
+ %59 = add i32 %58, 0 ; <i32> [#uses=1]
+ %60 = add i32 %59, 0 ; <i32> [#uses=1]
+ %asmtmp558 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 %51) nounwind ; <i32> [#uses=1]
+ %61 = xor i32 %asmtmp517, %asmtmp511 ; <i32> [#uses=1]
+ %62 = xor i32 %61, %asmtmp535 ; <i32> [#uses=1]
+ %63 = xor i32 %62, %asmtmp550 ; <i32> [#uses=1]
+ %asmtmp559 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 %63) nounwind ; <i32> [#uses=1]
+ %64 = xor i32 %55, %asmtmp555 ; <i32> [#uses=1]
+ %65 = xor i32 %64, %asmtmp558 ; <i32> [#uses=1]
+ %asmtmp561 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 %55) nounwind ; <i32> [#uses=1]
+ %66 = add i32 %asmtmp552, -899497514 ; <i32> [#uses=1]
+ %67 = add i32 %66, %65 ; <i32> [#uses=1]
+ %68 = add i32 %67, %asmtmp559 ; <i32> [#uses=1]
+ %69 = add i32 %68, 0 ; <i32> [#uses=1]
+ %70 = add i32 %69, 0 ; <i32> [#uses=1]
+ store i32 %70, i32* null, align 4
+ %71 = add i32 0, %60 ; <i32> [#uses=1]
+ store i32 %71, i32* null, align 4
+ %72 = add i32 0, %asmtmp561 ; <i32> [#uses=1]
+ store i32 %72, i32* null, align 4
+ %73 = add i32 0, %asmtmp555 ; <i32> [#uses=1]
+ store i32 %73, i32* null, align 4
+ br label %bb
+}