am 1c4ad5ef: Merge branch 'upstream' into merge-2012_09_10

* commit '1c4ad5ef4fab105f0c8af7edd026e00502fb6279': (446 commits) Revert r163556. Missed updates to tablegen files. Update function names to conform to guidelines. No functional change intended. test/CodeGen/X86/ms-inline-asm.ll: Relax for non-darwin x86 targets. '##InlineAsm' could not be seen in other hosts. [ms-inline asm] Properly emit the asm directives when the AsmPrinterVariant and InlineAsmVariant don't match. Update test case for Release builds. Remove redundant semicolons which are null statements. Disable stack coloring because it makes dragonegg fail bootstrapping. [ms-inline asm] Pass the correct AsmVariant to the PrintAsmOperand() function and update the printOperand() function accordingly. [ms-inline asm] Add support for .att_syntax directive. Enable stack coloring. Don't attempt to use flags from predicated instructions. [Object] Extract Elf_Ehdr. Patch by Hemant Kulkarni! Stack Coloring: Handle the case where END markers come before BEGIN markers properly. Enhance PR11334 fix to support extload from v2f32/v4f32 Add "blocked" heuristic to the Hexagon MI scheduler. Fold multiply by 0 or 1 when in UnsafeFPMath mode in SelectionDAG::getNode(). whitespace Add boolean simplification support from CMOV Fix an assertion failure when optimising a shufflevector incorrectly into concat_vectors, and a followup bug with SelectionDAG::getNode() creating nodes with invalid types. Minor cleanup. No functional change. ...
author: Stephen Hines <srhines@google.com> 2012-09-13 19:10:35 -0700
committer: Android Git Automerger <android-git-automerger@android.com> 2012-09-13 19:10:35 -0700
commit: 8f1c32e4f21c4e297f5690acf958a842384ba802 (patch)
tree: 52800183ec2d22164b8f396842142c3a8aab912a /lib/Target/Hexagon
parent: 828ded66831c0caaeecd2291a6bfb084f373d0e4 (diff)
parent: 1c4ad5ef4fab105f0c8af7edd026e00502fb6279 (diff)
download: external_llvm-8f1c32e4f21c4e297f5690acf958a842384ba802.zip
external_llvm-8f1c32e4f21c4e297f5690acf958a842384ba802.tar.gz
external_llvm-8f1c32e4f21c4e297f5690acf958a842384ba802.tar.bz2
12 files changed, 1508 insertions, 5 deletions
diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt
index 1f2d8ac..306084b 100644
--- a/lib/Target/Hexagon/CMakeLists.txt
+++ b/lib/Target/Hexagon/CMakeLists.txt
@@ -16,6 +16,7 @@ add_llvm_target(HexagonCodeGen
   HexagonExpandPredSpillCode.cpp
   HexagonFrameLowering.cpp
   HexagonHardwareLoops.cpp
+  HexagonMachineScheduler.cpp
   HexagonMCInstLower.cpp
   HexagonInstrInfo.cpp
   HexagonISelDAGToDAG.cpp
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
new file mode 100644
index 0000000..b131a8f
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -0,0 +1,952 @@
+//===- HexagonMachineScheduler.cpp - MI Scheduler for Hexagon -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// MachineScheduler schedules machine instructions after phi elimination. It
+// preserves LiveIntervals so it can be invoked before register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "misched"
+
+#include "HexagonMachineScheduler.h"
+
+#include <queue>
+
+using namespace llvm;
+
+// Hidden command-line switches that force a single scheduling direction.
+// ConvergingVLIWScheduler::initialize() asserts that at most one of them is
+// set.
+static cl::opt<bool> ForceTopDown("vliw-misched-topdown", cl::Hidden,
+                                  cl::desc("Force top-down list scheduling"));
+static cl::opt<bool> ForceBottomUp("vliw-misched-bottomup", cl::Hidden,
+                                   cl::desc("Force bottom-up list scheduling"));
+
+// Debugging aids. The real command-line options only exist when NDEBUG is not
+// defined.
+#ifndef NDEBUG
+static cl::opt<bool> ViewMISchedDAGs("vliw-view-misched-dags", cl::Hidden,
+  cl::desc("Pop up a window to show MISched dags after they are processed"));
+
+// ~0U (the default) means "no cutoff"; see checkSchedLimit().
+static cl::opt<unsigned> MISchedCutoff("vliw-misched-cutoff", cl::Hidden,
+  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
+#else
+// With NDEBUG defined, keep a plain constant so that code testing
+// ViewMISchedDAGs still compiles. MISchedCutoff has no counterpart here; it is
+// only referenced under #ifndef NDEBUG.
+static bool ViewMISchedDAGs = false;
+#endif // NDEBUG
+
+/// Step \p I backwards and return the first position that is either a
+/// non-debug instruction or \p Beg itself. \p I must differ from \p Beg on
+/// entry, because the iterator is always decremented at least once.
+static MachineBasicBlock::iterator
+priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) {
+  assert(I != Beg && "reached the top of the region, cannot decrement");
+  --I;
+  // Keep walking up while we are still below Beg and sitting on a DBG_VALUE.
+  while (I != Beg && I->isDebugValue())
+    --I;
+  return I;
+}
+
+/// Skip forward over debug values: return the first position at or after
+/// \p I that is either a non-debug instruction or \p End.
+static MachineBasicBlock::iterator
+nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) {
+  while (I != End && I->isDebugValue())
+    ++I;
+  return I;
+}
+
+/// releaseSucc - Record that one more predecessor of the node reached through
+/// \p SuccEdge has been scheduled. Once its NumPredsLeft count drops to zero
+/// the successor is handed to the strategy as a top-ready node, unless it is
+/// the special ExitSU.
+///
+/// FIXME: Adjust SuccSU height based on MinLatency.
+void VLIWMachineScheduler::releaseSucc(SUnit *SU, SDep *SuccEdge) {
+  SUnit *Succ = SuccEdge->getSUnit();
+
+#ifndef NDEBUG
+  // A count that is already zero means this edge was released twice.
+  if (!Succ->NumPredsLeft) {
+    dbgs() << "*** Scheduling failed! ***\n";
+    Succ->dump(this);
+    dbgs() << " has been released too many times!\n";
+    llvm_unreachable(0);
+  }
+#endif
+  --Succ->NumPredsLeft;
+
+  // Still waiting on other predecessors.
+  if (Succ->NumPredsLeft != 0)
+    return;
+  // The exit node is never given to the strategy.
+  if (Succ == &ExitSU)
+    return;
+  SchedImpl->releaseTopNode(Succ);
+}
+
+/// releaseSuccessors - Invoke releaseSucc once for every outgoing edge of
+/// \p SU.
+void VLIWMachineScheduler::releaseSuccessors(SUnit *SU) {
+  SUnit::succ_iterator Edge = SU->Succs.begin();
+  const SUnit::succ_iterator EdgeEnd = SU->Succs.end();
+  while (Edge != EdgeEnd) {
+    releaseSucc(SU, &*Edge);
+    ++Edge;
+  }
+}
+
+/// releasePred - Record that one more successor of the node reached through
+/// \p PredEdge has been scheduled. Once its NumSuccsLeft count drops to zero
+/// the predecessor is handed to the strategy as a bottom-ready node, unless it
+/// is the special EntrySU.
+///
+/// FIXME: Adjust PredSU height based on MinLatency.
+void VLIWMachineScheduler::releasePred(SUnit *SU, SDep *PredEdge) {
+  SUnit *Pred = PredEdge->getSUnit();
+
+#ifndef NDEBUG
+  // A count that is already zero means this edge was released twice.
+  if (!Pred->NumSuccsLeft) {
+    dbgs() << "*** Scheduling failed! ***\n";
+    Pred->dump(this);
+    dbgs() << " has been released too many times!\n";
+    llvm_unreachable(0);
+  }
+#endif
+  --Pred->NumSuccsLeft;
+
+  // Still waiting on other successors.
+  if (Pred->NumSuccsLeft != 0)
+    return;
+  // The entry node is never given to the strategy.
+  if (Pred == &EntrySU)
+    return;
+  SchedImpl->releaseBottomNode(Pred);
+}
+
+/// releasePredecessors - Invoke releasePred once for every incoming edge of
+/// \p SU.
+void VLIWMachineScheduler::releasePredecessors(SUnit *SU) {
+  SUnit::pred_iterator Edge = SU->Preds.begin();
+  const SUnit::pred_iterator EdgeEnd = SU->Preds.end();
+  while (Edge != EdgeEnd) {
+    releasePred(SU, &*Edge);
+    ++Edge;
+  }
+}
+
+/// Move \p MI so that it sits immediately before \p InsertPos in the current
+/// block, keeping RegionBegin and LiveIntervals consistent with the new
+/// instruction order.
+void VLIWMachineScheduler::moveInstruction(MachineInstr *MI,
+                                    MachineBasicBlock::iterator InsertPos) {
+  // Advance RegionBegin if the first instruction moves down.
+  // This has to happen before the splice, while MI is still at RegionBegin.
+  if (&*RegionBegin == MI)
+    ++RegionBegin;
+
+  // Update the instruction stream.
+  BB->splice(InsertPos, BB, MI);
+
+  // Update LiveIntervals
+  LIS->handleMove(MI);
+
+  // Recede RegionBegin if an instruction moves above the first.
+  if (RegionBegin == InsertPos)
+    RegionBegin = MI;
+}
+
+/// Debugging throttle for the scheduling loop. When NDEBUG is not defined and
+/// a cutoff was given via -vliw-misched-cutoff, return false once that many
+/// instructions have been scheduled, collapsing the unscheduled zone by
+/// setting CurrentTop to CurrentBottom. In every other case return true; the
+/// running count is only maintained when NDEBUG is not defined.
+bool VLIWMachineScheduler::checkSchedLimit() {
+#ifndef NDEBUG
+  // ~0U is the "no limit" sentinel.
+  const bool CutoffEnabled = MISchedCutoff != ~0U;
+  if (CutoffEnabled && NumInstrsScheduled == MISchedCutoff) {
+    CurrentTop = CurrentBottom;
+    return false;
+  }
+  ++NumInstrsScheduled;
+#endif
+  return true;
+}
+
+/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
+/// crossing a scheduling boundary. [begin, end) includes all instructions in
+/// the region, including the boundary itself and single-instruction regions
+/// that don't get scheduled.
+///
+/// Besides forwarding to ScheduleDAGInstrs::enterRegion, this caches
+/// LiveRegionEnd: one instruction past RegionEnd, or RegionEnd itself when the
+/// region already ends at the end of the block.
+void VLIWMachineScheduler::enterRegion(MachineBasicBlock *bb,
+                                MachineBasicBlock::iterator begin,
+                                MachineBasicBlock::iterator end,
+                                unsigned endcount)
+{
+  ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount);
+
+  // Remember where the liveness region ends.
+  if (RegionEnd == bb->end())
+    LiveRegionEnd = RegionEnd;
+  else
+    LiveRegionEnd = llvm::next(RegionEnd);
+}
+
+// Set up the register pressure trackers for the top-scheduled and
+// bottom-scheduled zones, then record which pressure sets already exceed
+// their limit in this region (RegionCriticalPSets). Also resets TotalPackets.
+void VLIWMachineScheduler::initRegPressure() {
+  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
+  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
+
+  // Close the RPTracker to finalize live ins.
+  RPTracker.closeRegion();
+
+  DEBUG(RPTracker.getPressure().dump(TRI));
+
+  // Initialize the live ins and live outs.
+  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
+  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
+
+  // Close one end of the tracker so we can call
+  // getMaxUpward/DownwardPressureDelta before advancing across any
+  // instructions. This converts currently live regs into live ins/outs.
+  TopRPTracker.closeTop();
+  BotRPTracker.closeBottom();
+
+  // Account for liveness generated by the region boundary.
+  if (LiveRegionEnd != RegionEnd)
+    BotRPTracker.recede();
+
+  assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
+
+  // Cache the list of excess pressure sets in this region. This will also track
+  // the max pressure in the scheduled code for these sets.
+  RegionCriticalPSets.clear();
+  // The pressure vector is only read below, so bind a reference instead of
+  // copying it.
+  const std::vector<unsigned> &RegionPressure =
+    RPTracker.getPressure().MaxSetPressure;
+  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
+    unsigned Limit = TRI->getRegPressureSetLimit(i);
+    DEBUG(dbgs() << TRI->getRegPressureSetName(i)
+          << "Limit " << Limit
+          << " Actual " << RegionPressure[i] << "\n");
+    // Each critical set starts with a unit increase of zero;
+    // updateScheduledPressure() raises it as scheduling proceeds.
+    if (RegionPressure[i] > Limit)
+      RegionCriticalPSets.push_back(PressureElement(i, 0));
+  }
+  DEBUG(dbgs() << "Excess PSets: ";
+        for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
+          dbgs() << TRI->getRegPressureSetName(
+            RegionCriticalPSets[i].PSetID) << " ";
+        dbgs() << "\n");
+
+  TotalPackets = 0;
+}
+
+// Raise the recorded maximum (UnitIncrease) of every critical pressure set to
+// the value in NewMaxPressure whenever the new value is larger.
+//
+// FIXME: When the pressure tracker deals in pressure differences then we won't
+// iterate over all RegionCriticalPSets[i].
+void VLIWMachineScheduler::
+updateScheduledPressure(std::vector<unsigned> NewMaxPressure) {
+  const unsigned NumCritical = RegionCriticalPSets.size();
+  for (unsigned Idx = 0; Idx < NumCritical; ++Idx) {
+    const unsigned SetID = RegionCriticalPSets[Idx].PSetID;
+    const int Observed = (int)NewMaxPressure[SetID];
+    int &Recorded = RegionCriticalPSets[Idx].UnitIncrease;
+    if (Recorded < Observed)
+      Recorded = Observed;
+  }
+}
+
+/// Check whether \p SU could be added to the packet currently being built.
+/// This is _not_ precise (stateful); it is more like another heuristic. Many
+/// corner cases were figured out empirically.
+///
+/// Returns false for a null SU or an SU without an instruction, when the
+/// resource model cannot reserve resources for the instruction, or when an
+/// instruction already in the packet has a non-control successor edge to SU.
+bool VLIWResourceModel::isResourceAvailable(SUnit *SU) {
+  if (!SU || !SU->getInstr())
+    return false;
+
+  // First see if the pipeline could receive this instruction in the current
+  // cycle. The opcodes listed explicitly skip the resource query.
+  switch (SU->getInstr()->getOpcode()) {
+  case TargetOpcode::EXTRACT_SUBREG:
+  case TargetOpcode::INSERT_SUBREG:
+  case TargetOpcode::SUBREG_TO_REG:
+  case TargetOpcode::REG_SEQUENCE:
+  case TargetOpcode::IMPLICIT_DEF:
+  case TargetOpcode::COPY:
+  case TargetOpcode::INLINEASM:
+    break;
+  default:
+    if (!ResourcesModel->canReserveResources(SU->getInstr()))
+      return false;
+    break;
+  }
+
+  // Now make sure nothing already in the packet feeds SU.
+  for (unsigned Slot = 0, NumSlots = Packet.size(); Slot != NumSlots; ++Slot) {
+    SUnit *InPacket = Packet[Slot];
+    for (SUnit::const_succ_iterator Dep = InPacket->Succs.begin(),
+         DepEnd = InPacket->Succs.end(); Dep != DepEnd; ++Dep) {
+      // Since we do not add pseudos to packets, might as well
+      // ignore order dependencies.
+      if (!Dep->isCtrl() && Dep->getSUnit() == SU)
+        return false;
+    }
+  }
+  return true;
+}
+
+/// Add \p SU to the packet being built and keep the resource model in step.
+/// A new packet is started beforehand when SU does not fit, and afterwards
+/// when the packet has reached the issue width. Returns true if either reset
+/// happened, i.e. the caller should move on to a new cycle.
+bool VLIWResourceModel::reserveResources(SUnit *SU) {
+  // If this SU does not fit in the packet, start a new one.
+  const bool OpenedNewPacket = !isResourceAvailable(SU);
+  if (OpenedNewPacket) {
+    ResourcesModel->clearResources();
+    Packet.clear();
+    TotalPackets++;
+  }
+
+  // The opcodes listed explicitly are added to the packet without reserving
+  // anything in the resource model.
+  switch (SU->getInstr()->getOpcode()) {
+  case TargetOpcode::EXTRACT_SUBREG:
+  case TargetOpcode::INSERT_SUBREG:
+  case TargetOpcode::SUBREG_TO_REG:
+  case TargetOpcode::REG_SEQUENCE:
+  case TargetOpcode::IMPLICIT_DEF:
+  case TargetOpcode::KILL:
+  case TargetOpcode::PROLOG_LABEL:
+  case TargetOpcode::EH_LABEL:
+  case TargetOpcode::COPY:
+  case TargetOpcode::INLINEASM:
+    break;
+  default:
+    ResourcesModel->reserveResources(SU->getInstr());
+    break;
+  }
+  Packet.push_back(SU);
+
+  // Dump the packet contents when debug output is enabled.
+  DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n";
+        for (unsigned Slot = 0, NumSlots = Packet.size(); Slot != NumSlots;
+             ++Slot) {
+          dbgs() << "\t[" << Slot << "] SU(";
+          dbgs() << Packet[Slot]->NodeNum << ")\t";
+          Packet[Slot]->getInstr()->dump();
+        });
+
+  // Room left in the packet: report only the reset done on entry, if any.
+  if (Packet.size() < InstrItins->SchedModel->IssueWidth)
+    return OpenedNewPacket;
+
+  // The packet is now full; reset the state so the next cycle starts fresh.
+  ResourcesModel->clearResources();
+  Packet.clear();
+  TotalPackets++;
+  return true;
+}
+
+// Hand every DAG root to the scheduling strategy: nodes without predecessors
+// are released as top-ready immediately, nodes without successors are
+// collected and released as bottom-ready afterwards.
+void VLIWMachineScheduler::releaseRoots() {
+  SmallVector<SUnit*, 16> BotRoots;
+
+  for (unsigned Idx = 0, NumUnits = SUnits.size(); Idx != NumUnits; ++Idx) {
+    SUnit *Node = &SUnits[Idx];
+    // A SUnit is ready to top schedule if it has no predecessors.
+    if (Node->Preds.empty())
+      SchedImpl->releaseTopNode(Node);
+    // A SUnit is ready to bottom schedule if it has no successors.
+    if (Node->Succs.empty())
+      BotRoots.push_back(Node);
+  }
+
+  // Release bottom roots in reverse order so the higher priority nodes appear
+  // first. This is more natural and slightly more efficient.
+  for (unsigned Remaining = BotRoots.size(); Remaining != 0; --Remaining)
+    SchedImpl->releaseBottomNode(BotRoots[Remaining - 1]);
+}
+
+/// schedule - Called back from MachineScheduler::runOnMachineFunction
+/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
+/// only includes instructions that have DAG nodes, not scheduling boundaries.
+///
+/// Builds the scheduling DAG for the region, initializes the pressure
+/// trackers and the strategy, then repeatedly asks the strategy for a node and
+/// moves its instruction to the top or bottom boundary of the unscheduled
+/// zone until the strategy returns null (or the debug cutoff is hit). Finally
+/// debug values are put back.
+void VLIWMachineScheduler::schedule() {
+  DEBUG(dbgs()
+        << "********** MI Converging Scheduling VLIW BB#" << BB->getNumber()
+        << " " << BB->getName()
+        << " in_func " << BB->getParent()->getFunction()->getName()
+        << " at loop depth "  << MLI->getLoopDepth(BB)
+        << " \n");
+
+  // Initialize the register pressure tracker used by buildSchedGraph.
+  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
+
+  // Account for liveness generated by the region boundary.
+  if (LiveRegionEnd != RegionEnd)
+    RPTracker.recede();
+
+  // Build the DAG, and compute current register pressure.
+  buildSchedGraph(AA, &RPTracker);
+
+  // Initialize top/bottom trackers after computing region pressure.
+  initRegPressure();
+
+  // To view Height/Depth correctly, they should be accessed at least once.
+  DEBUG(unsigned maxH = 0;
+        for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+          if (SUnits[su].getHeight() > maxH)
+            maxH = SUnits[su].getHeight();
+        dbgs() << "Max Height " << maxH << "\n";);
+  DEBUG(unsigned maxD = 0;
+        for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+          if (SUnits[su].getDepth() > maxD)
+            maxD = SUnits[su].getDepth();
+        dbgs() << "Max Depth " << maxD << "\n";);
+  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+          SUnits[su].dumpAll(this));
+
+  if (ViewMISchedDAGs) viewGraph();
+
+  SchedImpl->initialize(this);
+
+  // Release edges from the special Entry node or to the special Exit node.
+  releaseSuccessors(&EntrySU);
+  releasePredecessors(&ExitSU);
+
+  // Release all DAG roots for scheduling.
+  releaseRoots();
+
+  // The unscheduled zone is [CurrentTop, CurrentBottom); it shrinks from
+  // either end as nodes are picked.
+  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
+  CurrentBottom = RegionEnd;
+  bool IsTopNode = false;
+  while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
+    if (!checkSchedLimit())
+      break;
+
+    // Move the instruction to its new location in the instruction stream.
+    MachineInstr *MI = SU->getInstr();
+
+    if (IsTopNode) {
+      assert(SU->isTopReady() && "node still has unscheduled dependencies");
+      // Already in place at the top boundary: just step past it (and any
+      // debug values that follow). Otherwise move it up to the boundary.
+      if (&*CurrentTop == MI)
+        CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
+      else {
+        moveInstruction(MI, CurrentTop);
+        TopRPTracker.setPos(MI);
+      }
+
+      // Update top scheduled pressure.
+      TopRPTracker.advance();
+      assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
+      updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);
+
+      // Release dependent instructions for scheduling.
+      releaseSuccessors(SU);
+    } else {
+      assert(SU->isBottomReady() && "node still has unscheduled dependencies");
+      MachineBasicBlock::iterator priorII =
+        priorNonDebug(CurrentBottom, CurrentTop);
+      // Already in place just above the bottom boundary: pull the boundary up
+      // over it. Otherwise move it down to the boundary.
+      if (&*priorII == MI)
+        CurrentBottom = priorII;
+      else {
+        // If the instruction being moved is the current top of the zone, the
+        // top boundary (and its pressure tracker) must step past it first.
+        if (&*CurrentTop == MI) {
+          CurrentTop = nextIfDebug(++CurrentTop, priorII);
+          TopRPTracker.setPos(CurrentTop);
+        }
+        moveInstruction(MI, CurrentBottom);
+        CurrentBottom = MI;
+      }
+      // Update bottom scheduled pressure.
+      BotRPTracker.recede();
+      assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
+      updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);
+
+      // Release dependent instructions for scheduling.
+      releasePredecessors(SU);
+    }
+    SU->isScheduled = true;
+    SchedImpl->schedNode(SU, IsTopNode);
+  }
+  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
+
+  placeDebugValues();
+}
+
+/// Reinsert any remaining debug_values, just like the PostRA scheduler.
+///
+/// FirstDbgValue is spliced back to the start of the region, and every
+/// (DBG_VALUE, preceding instruction) pair recorded in DbgValues is spliced
+/// back directly after its recorded predecessor. RegionBegin and RegionEnd
+/// are adjusted when a debug value lands on a region edge, and both records
+/// are cleared afterwards.
+void VLIWMachineScheduler::placeDebugValues() {
+  // If first instruction was a DBG_VALUE then put it back.
+  if (FirstDbgValue) {
+    BB->splice(RegionBegin, BB, FirstDbgValue);
+    RegionBegin = FirstDbgValue;
+  }
+
+  // Walk DbgValues from the last entry to the first; prior(DI) is the entry
+  // being processed in each iteration.
+  for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
+         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
+    std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
+    MachineInstr *DbgValue = P.first;
+    MachineBasicBlock::iterator OrigPrevMI = P.second;
+    // Insert the debug value right after the instruction it used to follow.
+    BB->splice(++OrigPrevMI, BB, DbgValue);
+    if (OrigPrevMI == llvm::prior(RegionEnd))
+      RegionEnd = DbgValue;
+  }
+  DbgValues.clear();
+  FirstDbgValue = NULL;
+}
+
+/// Bind this strategy to \p dag and create the per-boundary hazard
+/// recognizers and resource models for the top and bottom zones.
+void ConvergingVLIWScheduler::initialize(VLIWMachineScheduler *dag) {
+  DAG = dag;
+  TRI = DAG->TRI;
+  Top.DAG = dag;
+  Bot.DAG = dag;
+
+  // Initialize the HazardRecognizers.
+  // NOTE(review): the previous HazardRec/ResourceModel pointers are
+  // overwritten without being freed here. If initialize() runs more than once
+  // per strategy object this looks like a leak - confirm against the
+  // SchedBoundary constructor/destructor in the header.
+  const TargetMachine &TM = DAG->MF.getTarget();
+  const InstrItineraryData *Itin = TM.getInstrItineraryData();
+  Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
+  Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
+
+  Top.ResourceModel = new VLIWResourceModel(TM);
+  Bot.ResourceModel = new VLIWResourceModel(TM);
+
+  // The two direction-forcing options are mutually exclusive. The message
+  // names the options as they are actually registered in this file.
+  assert((!ForceTopDown || !ForceBottomUp) &&
+         "-vliw-misched-topdown incompatible with -vliw-misched-bottomup");
+}
+
+/// Make \p SU available to the top boundary. Its TopReadyCycle is first
+/// raised to the latest "predecessor ready cycle + edge minimum latency" over
+/// all incoming edges; nodes that are already scheduled are ignored.
+void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {
+  if (SU->isScheduled)
+    return;
+
+  // Iterate the predecessor list with the predecessor iterator type, in line
+  // with the other loops over Preds in this file.
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
+    unsigned MinLatency = I->getMinLatency();
+#ifndef NDEBUG
+    // Track the largest latency seen; pickOnlyChoice() uses it to bound its
+    // "permanent hazard" assertion.
+    Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
+#endif
+    if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
+      SU->TopReadyCycle = PredReadyCycle + MinLatency;
+  }
+  Top.releaseNode(SU, SU->TopReadyCycle);
+}
+
+/// Make \p SU available to the bottom boundary. Its BotReadyCycle is first
+/// raised to the latest "successor ready cycle + edge minimum latency" over
+/// all outgoing edges; nodes that are already scheduled are ignored.
+void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) {
+  if (SU->isScheduled)
+    return;
+
+  assert(SU->getInstr() && "Scheduled SUnit must have instr");
+
+  SUnit::succ_iterator Edge = SU->Succs.begin();
+  const SUnit::succ_iterator EdgeEnd = SU->Succs.end();
+  for (; Edge != EdgeEnd; ++Edge) {
+    unsigned SuccReadyCycle = Edge->getSUnit()->BotReadyCycle;
+    unsigned MinLatency = Edge->getMinLatency();
+#ifndef NDEBUG
+    // Track the largest latency seen; pickOnlyChoice() uses it to bound its
+    // "permanent hazard" assertion.
+    Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
+#endif
+    const unsigned EarliestCycle = SuccReadyCycle + MinLatency;
+    if (EarliestCycle > SU->BotReadyCycle)
+      SU->BotReadyCycle = EarliestCycle;
+  }
+  Bot.releaseNode(SU, SU->BotReadyCycle);
+}
+
+/// Does this SU have a hazard within the current instruction group.
+///
+/// Two mechanisms are supported. When the ScheduleHazardRecognizer is
+/// enabled it alone decides: it is fully general and handles complicated
+/// in-order reservation tables (ScoreboardHazardRecognizer) as well as
+/// arbitrary target-specific logic.
+///
+/// Otherwise a streamlined check based on counters kept by the scheduler
+/// itself is used: SU is a hazard when its micro-ops would push the issue
+/// count of the current cycle beyond the issue width.
+///
+/// TODO: Also check whether the SU must start a new group.
+bool ConvergingVLIWScheduler::SchedBoundary::checkHazard(SUnit *SU) {
+  if (HazardRec->isEnabled())
+    return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
+
+  return IssueCount + DAG->getNumMicroOps(SU->getInstr()) >
+         DAG->getIssueWidth();
+}
+
+/// Queue \p SU at this boundary. It goes to Available when it is ready by the
+/// current cycle and has no hazard, and to Pending otherwise. MinReadyCycle
+/// is lowered to \p ReadyCycle when that is earlier.
+void ConvergingVLIWScheduler::SchedBoundary::releaseNode(SUnit *SU,
+                                                     unsigned ReadyCycle) {
+  if (MinReadyCycle > ReadyCycle)
+    MinReadyCycle = ReadyCycle;
+
+  // Check for interlocks first. For the purpose of other heuristics, an
+  // instruction that cannot issue appears as if it's not in the ReadyQueue.
+  if (ReadyCycle <= CurrCycle && !checkHazard(SU))
+    Available.push(SU);
+  else
+    Pending.push(SU);
+}
+
+/// Move the boundary of scheduled code forward to the next cycle of interest:
+/// the later of CurrCycle + 1 and MinReadyCycle. Issue slots consumed beyond
+/// one issue width carry over into the new cycle, and the pending queue is
+/// flagged for re-examination.
+void ConvergingVLIWScheduler::SchedBoundary::bumpCycle() {
+  const unsigned Width = DAG->getIssueWidth();
+  if (IssueCount > Width)
+    IssueCount = IssueCount - Width;
+  else
+    IssueCount = 0;
+
+  assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
+  const unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);
+
+  if (HazardRec->isEnabled()) {
+    // Step the recognizer one cycle at a time, in this boundary's direction.
+    // Bypass getHazardType calls in case of long latency.
+    while (CurrCycle != NextCycle) {
+      if (isTop())
+        HazardRec->AdvanceCycle();
+      else
+        HazardRec->RecedeCycle();
+      ++CurrCycle;
+    }
+  } else {
+    // Bypass HazardRec virtual calls.
+    CurrCycle = NextCycle;
+  }
+  CheckPending = true;
+
+  DEBUG(dbgs() << "*** " << Available.getName() << " cycle "
+        << CurrCycle << '\n');
+}
+
+/// Move the boundary of scheduled code by one SUnit: feed \p SU to the hazard
+/// recognizer (when enabled) and to the resource model, add its micro-ops to
+/// the issue count, and bump the cycle if the resource model started a new
+/// packet.
+void ConvergingVLIWScheduler::SchedBoundary::bumpNode(SUnit *SU) {
+  // Update the reservation table.
+  if (HazardRec->isEnabled()) {
+    // Calls are scheduled with their preceding instructions. For bottom-up
+    // scheduling, clear the pipeline state before emitting.
+    if (!isTop() && SU->isCall)
+      HazardRec->Reset();
+    HazardRec->EmitInstruction(SU);
+  }
+
+  // Update DFA model.
+  const bool startNewCycle = ResourceModel->reserveResources(SU);
+
+  // Check the instruction group dispatch limit.
+  // TODO: Check if this SU must end a dispatch group.
+  IssueCount += DAG->getNumMicroOps(SU->getInstr());
+
+  if (!startNewCycle) {
+    DEBUG(dbgs() << "*** IssueCount " << IssueCount
+          << " at cycle " << CurrCycle << '\n');
+    return;
+  }
+
+  DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
+  bumpCycle();
+}
+
+/// Release pending ready nodes in to the available queue. This makes them
+/// visible to heuristics.
+///
+/// A pending node is promoted when its ready cycle for this boundary has been
+/// reached and checkHazard() reports no hazard. MinReadyCycle is recomputed
+/// along the way and CheckPending is cleared on exit.
+void ConvergingVLIWScheduler::SchedBoundary::releasePending() {
+  // If the available queue is empty, it is safe to reset MinReadyCycle.
+  if (Available.empty())
+    MinReadyCycle = UINT_MAX;
+
+  // Check to see if any of the pending instructions are ready to issue.  If
+  // so, add them to the available queue.
+  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
+    SUnit *SU = *(Pending.begin()+i);
+    unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
+
+    // Every pending node contributes to MinReadyCycle, promoted or not.
+    if (ReadyCycle < MinReadyCycle)
+      MinReadyCycle = ReadyCycle;
+
+    if (ReadyCycle > CurrCycle)
+      continue;
+
+    if (checkHazard(SU))
+      continue;
+
+    Available.push(SU);
+    Pending.remove(Pending.begin()+i);
+    // An element was removed from Pending: revisit the same index and shrink
+    // the loop bound to match.
+    --i; --e;
+  }
+  CheckPending = false;
+}
+
+/// Drop \p SU from whichever queue of this boundary holds it: Available is
+/// checked first, otherwise SU is expected to be in Pending.
+void ConvergingVLIWScheduler::SchedBoundary::removeReady(SUnit *SU) {
+  if (Available.isInQueue(SU)) {
+    Available.remove(Available.find(SU));
+    return;
+  }
+
+  assert(Pending.isInQueue(SU) && "bad ready count");
+  Pending.remove(Pending.find(SU));
+}
+
+/// Return the sole ready candidate of this boundary, or NULL when several
+/// instructions are ready. As a side effect the cycle is advanced until at
+/// least one node is available.
+SUnit *ConvergingVLIWScheduler::SchedBoundary::pickOnlyChoice() {
+  if (CheckPending)
+    releasePending();
+
+  // Keep bumping the cycle until something becomes available. The assertion
+  // bounds the number of bumps to catch a hazard that never clears.
+  unsigned NumBumps = 0;
+  while (Available.empty()) {
+    assert(NumBumps <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
+           "permanent hazard"); (void)NumBumps;
+    bumpCycle();
+    releasePending();
+    ++NumBumps;
+  }
+
+  if (Available.size() != 1)
+    return NULL;
+  return *Available.begin();
+}
+
+#ifndef NDEBUG
+/// Debug helper: print \p Label, the queue name, the pressure element \p P
+/// (set name and unit increase, or padding when P is not valid) and finally
+/// the node itself.
+void ConvergingVLIWScheduler::traceCandidate(const char *Label,
+                                             const ReadyQueue &Q,
+                                             SUnit *SU, PressureElement P) {
+  dbgs() << Label << " " << Q.getName() << " ";
+  if (!P.isValid()) {
+    dbgs() << "     ";
+  } else {
+    dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
+           << " ";
+  }
+  SU->dump(DAG);
+}
+#endif
+
+/// getSingleUnscheduledPred - Return the one predecessor of \p SU that has not
+/// been scheduled yet. Returns null when there is no such predecessor or when
+/// two distinct ones exist; several edges to the same node count once.
+static SUnit *getSingleUnscheduledPred(SUnit *SU) {
+  SUnit *Unique = 0;
+  for (SUnit::const_pred_iterator Edge = SU->Preds.begin(),
+       EdgeEnd = SU->Preds.end(); Edge != EdgeEnd; ++Edge) {
+    SUnit *Pred = Edge->getSUnit();
+    if (Pred->isScheduled)
+      continue;
+    // A second, different unscheduled predecessor: give up.
+    if (Unique && Unique != Pred)
+      return 0;
+    Unique = Pred;
+  }
+  return Unique;
+}
+
+/// getSingleUnscheduledSucc - Return the one successor of \p SU that has not
+/// been scheduled yet. Returns null when there is no such successor or when
+/// two distinct ones exist; several edges to the same node count once.
+static SUnit *getSingleUnscheduledSucc(SUnit *SU) {
+  SUnit *Unique = 0;
+  for (SUnit::const_succ_iterator Edge = SU->Succs.begin(),
+       EdgeEnd = SU->Succs.end(); Edge != EdgeEnd; ++Edge) {
+    SUnit *Succ = Edge->getSUnit();
+    if (Succ->isScheduled)
+      continue;
+    // A second, different unscheduled successor: give up.
+    if (Unique && Unique != Succ)
+      return 0;
+    Unique = Succ;
+  }
+  return Unique;
+}
+
+// Constants used to denote relative importance of
+// heuristic components for cost computation.
+//
+// As used by SchedulingCost(): PriorityOne is the bonus for isScheduleHigh
+// nodes, PriorityThree weights the register pressure deltas, and ScaleTwo
+// weights height/depth and the blocked-node count. FactorOne is a shift
+// amount ("<<= FactorOne"), i.e. a multiplication by 4, not by 2.
+// NOTE(review): PriorityTwo and PriorityFour are not referenced by
+// SchedulingCost() - confirm whether they are used elsewhere.
+static const unsigned PriorityOne = 200;
+static const unsigned PriorityTwo = 100;
+static const unsigned PriorityThree = 50;
+static const unsigned PriorityFour = 20;
+static const unsigned ScaleTwo = 10;
+static const unsigned FactorOne = 2;
+
+/// Single point to compute the overall scheduling cost of \p SU for queue
+/// \p Q; a larger result means a more attractive candidate. The score combines
+/// the forced-priority flag, critical path length (height for the top queue,
+/// depth for the bottom queue), resource availability in the current packet,
+/// the number of neighbours for which SU is the last unscheduled dependence,
+/// and the register pressure change in \p Delta. A null or already scheduled
+/// SU gets the trivial score of 1.
+/// TODO: More heuristics will be used soon.
+int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
+                                            SchedCandidate &Candidate,
+                                            RegPressureDelta &Delta,
+                                            bool verbose) {
+  // Initial trivial priority.
+  int ResCount = 1;
+
+  // Do not waste time on a node that is already scheduled.
+  if (!SU || SU->isScheduled)
+    return ResCount;
+
+  // Forced priority is high.
+  if (SU->isScheduleHigh)
+    ResCount += PriorityOne;
+
+  unsigned NumNodesBlocking = 0;
+  const bool FromTopQueue = Q.getID() == TopQID;
+  if (FromTopQueue) {
+    // Critical path first.
+    ResCount += (SU->getHeight() * ScaleTwo);
+
+    // If resources are available for it, multiply the
+    // chance of scheduling.
+    if (Top.ResourceModel->isResourceAvailable(SU))
+      ResCount <<= FactorOne;
+
+    // How many SUs does it block from scheduling? Count the successors for
+    // which this node is the sole unscheduled predecessor.
+    for (SUnit::const_succ_iterator Edge = SU->Succs.begin(),
+         EdgeEnd = SU->Succs.end(); Edge != EdgeEnd; ++Edge) {
+      if (getSingleUnscheduledPred(Edge->getSUnit()) == SU)
+        ++NumNodesBlocking;
+    }
+  } else {
+    // Critical path first.
+    ResCount += (SU->getDepth() * ScaleTwo);
+
+    // If resources are available for it, multiply the
+    // chance of scheduling.
+    if (Bot.ResourceModel->isResourceAvailable(SU))
+      ResCount <<= FactorOne;
+
+    // Count the predecessors for which this node is the sole unscheduled
+    // successor.
+    for (SUnit::const_pred_iterator Edge = SU->Preds.begin(),
+         EdgeEnd = SU->Preds.end(); Edge != EdgeEnd; ++Edge) {
+      if (getSingleUnscheduledSucc(Edge->getSUnit()) == SU)
+        ++NumNodesBlocking;
+    }
+  }
+  ResCount += (NumNodesBlocking * ScaleTwo);
+
+  // Factor in reg pressure as a heuristic.
+  ResCount -= (Delta.Excess.UnitIncrease*PriorityThree);
+  ResCount -= (Delta.CriticalMax.UnitIncrease*PriorityThree);
+
+  DEBUG(if (verbose) dbgs() << " Total(" << ResCount << ")");
+
+  return ResCount;
+}
+
+/// Pick the best candidate from queue \p Q, updating \p Candidate in place.
+///
+/// Every node in the queue is scored with SchedulingCost(). The first node
+/// seen while Candidate is still empty is taken unconditionally (result
+/// NodeOrder); later nodes replace it only with a strictly higher cost
+/// (result BestCost). NoCand is returned when Candidate was not changed.
+///
+/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
+/// DAG building. To adjust for the current scheduling location we need to
+/// maintain the number of vreg uses remaining to be top-scheduled.
+ConvergingVLIWScheduler::CandResult ConvergingVLIWScheduler::
+pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker,
+                  SchedCandidate &Candidate) {
+  DEBUG(Q.dump());
+
+  // getMaxPressureDelta temporarily modifies the tracker.
+  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
+
+  CandResult FoundCandidate = NoCand;
+  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
+    SUnit *Node = *I;
+
+    RegPressureDelta RPDelta;
+    TempTracker.getMaxPressureDelta(Node->getInstr(), RPDelta,
+                                    DAG->getRegionCriticalPSets(),
+                                    DAG->getRegPressure().MaxSetPressure);
+
+    const int NodeCost = SchedulingCost(Q, Node, Candidate, RPDelta, false);
+
+    // An empty candidate is always filled in; otherwise only a strictly
+    // better cost displaces the current one.
+    const bool IsFirst = !Candidate.SU;
+    if (!IsFirst && NodeCost <= Candidate.SCost)
+      continue;
+
+    if (!IsFirst) {
+      DEBUG(traceCandidate("CCAND", Q, Node));
+    }
+    Candidate.SU = Node;
+    Candidate.RPDelta = RPDelta;
+    Candidate.SCost = NodeCost;
+    FoundCandidate = IsFirst ? NodeOrder : BestCost;
+  }
+  return FoundCandidate;
+}
+
+/// Pick the best candidate node from either the top or bottom queue.
+SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {
+  // Schedule as far as possible in the direction of no choice. This is most
+  // efficient, but also provides the best heuristics for CriticalPSets.
+  if (SUnit *SU = Bot.pickOnlyChoice()) {
+    IsTopNode = false;
+    return SU;
+  }
+  if (SUnit *SU = Top.pickOnlyChoice()) {
+    IsTopNode = true;
+    return SU;
+  }
+  SchedCandidate BotCand;
+  // Prefer bottom scheduling when heuristics are silent.
+  CandResult BotResult = pickNodeFromQueue(Bot.Available,
+                                           DAG->getBotRPTracker(), BotCand);
+  assert(BotResult != NoCand && "failed to find the first candidate");
+
+  // If either Q has a single candidate that provides the least increase in
+  // Excess pressure, we can immediately schedule from that Q.
+  //
+  // RegionCriticalPSets summarizes the pressure within the scheduled region and
+  // affects picking from either Q. If scheduling in one direction must
+  // increase pressure for one of the excess PSets, then schedule in that
+  // direction first to provide more freedom in the other direction.
+  if (BotResult == SingleExcess || BotResult == SingleCritical) {
+    IsTopNode = false;
+    return BotCand.SU;
+  }
+  // Check if the top Q has a better candidate.
+  SchedCandidate TopCand;
+  CandResult TopResult = pickNodeFromQueue(Top.Available,
+                                           DAG->getTopRPTracker(), TopCand);
+  assert(TopResult != NoCand && "failed to find the first candidate");
+
+  if (TopResult == SingleExcess || TopResult == SingleCritical) {
+    IsTopNode = true;
+    return TopCand.SU;
+  }
+  // If either Q has a single candidate that minimizes pressure above the
+  // original region's pressure pick it.
+  if (BotResult == SingleMax) {
+    IsTopNode = false;
+    return BotCand.SU;
+  }
+  if (TopResult == SingleMax) {
+    IsTopNode = true;
+    return TopCand.SU;
+  }
+  if (TopCand.SCost > BotCand.SCost) {
+    IsTopNode = true;
+    return TopCand.SU;
+  }
+  // Otherwise prefer the bottom candidate in node order.
+  IsTopNode = false;
+  return BotCand.SU;
+}
+
+/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
+SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
+  if (DAG->top() == DAG->bottom()) {
+    assert(Top.Available.empty() && Top.Pending.empty() &&
+           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
+    return NULL;
+  }
+  SUnit *SU;
+  if (ForceTopDown) {
+    SU = Top.pickOnlyChoice();
+    if (!SU) {
+      SchedCandidate TopCand;
+      CandResult TopResult =
+        pickNodeFromQueue(Top.Available, DAG->getTopRPTracker(), TopCand);
+      assert(TopResult != NoCand && "failed to find the first candidate");
+      (void)TopResult;
+      SU = TopCand.SU;
+    }
+    IsTopNode = true;
+  } else if (ForceBottomUp) {
+    SU = Bot.pickOnlyChoice();
+    if (!SU) {
+      SchedCandidate BotCand;
+      CandResult BotResult =
+        pickNodeFromQueue(Bot.Available, DAG->getBotRPTracker(), BotCand);
+      assert(BotResult != NoCand && "failed to find the first candidate");
+      (void)BotResult;
+      SU = BotCand.SU;
+    }
+    IsTopNode = false;
+  } else {
+    SU = pickNodeBidrectional(IsTopNode);
+  }
+  if (SU->isTopReady())
+    Top.removeReady(SU);
+  if (SU->isBottomReady())
+    Bot.removeReady(SU);
+
+  DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
+        << " Scheduling Instruction in cycle "
+        << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
+        SU->dump(DAG));
+  return SU;
+}
+
+/// Update the scheduler's state after scheduling a node. This is the same node
+/// that was just returned by pickNode(). However, VLIWMachineScheduler needs
+/// to update its state based on the current cycle before MachineSchedStrategy
+/// does.
+void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
+  if (IsTopNode) {
+    SU->TopReadyCycle = Top.CurrCycle;
+    Top.bumpNode(SU);
+  } else {
+    SU->BotReadyCycle = Bot.CurrCycle;
+    Bot.bumpNode(SU);
+  }
+}
+
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.h b/lib/Target/Hexagon/HexagonMachineScheduler.h
new file mode 100644
index 0000000..f3643d6
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.h
@@ -0,0 +1,437 @@
+//===-- HexagonMachineScheduler.h - Custom Hexagon MI scheduler.      ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Custom Hexagon MI scheduler.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONMACHINESCHEDULER_H
+#define HEXAGONMACHINESCHEDULER_H
+
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/ResourcePriorityQueue.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/PriorityQueue.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MachineSchedStrategy - Interface to a machine scheduling algorithm.
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+class VLIWMachineScheduler;
+
+/// MachineSchedStrategy - Interface used by VLIWMachineScheduler to drive
+/// the selected scheduling algorithm.
+///
+/// TODO: Move this to ScheduleDAGInstrs.h
+class MachineSchedStrategy {
+public:
+  virtual ~MachineSchedStrategy() {}
+
+  /// Initialize the strategy after building the DAG for a new region.
+  virtual void initialize(VLIWMachineScheduler *DAG) = 0;
+
+  /// Pick the next node to schedule, or return NULL. Set IsTopNode to true to
+  /// schedule the node at the top of the unscheduled region. Otherwise it will
+  /// be scheduled at the bottom.
+  virtual SUnit *pickNode(bool &IsTopNode) = 0;
+
+  /// Notify MachineSchedStrategy that VLIWMachineScheduler has
+  /// scheduled a node.
+  virtual void schedNode(SUnit *SU, bool IsTopNode) = 0;
+
+  /// When all predecessor dependencies have been resolved, free this node for
+  /// top-down scheduling.
+  virtual void releaseTopNode(SUnit *SU) = 0;
+  /// When all successor dependencies have been resolved, free this node for
+  /// bottom-up scheduling.
+  virtual void releaseBottomNode(SUnit *SU) = 0;
+};
+
+//===----------------------------------------------------------------------===//
+// ConvergingVLIWScheduler - Implementation of the standard
+// MachineSchedStrategy.
+//===----------------------------------------------------------------------===//
+
+/// ReadyQueue encapsulates vector of "ready" SUnits with basic convenience
+/// methods for pushing and removing nodes. ReadyQueue's are uniquely identified
+/// by an ID. SUnit::NodeQueueId is a mask of the ReadyQueues the SUnit is in.
+class ReadyQueue {
+  unsigned ID;
+  std::string Name;
+  std::vector<SUnit*> Queue;
+
+public:
+  ReadyQueue(unsigned id, const Twine &name): ID(id), Name(name.str()) {}
+
+  unsigned getID() const { return ID; }
+
+  StringRef getName() const { return Name; }
+
+  // SU is in this queue if its NodeQueueId is a superset of this ID.
+  bool isInQueue(SUnit *SU) const { return (SU->NodeQueueId & ID); }
+
+  bool empty() const { return Queue.empty(); }
+
+  unsigned size() const { return Queue.size(); }
+
+  typedef std::vector<SUnit*>::iterator iterator;
+
+  iterator begin() { return Queue.begin(); }
+
+  iterator end() { return Queue.end(); }
+
+  iterator find(SUnit *SU) {
+    return std::find(Queue.begin(), Queue.end(), SU);
+  }
+
+  void push(SUnit *SU) {
+    Queue.push_back(SU);
+    SU->NodeQueueId |= ID;
+  }
+
+  void remove(iterator I) {
+    (*I)->NodeQueueId &= ~ID;
+    *I = Queue.back();
+    Queue.pop_back();
+  }
+
+  void dump() {
+    dbgs() << Name << ": ";
+    for (unsigned i = 0, e = Queue.size(); i < e; ++i)
+      dbgs() << Queue[i]->NodeNum << " ";
+    dbgs() << "\n";
+  }
+};
+
+class VLIWResourceModel {
+  /// ResourcesModel - Represents VLIW state.
+  /// Not limited to VLIW targets per se, but assumes
+  /// definition of DFA by a target.
+  DFAPacketizer *ResourcesModel;
+
+  const InstrItineraryData *InstrItins;
+
+  /// Local packet/bundle model. Purely
+  /// internal to the MI scheduler at the time.
+  std::vector<SUnit*> Packet;
+
+  /// Total packets created.
+  unsigned TotalPackets;
+
+public:
+  VLIWResourceModel(MachineSchedContext *C, const InstrItineraryData *IID) :
+    InstrItins(IID), TotalPackets(0) {
+    const TargetMachine &TM = C->MF->getTarget();
+    ResourcesModel = TM.getInstrInfo()->CreateTargetScheduleState(&TM,NULL);
+
+    // This hard requirement could be relaxed,
+    // but for now do not let it proceed.
+    assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
+
+    Packet.resize(InstrItins->SchedModel->IssueWidth);
+    Packet.clear();
+    ResourcesModel->clearResources();
+  }
+
+  VLIWResourceModel(const TargetMachine &TM) :
+    InstrItins(TM.getInstrItineraryData()), TotalPackets(0) {
+    ResourcesModel = TM.getInstrInfo()->CreateTargetScheduleState(&TM,NULL);
+
+    // This hard requirement could be relaxed,
+    // but for now do not let it proceed.
+    assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
+
+    Packet.resize(InstrItins->SchedModel->IssueWidth);
+    Packet.clear();
+    ResourcesModel->clearResources();
+  }
+
+  ~VLIWResourceModel() {
+    delete ResourcesModel;
+  }
+
+  void resetPacketState() {
+    Packet.clear();
+  }
+
+  void resetDFA() {
+    ResourcesModel->clearResources();
+  }
+
+  void reset() {
+    Packet.clear();
+    ResourcesModel->clearResources();
+  }
+
+  bool isResourceAvailable(SUnit *SU);
+  bool reserveResources(SUnit *SU);
+  unsigned getTotalPackets() const { return TotalPackets; }
+};
+
+class VLIWMachineScheduler : public ScheduleDAGInstrs {
+  /// AA - AliasAnalysis for making memory reference queries.
+  AliasAnalysis *AA;
+
+  RegisterClassInfo *RegClassInfo;
+  MachineSchedStrategy *SchedImpl;
+
+  MachineBasicBlock::iterator LiveRegionEnd;
+
+  /// Register pressure in this region computed by buildSchedGraph.
+  IntervalPressure RegPressure;
+  RegPressureTracker RPTracker;
+
+  /// List of pressure sets that exceed the target's pressure limit before
+  /// scheduling, listed in increasing set ID order. Each pressure set is paired
+  /// with its max pressure in the currently scheduled regions.
+  std::vector<PressureElement> RegionCriticalPSets;
+
+  /// The top of the unscheduled zone.
+  MachineBasicBlock::iterator CurrentTop;
+  IntervalPressure TopPressure;
+  RegPressureTracker TopRPTracker;
+
+  /// The bottom of the unscheduled zone.
+  MachineBasicBlock::iterator CurrentBottom;
+  IntervalPressure BotPressure;
+  RegPressureTracker BotRPTracker;
+
+#ifndef NDEBUG
+  /// The number of instructions scheduled so far. Used to cut off the
+  /// scheduler at the point determined by misched-cutoff.
+  unsigned NumInstrsScheduled;
+#endif
+
+  /// Total packets in the region.
+  unsigned TotalPackets;
+
+  const MachineLoopInfo *MLI;
+public:
+  VLIWMachineScheduler(MachineSchedContext *C, MachineSchedStrategy *S):
+    ScheduleDAGInstrs(*C->MF, *C->MLI, *C->MDT, /*IsPostRA=*/false, C->LIS),
+    AA(C->AA), RegClassInfo(C->RegClassInfo), SchedImpl(S),
+    RPTracker(RegPressure), CurrentTop(), TopRPTracker(TopPressure),
+    CurrentBottom(), BotRPTracker(BotPressure), MLI(C->MLI) {
+#ifndef NDEBUG
+    NumInstrsScheduled = 0;
+#endif
+    TotalPackets = 0;
+  }
+
+  virtual ~VLIWMachineScheduler() {
+    delete SchedImpl;
+  }
+
+  MachineBasicBlock::iterator top() const { return CurrentTop; }
+  MachineBasicBlock::iterator bottom() const { return CurrentBottom; }
+
+  /// Implement the ScheduleDAGInstrs interface for handling the next scheduling
+  /// region. This covers all instructions in a block, while schedule() may only
+  /// cover a subset.
+  void enterRegion(MachineBasicBlock *bb,
+                   MachineBasicBlock::iterator begin,
+                   MachineBasicBlock::iterator end,
+                   unsigned endcount);
+
+  /// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's
+  /// time to do some work.
+  void schedule();
+
+  unsigned CurCycle;
+
+  /// Get current register pressure for the top scheduled instructions.
+  const IntervalPressure &getTopPressure() const { return TopPressure; }
+  const RegPressureTracker &getTopRPTracker() const { return TopRPTracker; }
+
+  /// Get current register pressure for the bottom scheduled instructions.
+  const IntervalPressure &getBotPressure() const { return BotPressure; }
+  const RegPressureTracker &getBotRPTracker() const { return BotRPTracker; }
+
+  /// Get register pressure for the entire scheduling region before scheduling.
+  const IntervalPressure &getRegPressure() const { return RegPressure; }
+
+  const std::vector<PressureElement> &getRegionCriticalPSets() const {
+    return RegionCriticalPSets;
+  }
+
+  /// getIssueWidth - Return the max instructions per scheduling group.
+  unsigned getIssueWidth() const {
+    return (InstrItins && InstrItins->SchedModel)
+      ? InstrItins->SchedModel->IssueWidth : 1;
+  }
+
+  /// getNumMicroOps - Return the number of issue slots required for this MI.
+  unsigned getNumMicroOps(MachineInstr *MI) const {
+    return 1;
+    //if (!InstrItins) return 1;
+    //int UOps = InstrItins->getNumMicroOps(MI->getDesc().getSchedClass());
+    //return (UOps >= 0) ? UOps : TII->getNumMicroOps(InstrItins, MI);
+  }
+
+private:
+  void scheduleNodeTopDown(SUnit *SU);
+  void listScheduleTopDown();
+
+  void initRegPressure();
+  void updateScheduledPressure(std::vector<unsigned> NewMaxPressure);
+
+  void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);
+  bool checkSchedLimit();
+
+  void releaseRoots();
+
+  void releaseSucc(SUnit *SU, SDep *SuccEdge);
+  void releaseSuccessors(SUnit *SU);
+  void releasePred(SUnit *SU, SDep *PredEdge);
+  void releasePredecessors(SUnit *SU);
+
+  void placeDebugValues();
+};
+
+/// ConvergingVLIWScheduler shrinks the unscheduled zone using heuristics
+/// to balance the schedule.
+class ConvergingVLIWScheduler : public MachineSchedStrategy {
+
+  /// Store the state used by ConvergingVLIWScheduler heuristics, required
+  ///  for the lifetime of one invocation of pickNode().
+  struct SchedCandidate {
+    // The best SUnit candidate.
+    SUnit *SU;
+
+    // Register pressure values for the best candidate.
+    RegPressureDelta RPDelta;
+
+    // Best scheduling cost.
+    int SCost;
+
+    SchedCandidate(): SU(NULL), SCost(0) {}
+  };
+  /// Represent the type of SchedCandidate found within a single queue.
+  enum CandResult {
+    NoCand, NodeOrder, SingleExcess, SingleCritical, SingleMax, MultiPressure,
+    BestCost};
+
+  /// Each Scheduling boundary is associated with ready queues. It tracks the
+  /// current cycle in whichever direction it has moved, and maintains the state
+  /// of "hazards" and other interlocks at the current cycle.
+  struct SchedBoundary {
+    VLIWMachineScheduler *DAG;
+
+    ReadyQueue Available;
+    ReadyQueue Pending;
+    bool CheckPending;
+
+    ScheduleHazardRecognizer *HazardRec;
+    VLIWResourceModel *ResourceModel;
+
+    unsigned CurrCycle;
+    unsigned IssueCount;
+
+    /// MinReadyCycle - Cycle of the soonest available instruction.
+    unsigned MinReadyCycle;
+
+    // Remember the greatest min operand latency.
+    unsigned MaxMinLatency;
+
+    /// Pending queues extend the ready queues with the same ID and the
+    /// PendingFlag set.
+    SchedBoundary(unsigned ID, const Twine &Name):
+      DAG(0), Available(ID, Name+".A"),
+      Pending(ID << ConvergingVLIWScheduler::LogMaxQID, Name+".P"),
+      CheckPending(false), HazardRec(0), ResourceModel(0),
+      CurrCycle(0), IssueCount(0),
+      MinReadyCycle(UINT_MAX), MaxMinLatency(0) {}
+
+    ~SchedBoundary() {
+      delete ResourceModel;
+      delete HazardRec;
+    }
+
+    bool isTop() const {
+      return Available.getID() == ConvergingVLIWScheduler::TopQID;
+    }
+
+    bool checkHazard(SUnit *SU);
+
+    void releaseNode(SUnit *SU, unsigned ReadyCycle);
+
+    void bumpCycle();
+
+    void bumpNode(SUnit *SU);
+
+    void releasePending();
+
+    void removeReady(SUnit *SU);
+
+    SUnit *pickOnlyChoice();
+  };
+
+  VLIWMachineScheduler *DAG;
+  const TargetRegisterInfo *TRI;
+
+  // State of the top and bottom scheduled instruction boundaries.
+  SchedBoundary Top;
+  SchedBoundary Bot;
+
+public:
+  /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
+  enum {
+    TopQID = 1,
+    BotQID = 2,
+    LogMaxQID = 2
+  };
+
+  ConvergingVLIWScheduler():
+    DAG(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
+
+  virtual void initialize(VLIWMachineScheduler *dag);
+
+  virtual SUnit *pickNode(bool &IsTopNode);
+
+  virtual void schedNode(SUnit *SU, bool IsTopNode);
+
+  virtual void releaseTopNode(SUnit *SU);
+
+  virtual void releaseBottomNode(SUnit *SU);
+
+protected:
+  SUnit *pickNodeBidrectional(bool &IsTopNode);
+
+  int SchedulingCost(ReadyQueue &Q,
+                     SUnit *SU, SchedCandidate &Candidate,
+                     RegPressureDelta &Delta, bool verbose);
+
+  CandResult pickNodeFromQueue(ReadyQueue &Q,
+                               const RegPressureTracker &RPTracker,
+                               SchedCandidate &Candidate);
+#ifndef NDEBUG
+  void traceCandidate(const char *Label, const ReadyQueue &Q, SUnit *SU,
+                      PressureElement P = PressureElement());
+#endif
+};
+
+} // namespace
+
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp
index 7ece408..1e91c39 100644
--- a/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -337,7 +337,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
 
   DEBUG(dbgs() << "********** Hexagon New Value Jump **********\n"
                << "********** Function: "
-               << MF.getFunction()->getName() << "\n");
+               << MF.getName() << "\n");
 
 #if 0
   // for now disable this, if we move NewValueJump before register
diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp
index 55cbc09..a295015 100644
--- a/lib/Target/Hexagon/HexagonPeephole.cpp
+++ b/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -109,6 +109,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
   MRI = &MF.getRegInfo();
 
   DenseMap<unsigned, unsigned> PeepholeMap;
+  DenseMap<unsigned, std::pair<unsigned, unsigned> > PeepholeDoubleRegsMap;
 
   if (DisableHexagonPeephole) return false;
 
@@ -117,6 +118,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
        MBBb != MBBe; ++MBBb) {
     MachineBasicBlock* MBB = MBBb;
     PeepholeMap.clear();
+    PeepholeDoubleRegsMap.clear();
 
     // Traverse the basic block.
     for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
@@ -140,6 +142,24 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
         }
       }
 
+      // Look for this sequence below
+      // %vregDoubleReg1 = LSRd_ri %vregDoubleReg0, 32
+      // %vregIntReg = COPY %vregDoubleReg1:subreg_loreg.
+      // and convert into
+      // %vregIntReg = COPY %vregDoubleReg0:subreg_hireg.
+      if (MI->getOpcode() == Hexagon::LSRd_ri) {
+        assert(MI->getNumOperands() == 3);
+        MachineOperand &Dst = MI->getOperand(0);
+        MachineOperand &Src1 = MI->getOperand(1);
+        MachineOperand &Src2 = MI->getOperand(2);
+        if (Src2.getImm() != 32)
+          continue;
+        unsigned DstReg = Dst.getReg();
+        unsigned SrcReg = Src1.getReg();
+        PeepholeDoubleRegsMap[DstReg] =
+          std::make_pair(*&SrcReg, 1/*Hexagon::subreg_hireg*/);
+      }
+
       // Look for P=NOT(P).
       if (!DisablePNotP &&
           (MI->getOpcode() == Hexagon::NOT_p)) {
@@ -178,6 +198,21 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
             // Change the 1st operand.
             MI->RemoveOperand(1);
             MI->addOperand(MachineOperand::CreateReg(PeepholeSrc, false));
+          } else  {
+            DenseMap<unsigned, std::pair<unsigned, unsigned> >::iterator DI =
+              PeepholeDoubleRegsMap.find(SrcReg);
+            if (DI != PeepholeDoubleRegsMap.end()) {
+              std::pair<unsigned,unsigned> PeepholeSrc = DI->second;
+              MI->RemoveOperand(1);
+              MI->addOperand(MachineOperand::CreateReg(PeepholeSrc.first,
+                                                       false /*isDef*/,
+                                                       false /*isImp*/,
+                                                       false /*isKill*/,
+                                                       false /*isDead*/,
+                                                       false /*isUndef*/,
+                                                       false /*isEarlyClobber*/,
+                                                       PeepholeSrc.second));
+            }
           }
         }
       }
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index 2c23674..3742486 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -310,6 +310,58 @@ void HexagonRegisterInfo::getInitialFrameState(std::vector<MachineMove>
   Moves.push_back(MachineMove(0, Dst, Src));
 }
 
+// Get the weight in units of pressure for this register class.
+const RegClassWeight &
+HexagonRegisterInfo::getRegClassWeight(const TargetRegisterClass *RC) const {
+  // Each TargetRegisterClass has a per register weight, and weight
+  // limit which must be less than the limits of its pressure sets.
+  static const RegClassWeight RCWeightTable[] = {
+    {1, 32}, // IntRegs
+    {1, 8},  // CRRegs
+    {1, 4},  // PredRegs
+    {2, 16}, // DoubleRegs
+    {0, 0} };
+  return RCWeightTable[RC->getID()];
+}
+
+/// Get the number of dimensions of register pressure.
+unsigned HexagonRegisterInfo::getNumRegPressureSets() const {
+  return 4;
+}
+
+/// Get the name of this register unit pressure set.
+const char *HexagonRegisterInfo::getRegPressureSetName(unsigned Idx) const {
+  static const char *const RegPressureSetName[] = {
+    "IntRegsRegSet",
+    "CRRegsRegSet",
+    "PredRegsRegSet",
+    "DoubleRegsRegSet"
+  };
+  assert((Idx < 4) && "Index out of bounds");
+  return RegPressureSetName[Idx];
+}
+
+/// Get the register unit pressure limit for this dimension.
+/// This limit must be adjusted dynamically for reserved registers.
+unsigned HexagonRegisterInfo::getRegPressureSetLimit(unsigned Idx) const {
+  static const int RegPressureLimit [] = { 16, 4, 2, 8 };
+  assert((Idx < 4) && "Index out of bounds");
+  return RegPressureLimit[Idx];
+}
+
+const int*
+HexagonRegisterInfo::getRegClassPressureSets(const TargetRegisterClass *RC)
+  const {
+  static const int RCSetsTable[] = {
+    0,  -1,  // IntRegs
+    1,  -1,  // CRRegs
+    2,  -1,  // PredRegs
+    0,  -1,  // DoubleRegs
+    -1 };
+  static const unsigned RCSetStartTable[] = { 0, 2, 4, 6, 0 };
+  unsigned SetListStart = RCSetStartTable[RC->getID()];
+  return &RCSetsTable[SetListStart];
+}
 unsigned HexagonRegisterInfo::getEHExceptionRegister() const {
   llvm_unreachable("What is the exception register");
 }
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h
index 85355ae..8820d13 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.h
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -87,6 +87,11 @@ struct HexagonRegisterInfo : public HexagonGenRegisterInfo {
   // Exception handling queries.
   unsigned getEHExceptionRegister() const;
   unsigned getEHHandlerRegister() const;
+  const RegClassWeight &getRegClassWeight(const TargetRegisterClass *RC) const;
+  unsigned getNumRegPressureSets() const;
+  const char *getRegPressureSetName(unsigned Idx) const;
+  unsigned getRegPressureSetLimit(unsigned Idx) const;
+  const int* getRegClassPressureSets(const TargetRegisterClass *RC) const;
 };
 
 } // end namespace llvm
diff --git a/lib/Target/Hexagon/HexagonSchedule.td b/lib/Target/Hexagon/HexagonSchedule.td
index d1076b8..b5ff69a 100644
--- a/lib/Target/Hexagon/HexagonSchedule.td
+++ b/lib/Target/Hexagon/HexagonSchedule.td
@@ -47,6 +47,7 @@ def HexagonModel : SchedMachineModel {
   // Max issue per cycle == bundle width.
   let IssueWidth = 4;
   let Itineraries = HexagonItineraries;
+  let LoadLatency = 1;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonScheduleV4.td b/lib/Target/Hexagon/HexagonScheduleV4.td
index 9b41126..5668ae8 100644
--- a/lib/Target/Hexagon/HexagonScheduleV4.td
+++ b/lib/Target/Hexagon/HexagonScheduleV4.td
@@ -58,6 +58,7 @@ def HexagonModelV4 : SchedMachineModel {
   // Max issue per cycle == bundle width.
   let IssueWidth = 4;
   let Itineraries = HexagonItinerariesV4;
+  let LoadLatency = 1;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index a7b291f..5688e9c 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -14,6 +14,7 @@
 #include "HexagonTargetMachine.h"
 #include "Hexagon.h"
 #include "HexagonISelLowering.h"
+#include "HexagonMachineScheduler.h"
 #include "llvm/Module.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/PassManager.h"
@@ -29,6 +30,11 @@ opt<bool> DisableHardwareLoops(
                         "disable-hexagon-hwloops", cl::Hidden,
                         cl::desc("Disable Hardware Loops for Hexagon target"));
 
+static cl::
+opt<bool> DisableHexagonMISched("disable-hexagon-misched",
+                                cl::Hidden, cl::ZeroOrMore, cl::init(false),
+                                cl::desc("Disable Hexagon MI Scheduling"));
+
 /// HexagonTargetMachineModule - Note that this is used on hosts that
 /// cannot link in a library unless there are references into the
 /// library.  In particular, it seems that it is not possible to get
@@ -42,6 +48,13 @@ extern "C" void LLVMInitializeHexagonTarget() {
   RegisterTargetMachine<HexagonTargetMachine> X(TheHexagonTarget);
 }
 
+static ScheduleDAGInstrs *createVLIWMachineSched(MachineSchedContext *C) {
+  return new VLIWMachineScheduler(C, new ConvergingVLIWScheduler());
+}
+
+static MachineSchedRegistry
+SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler",
+                    createVLIWMachineSched);
 
 /// HexagonTargetMachine ctor - Create an ILP32 architecture model.
 ///
@@ -83,7 +96,13 @@ namespace {
 class HexagonPassConfig : public TargetPassConfig {
 public:
   HexagonPassConfig(HexagonTargetMachine *TM, PassManagerBase &PM)
-    : TargetPassConfig(TM, PM) {}
+    : TargetPassConfig(TM, PM) {
+    // Enable MI scheduler.
+    if (!DisableHexagonMISched) {
+      enablePass(&MachineSchedulerID);
+      MachineSchedRegistry::setDefault(createVLIWMachineSched);
+    }
+  }
 
   HexagonTargetMachine &getHexagonTargetMachine() const {
     return getTM<HexagonTargetMachine>();
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index a03ed03..3d5f685 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -3474,8 +3474,8 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
       // 1. Two loads unless they are volatile.
       // 2. Two stores in V4 unless they are volatile.
       else if ((DepType == SDep::Order) &&
-               !I->hasVolatileMemoryRef() &&
-               !J->hasVolatileMemoryRef()) {
+               !I->hasOrderedMemoryRef() &&
+               !J->hasOrderedMemoryRef()) {
         if (QRI->Subtarget.hasV4TOps() &&
             // hexagonv4 allows dual store.
             MCIDI.mayStore() && MCIDJ.mayStore()) {
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
index d6e6c36..86f75d1 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
@@ -24,7 +24,7 @@ HexagonMCAsmInfo::HexagonMCAsmInfo(const Target &T, StringRef TT) {
   HasLEB128 = true;
 
   PrivateGlobalPrefix = ".L";
-  LCOMMDirectiveType = LCOMM::ByteAlignment;
+  LCOMMDirectiveAlignmentType = LCOMM::ByteAlignment;
   InlineAsmStart = "# InlineAsm Start";
   InlineAsmEnd = "# InlineAsm End";
   ZeroDirective = "\t.space\t";
author	Stephen Hines <srhines@google.com>	2012-09-13 19:10:35 -0700
committer	Android Git Automerger <android-git-automerger@android.com>	2012-09-13 19:10:35 -0700
commit	8f1c32e4f21c4e297f5690acf958a842384ba802 (patch)
tree	52800183ec2d22164b8f396842142c3a8aab912a /lib/Target/Hexagon
parent	828ded66831c0caaeecd2291a6bfb084f373d0e4 (diff)
parent	1c4ad5ef4fab105f0c8af7edd026e00502fb6279 (diff)
download	external_llvm-8f1c32e4f21c4e297f5690acf958a842384ba802.zip external_llvm-8f1c32e4f21c4e297f5690acf958a842384ba802.tar.gz external_llvm-8f1c32e4f21c4e297f5690acf958a842384ba802.tar.bz2