24 files changed, 2015 insertions, 765 deletions
diff --git a/utils/Makefile b/utils/Makefile
index ecb30be..7a3c17d 100644
--- a/utils/Makefile
+++ b/utils/Makefile
@@ -9,7 +9,7 @@
 
 LEVEL = ..
 PARALLEL_DIRS := FileCheck FileUpdate TableGen PerfectShuffle \
-	      count fpcmp llvm-lit not unittest
+	      count fpcmp llvm-lit not unittest yaml2obj
 
 EXTRA_DIST := check-each-file codegen-diff countloc.sh \
               DSAclean.py DSAextract.py emacs findsym.pl GenLibDeps.pl \
diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp
index 0a8ae46..abcec8f 100644
--- a/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/utils/TableGen/AsmMatcherEmitter.cpp
@@ -2447,7 +2447,9 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
   emitSubtargetFeatureFlagEnumeration(Info, OS);
 
   // Emit the function to match a register name to number.
-  emitMatchRegisterName(Target, AsmParser, OS);
+  // This should be omitted for Mips target
+  if (AsmParser->getValueAsBit("ShouldEmitMatchRegisterName"))
+    emitMatchRegisterName(Target, AsmParser, OS);
 
   OS << "#endif // GET_REGISTER_MATCHER\n\n";
 
@@ -2649,7 +2651,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
   OS << "    for (unsigned i = 0; i != " << MaxNumOperands << "; ++i) {\n";
   OS << "      if (i + 1 >= Operands.size()) {\n";
   OS << "        OperandsValid = (it->Classes[i] == " <<"InvalidMatchClass);\n";
-  OS << "        if (!OperandsValid) ErrorInfo = i + 1;\n;";
+  OS << "        if (!OperandsValid) ErrorInfo = i + 1;\n";
   OS << "        break;\n";
   OS << "      }\n";
   OS << "      unsigned Diag = validateOperandClass(Operands[i+1],\n";
@@ -2716,8 +2718,8 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
   OS << "  }\n\n";
 
   OS << "  // Okay, we had no match.  Try to return a useful error code.\n";
-  OS << "  if (HadMatchOtherThanPredicate || !HadMatchOtherThanFeatures)";
-  OS << "  return RetCode;\n";
+  OS << "  if (HadMatchOtherThanPredicate || !HadMatchOtherThanFeatures)\n";
+  OS << "    return RetCode;\n\n";
   OS << "  // Missing feature matches return which features were missing\n";
   OS << "  ErrorInfo = MissingFeatures;\n";
   OS << "  return Match_MissingFeature;\n";
diff --git a/utils/TableGen/AsmWriterEmitter.cpp b/utils/TableGen/AsmWriterEmitter.cpp
index bd153a8..57979b3 100644
--- a/utils/TableGen/AsmWriterEmitter.cpp
+++ b/utils/TableGen/AsmWriterEmitter.cpp
@@ -14,8 +14,8 @@
 
 #include "AsmWriterInst.h"
 #include "CodeGenTarget.h"
-#include "StringToOffsetTable.h"
 #include "SequenceToOffsetTable.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
diff --git a/utils/TableGen/CodeGenInstruction.cpp b/utils/TableGen/CodeGenInstruction.cpp
index 33381e9..12e153a 100644
--- a/utils/TableGen/CodeGenInstruction.cpp
+++ b/utils/TableGen/CodeGenInstruction.cpp
@@ -297,6 +297,7 @@ CodeGenInstruction::CodeGenInstruction(Record *R) : TheDef(R), Operands(R) {
   isCompare    = R->getValueAsBit("isCompare");
   isMoveImm    = R->getValueAsBit("isMoveImm");
   isBitcast    = R->getValueAsBit("isBitcast");
+  isSelect     = R->getValueAsBit("isSelect");
   isBarrier    = R->getValueAsBit("isBarrier");
   isCall       = R->getValueAsBit("isCall");
   canFoldAsLoad = R->getValueAsBit("canFoldAsLoad");
diff --git a/utils/TableGen/CodeGenInstruction.h b/utils/TableGen/CodeGenInstruction.h
index 3ba9f24..95b572d 100644
--- a/utils/TableGen/CodeGenInstruction.h
+++ b/utils/TableGen/CodeGenInstruction.h
@@ -222,6 +222,7 @@ namespace llvm {
     bool isCompare;
     bool isMoveImm;
     bool isBitcast;
+    bool isSelect;
     bool isBarrier;
     bool isCall;
     bool canFoldAsLoad;
diff --git a/utils/TableGen/CodeGenRegisters.cpp b/utils/TableGen/CodeGenRegisters.cpp
index 81bf9ed..011f4b7 100644
--- a/utils/TableGen/CodeGenRegisters.cpp
+++ b/utils/TableGen/CodeGenRegisters.cpp
@@ -28,19 +28,15 @@ using namespace llvm;
 //===----------------------------------------------------------------------===//
 
 CodeGenSubRegIndex::CodeGenSubRegIndex(Record *R, unsigned Enum)
-  : TheDef(R),
-    EnumValue(Enum)
-{}
-
-std::string CodeGenSubRegIndex::getNamespace() const {
-  if (TheDef->getValue("Namespace"))
-    return TheDef->getValueAsString("Namespace");
-  else
-    return "";
+  : TheDef(R), EnumValue(Enum) {
+  Name = R->getName();
+  if (R->getValue("Namespace"))
+    Namespace = R->getValueAsString("Namespace");
 }
 
-const std::string &CodeGenSubRegIndex::getName() const {
-  return TheDef->getName();
+CodeGenSubRegIndex::CodeGenSubRegIndex(StringRef N, StringRef Nspace,
+                                       unsigned Enum)
+  : TheDef(0), Name(N), Namespace(Nspace), EnumValue(Enum) {
 }
 
 std::string CodeGenSubRegIndex::getQualifiedName() const {
@@ -52,16 +48,31 @@ std::string CodeGenSubRegIndex::getQualifiedName() const {
 }
 
 void CodeGenSubRegIndex::updateComponents(CodeGenRegBank &RegBank) {
-  std::vector<Record*> Comps = TheDef->getValueAsListOfDefs("ComposedOf");
-  if (Comps.empty())
+  if (!TheDef)
     return;
-  if (Comps.size() != 2)
-    throw TGError(TheDef->getLoc(), "ComposedOf must have exactly two entries");
-  CodeGenSubRegIndex *A = RegBank.getSubRegIdx(Comps[0]);
-  CodeGenSubRegIndex *B = RegBank.getSubRegIdx(Comps[1]);
-  CodeGenSubRegIndex *X = A->addComposite(B, this);
-  if (X)
-    throw TGError(TheDef->getLoc(), "Ambiguous ComposedOf entries");
+
+  std::vector<Record*> Comps = TheDef->getValueAsListOfDefs("ComposedOf");
+  if (!Comps.empty()) {
+    if (Comps.size() != 2)
+      throw TGError(TheDef->getLoc(), "ComposedOf must have exactly two entries");
+    CodeGenSubRegIndex *A = RegBank.getSubRegIdx(Comps[0]);
+    CodeGenSubRegIndex *B = RegBank.getSubRegIdx(Comps[1]);
+    CodeGenSubRegIndex *X = A->addComposite(B, this);
+    if (X)
+      throw TGError(TheDef->getLoc(), "Ambiguous ComposedOf entries");
+  }
+
+  std::vector<Record*> Parts =
+    TheDef->getValueAsListOfDefs("CoveringSubRegIndices");
+  if (!Parts.empty()) {
+    if (Parts.size() < 2)
+      throw TGError(TheDef->getLoc(),
+                    "CoveredBySubRegs must have two or more entries");
+    SmallVector<CodeGenSubRegIndex*, 8> IdxParts;
+    for (unsigned i = 0, e = Parts.size(); i != e; ++i)
+      IdxParts.push_back(RegBank.getSubRegIdx(Parts[i]));
+    RegBank.addConcatSubRegIndex(IdxParts, this);
+  }
 }
 
 void CodeGenSubRegIndex::cleanComposites() {
@@ -187,10 +198,7 @@ bool CodeGenRegister::inheritRegUnits(CodeGenRegBank &RegBank) {
   unsigned OldNumUnits = RegUnits.size();
   for (SubRegMap::const_iterator I = SubRegs.begin(), E = SubRegs.end();
        I != E; ++I) {
-    // Strangely a register may have itself as a subreg (self-cycle) e.g. XMM.
     CodeGenRegister *SR = I->second;
-    if (SR == this)
-      continue;
     // Merge the subregister's units into this register's RegUnits.
     mergeRegUnits(RegUnits, SR->RegUnits);
   }
@@ -260,44 +268,6 @@ CodeGenRegister::computeSubRegs(CodeGenRegBank &RegBank) {
     }
   }
 
-  // Process the composites.
-  ListInit *Comps = TheDef->getValueAsListInit("CompositeIndices");
-  for (unsigned i = 0, e = Comps->size(); i != e; ++i) {
-    DagInit *Pat = dynamic_cast<DagInit*>(Comps->getElement(i));
-    if (!Pat)
-      throw TGError(TheDef->getLoc(), "Invalid dag '" +
-                    Comps->getElement(i)->getAsString() +
-                    "' in CompositeIndices");
-    DefInit *BaseIdxInit = dynamic_cast<DefInit*>(Pat->getOperator());
-    if (!BaseIdxInit || !BaseIdxInit->getDef()->isSubClassOf("SubRegIndex"))
-      throw TGError(TheDef->getLoc(), "Invalid SubClassIndex in " +
-                    Pat->getAsString());
-    CodeGenSubRegIndex *BaseIdx = RegBank.getSubRegIdx(BaseIdxInit->getDef());
-
-    // Resolve list of subreg indices into R2.
-    CodeGenRegister *R2 = this;
-    for (DagInit::const_arg_iterator di = Pat->arg_begin(),
-         de = Pat->arg_end(); di != de; ++di) {
-      DefInit *IdxInit = dynamic_cast<DefInit*>(*di);
-      if (!IdxInit || !IdxInit->getDef()->isSubClassOf("SubRegIndex"))
-        throw TGError(TheDef->getLoc(), "Invalid SubClassIndex in " +
-                      Pat->getAsString());
-      CodeGenSubRegIndex *Idx = RegBank.getSubRegIdx(IdxInit->getDef());
-      const SubRegMap &R2Subs = R2->computeSubRegs(RegBank);
-      SubRegMap::const_iterator ni = R2Subs.find(Idx);
-      if (ni == R2Subs.end())
-        throw TGError(TheDef->getLoc(), "Composite " + Pat->getAsString() +
-                      " refers to bad index in " + R2->getName());
-      R2 = ni->second;
-    }
-
-    // Insert composite index. Allow overriding inherited indices etc.
-    SubRegs[BaseIdx] = R2;
-
-    // R2 is no longer an orphan.
-    Orphans.erase(R2);
-  }
-
   // Now Orphans contains the inherited subregisters without a direct index.
   // Create inferred indexes for all missing entries.
   // Work backwards in the Indices vector in order to compose subregs bottom-up.
@@ -327,14 +297,25 @@ CodeGenRegister::computeSubRegs(CodeGenRegBank &RegBank) {
   // Compute the inverse SubReg -> Idx map.
   for (SubRegMap::const_iterator SI = SubRegs.begin(), SE = SubRegs.end();
        SI != SE; ++SI) {
-    // Ignore idempotent sub-register indices.
-    if (SI->second == this)
+    if (SI->second == this) {
+      SMLoc Loc;
+      if (TheDef)
+        Loc = TheDef->getLoc();
+      throw TGError(Loc, "Register " + getName() +
+                    " has itself as a sub-register");
+    }
+    // Ensure that every sub-register has a unique name.
+    DenseMap<const CodeGenRegister*, CodeGenSubRegIndex*>::iterator Ins =
+      SubReg2Idx.insert(std::make_pair(SI->second, SI->first)).first;
+    if (Ins->second == SI->first)
       continue;
-    // Is is possible to have multiple names for the same sub-register.
-    // For example, XMM0 appears as sub_xmm, sub_sd, and sub_ss in YMM0.
-    // Eventually, this degeneration should go away, but for now we simply give
-    // precedence to the explicit sub-register index over the inherited ones.
-    SubReg2Idx.insert(std::make_pair(SI->second, SI->first));
+    // Trouble: Two different names for SI->second.
+    SMLoc Loc;
+    if (TheDef)
+      Loc = TheDef->getLoc();
+    throw TGError(Loc, "Sub-register can't have two names: " +
+                  SI->second->getName() + " available as " +
+                  SI->first->getName() + " and " + Ins->second->getName());
   }
 
   // Derive possible names for sub-register concatenations from any explicit
@@ -508,8 +489,6 @@ void CodeGenRegister::computeSuperRegs(CodeGenRegBank &RegBank) {
     Id.push_back(I->first->EnumValue);
     Id.push_back(I->second->TopoSig);
 
-    if (I->second == this)
-      continue;
     // Don't add duplicate entries.
     if (!I->second->SuperRegs.empty() && I->second->SuperRegs.back() == this)
       continue;
@@ -530,8 +509,7 @@ CodeGenRegister::addSubRegsPreOrder(SetVector<const CodeGenRegister*> &OSet,
   // Add any secondary sub-registers that weren't part of the explicit tree.
   for (SubRegMap::const_iterator I = SubRegs.begin(), E = SubRegs.end();
        I != E; ++I)
-    if (I->second != this)
-      OSet.insert(I->second);
+    OSet.insert(I->second);
 }
 
 // Compute overlapping registers.
@@ -970,7 +948,7 @@ void CodeGenRegisterClass::buildRegUnitSet(
 //                               CodeGenRegBank
 //===----------------------------------------------------------------------===//
 
-CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records) : Records(Records) {
+CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records) {
   // Configure register Sets to understand register classes and tuples.
   Sets.addFieldExpander("RegisterClass", "MemberList");
   Sets.addFieldExpander("CalleeSavedRegs", "SaveList");
@@ -980,7 +958,6 @@ CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records) : Records(Records) {
   // More indices will be synthesized later.
   std::vector<Record*> SRIs = Records.getAllDerivedDefinitions("SubRegIndex");
   std::sort(SRIs.begin(), SRIs.end(), LessRecord());
-  NumNamedIndices = SRIs.size();
   for (unsigned i = 0, e = SRIs.size(); i != e; ++i)
     getSubRegIdx(SRIs[i]);
   // Build composite maps from ComposedOf fields.
@@ -1048,6 +1025,15 @@ CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records) : Records(Records) {
   CodeGenRegisterClass::computeSubClasses(*this);
 }
 
+// Create a synthetic CodeGenSubRegIndex without a corresponding Record.
+CodeGenSubRegIndex*
+CodeGenRegBank::createSubRegIndex(StringRef Name, StringRef Namespace) {
+  CodeGenSubRegIndex *Idx = new CodeGenSubRegIndex(Name, Namespace,
+                                                   SubRegIndices.size() + 1);
+  SubRegIndices.push_back(Idx);
+  return Idx;
+}
+
 CodeGenSubRegIndex *CodeGenRegBank::getSubRegIdx(Record *Def) {
   CodeGenSubRegIndex *&Idx = Def2SubRegIdx[Def];
   if (Idx)
@@ -1112,7 +1098,7 @@ CodeGenRegBank::getCompositeSubRegIndex(CodeGenSubRegIndex *A,
 
   // None exists, synthesize one.
   std::string Name = A->getName() + "_then_" + B->getName();
-  Comp = getSubRegIdx(new Record(Name, SMLoc(), Records));
+  Comp = createSubRegIndex(Name, A->getNamespace());
   A->addComposite(B, Comp);
   return Comp;
 }
@@ -1132,7 +1118,7 @@ getConcatSubRegIndex(const SmallVector<CodeGenSubRegIndex*, 8> &Parts) {
     Name += '_';
     Name += Parts[i]->getName();
   }
-  return Idx = getSubRegIdx(new Record(Name, SMLoc(), Records));
+  return Idx = createSubRegIndex(Name, Parts.front()->getNamespace());
 }
 
 void CodeGenRegBank::computeComposites() {
diff --git a/utils/TableGen/CodeGenRegisters.h b/utils/TableGen/CodeGenRegisters.h
index eb6724e..827063e 100644
--- a/utils/TableGen/CodeGenRegisters.h
+++ b/utils/TableGen/CodeGenRegisters.h
@@ -35,14 +35,17 @@ namespace llvm {
   /// CodeGenSubRegIndex - Represents a sub-register index.
   class CodeGenSubRegIndex {
     Record *const TheDef;
+    std::string Name;
+    std::string Namespace;
 
   public:
     const unsigned EnumValue;
 
     CodeGenSubRegIndex(Record *R, unsigned Enum);
+    CodeGenSubRegIndex(StringRef N, StringRef Nspace, unsigned Enum);
 
-    const std::string &getName() const;
-    std::string getNamespace() const;
+    const std::string &getName() const { return Name; }
+    const std::string &getNamespace() const { return Namespace; }
     std::string getQualifiedName() const;
 
     // Order CodeGenSubRegIndex pointers by EnumValue.
@@ -422,13 +425,13 @@ namespace llvm {
   // CodeGenRegBank - Represent a target's registers and the relations between
   // them.
   class CodeGenRegBank {
-    RecordKeeper &Records;
     SetTheory Sets;
 
     // SubRegIndices.
     std::vector<CodeGenSubRegIndex*> SubRegIndices;
     DenseMap<Record*, CodeGenSubRegIndex*> Def2SubRegIdx;
-    unsigned NumNamedIndices;
+
+    CodeGenSubRegIndex *createSubRegIndex(StringRef Name, StringRef NameSpace);
 
     typedef std::map<SmallVector<CodeGenSubRegIndex*, 8>,
                      CodeGenSubRegIndex*> ConcatIdxMap;
@@ -495,7 +498,6 @@ namespace llvm {
     // in the .td files. The rest are synthesized such that all sub-registers
     // have a unique name.
     ArrayRef<CodeGenSubRegIndex*> getSubRegIndices() { return SubRegIndices; }
-    unsigned getNumNamedIndices() { return NumNamedIndices; }
 
     // Find a SubRegIndex form its Record def.
     CodeGenSubRegIndex *getSubRegIdx(Record*);
diff --git a/utils/TableGen/FixedLenDecoderEmitter.cpp b/utils/TableGen/FixedLenDecoderEmitter.cpp
index 2cdde55..e89c393 100644
--- a/utils/TableGen/FixedLenDecoderEmitter.cpp
+++ b/utils/TableGen/FixedLenDecoderEmitter.cpp
@@ -17,9 +17,15 @@
 #include "CodeGenTarget.h"
 #include "llvm/TableGen/Record.h"
 #include "llvm/ADT/APInt.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCFixedLenDisassembler.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/LEB128.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/TableGen/TableGenBackend.h"
 
@@ -35,9 +41,7 @@ struct EncodingField {
   EncodingField(unsigned B, unsigned W, unsigned O)
     : Base(B), Width(W), Offset(O) { }
 };
-} // End anonymous namespace
 
-namespace {
 struct OperandInfo {
   std::vector<EncodingField> Fields;
   std::string Decoder;
@@ -56,10 +60,25 @@ struct OperandInfo {
   const_iterator begin() const { return Fields.begin(); }
   const_iterator end() const   { return Fields.end();   }
 };
+
+typedef std::vector<uint8_t> DecoderTable;
+typedef uint32_t DecoderFixup;
+typedef std::vector<DecoderFixup> FixupList;
+typedef std::vector<FixupList> FixupScopeList;
+typedef SetVector<std::string> PredicateSet;
+typedef SetVector<std::string> DecoderSet;
+struct DecoderTableInfo {
+  DecoderTable Table;
+  FixupScopeList FixupStack;
+  PredicateSet Predicates;
+  DecoderSet Decoders;
+};
+
 } // End anonymous namespace
 
 namespace {
 class FixedLenDecoderEmitter {
+  const std::vector<const CodeGenInstruction*> *NumberedInstructions;
 public:
 
   // Defaults preserved here for documentation, even though they aren't
@@ -77,6 +96,17 @@ public:
     GuardPrefix(GPrefix), GuardPostfix(GPostfix),
     ReturnOK(ROK), ReturnFail(RFail), Locals(L) {}
 
+  // Emit the decoder state machine table.
+  void emitTable(formatted_raw_ostream &o, DecoderTable &Table,
+                 unsigned Indentation, unsigned BitWidth,
+                 StringRef Namespace) const;
+  void emitPredicateFunction(formatted_raw_ostream &OS,
+                             PredicateSet &Predicates,
+                             unsigned Indentation) const;
+  void emitDecoderFunction(formatted_raw_ostream &OS,
+                           DecoderSet &Decoders,
+                           unsigned Indentation) const;
+
   // run - Output the code emitter
   void run(raw_ostream &o);
 
@@ -120,9 +150,7 @@ static bit_value_t bitFromBits(const BitsInit &bits, unsigned index) {
 }
 // Prints the bit value for each position.
 static void dumpBits(raw_ostream &o, const BitsInit &bits) {
-  unsigned index;
-
-  for (index = bits.getNumBits(); index > 0; index--) {
+  for (unsigned index = bits.getNumBits(); index > 0; --index) {
     switch (bitFromBits(bits, index - 1)) {
     case BIT_TRUE:
       o << "1";
@@ -238,8 +266,9 @@ public:
   // match the remaining undecoded encoding bits against the singleton.
   void recurse();
 
-  // Emit code to decode instructions given a segment or segments of bits.
-  void emit(raw_ostream &o, unsigned &Indentation) const;
+  // Emit table entries to decode instructions given a segment or segments of
+  // bits.
+  void emitTableEntry(DecoderTableInfo &TableInfo) const;
 
   // Returns the number of fanout produced by the filter.  More fanout implies
   // the filter distinguishes more categories of instructions.
@@ -338,12 +367,7 @@ public:
     doFilter();
   }
 
-  // The top level filter chooser has NULL as its parent.
-  bool isTopLevel() const { return Parent == NULL; }
-
-  // Emit the top level typedef and decodeInstruction() function.
-  void emitTop(raw_ostream &o, unsigned Indentation,
-               const std::string &Namespace) const;
+  unsigned getBitWidth() const { return BitWidth; }
 
 protected:
   // Populates the insn given the uid.
@@ -414,21 +438,28 @@ protected:
   bool emitPredicateMatch(raw_ostream &o, unsigned &Indentation,
                           unsigned Opc) const;
 
-  void emitSoftFailCheck(raw_ostream &o, unsigned Indentation,
-                         unsigned Opc) const;
+  bool doesOpcodeNeedPredicate(unsigned Opc) const;
+  unsigned getPredicateIndex(DecoderTableInfo &TableInfo, StringRef P) const;
+  void emitPredicateTableEntry(DecoderTableInfo &TableInfo,
+                               unsigned Opc) const;
+
+  void emitSoftFailTableEntry(DecoderTableInfo &TableInfo,
+                              unsigned Opc) const;
 
-  // Emits code to decode the singleton.  Return true if we have matched all the
-  // well-known bits.
-  bool emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
-                            unsigned Opc) const;
+  // Emits table entries to decode the singleton.
+  void emitSingletonTableEntry(DecoderTableInfo &TableInfo,
+                               unsigned Opc) const;
 
   // Emits code to decode the singleton, and then to decode the rest.
-  void emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
-                            const Filter &Best) const;
+  void emitSingletonTableEntry(DecoderTableInfo &TableInfo,
+                               const Filter &Best) const;
 
-  void emitBinaryParser(raw_ostream &o , unsigned &Indentation,
+  void emitBinaryParser(raw_ostream &o, unsigned &Indentation,
                         const OperandInfo &OpInfo) const;
 
+  void emitDecoder(raw_ostream &OS, unsigned Indentation, unsigned Opc) const;
+  unsigned getDecoderIndex(DecoderSet &Decoders, unsigned Opc) const;
+
   // Assign a single filter and run with it.
   void runSingleFilter(unsigned startBit, unsigned numBit, bool mixed);
 
@@ -447,10 +478,10 @@ protected:
   // dump the conflict set to the standard error.
   void doFilter();
 
-  // Emits code to decode our share of instructions.  Returns true if the
-  // emitted code causes a return, which occurs if we know how to decode
-  // the instruction at this level or the instruction is not decodeable.
-  bool emit(raw_ostream &o, unsigned &Indentation) const;
+public:
+  // emitTableEntries - Emit state machine entries to decode our share of
+  // instructions.
+  void emitTableEntries(DecoderTableInfo &TableInfo) const;
 };
 } // End anonymous namespace
 
@@ -524,11 +555,9 @@ void Filter::recurse() {
   // Starts by inheriting our parent filter chooser's filter bit values.
   std::vector<bit_value_t> BitValueArray(Owner->FilterBitValues);
 
-  unsigned bitIndex;
-
   if (VariableInstructions.size()) {
     // Conservatively marks each segment position as BIT_UNSET.
-    for (bitIndex = 0; bitIndex < NumBits; bitIndex++)
+    for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex)
       BitValueArray[StartBit + bitIndex] = BIT_UNSET;
 
     // Delegates to an inferior filter chooser for further processing on this
@@ -544,7 +573,7 @@ void Filter::recurse() {
   }
 
   // No need to recurse for a singleton filtered instruction.
-  // See also Filter::emit().
+  // See also Filter::emit*().
   if (getNumFiltered() == 1) {
     //Owner->SingletonExists(LastOpcFiltered);
     assert(FilterChooserMap.size() == 1);
@@ -557,7 +586,7 @@ void Filter::recurse() {
        mapIterator++) {
 
     // Marks all the segment positions with either BIT_TRUE or BIT_FALSE.
-    for (bitIndex = 0; bitIndex < NumBits; bitIndex++) {
+    for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex) {
       if (mapIterator->first & (1ULL << bitIndex))
         BitValueArray[StartBit + bitIndex] = BIT_TRUE;
       else
@@ -577,64 +606,100 @@ void Filter::recurse() {
   }
 }
 
-// Emit code to decode instructions given a segment or segments of bits.
-void Filter::emit(raw_ostream &o, unsigned &Indentation) const {
-  o.indent(Indentation) << "// Check Inst{";
-
-  if (NumBits > 1)
-    o << (StartBit + NumBits - 1) << '-';
+static void resolveTableFixups(DecoderTable &Table, const FixupList &Fixups,
+                               uint32_t DestIdx) {
+  // Any NumToSkip fixups in the current scope can resolve to the
+  // current location.
+  for (FixupList::const_reverse_iterator I = Fixups.rbegin(),
+                                         E = Fixups.rend();
+       I != E; ++I) {
+    // Calculate the distance from the byte following the fixup entry byte
+    // to the destination. The Target is calculated from after the 16-bit
+    // NumToSkip entry itself, so subtract two  from the displacement here
+    // to account for that.
+    uint32_t FixupIdx = *I;
+    uint32_t Delta = DestIdx - FixupIdx - 2;
+    // Our NumToSkip entries are 16-bits. Make sure our table isn't too
+    // big.
+    assert(Delta < 65536U && "disassembler decoding table too large!");
+    Table[FixupIdx] = (uint8_t)Delta;
+    Table[FixupIdx + 1] = (uint8_t)(Delta >> 8);
+  }
+}
 
-  o << StartBit << "} ...\n";
+// Emit table entries to decode instructions given a segment or segments
+// of bits.
+void Filter::emitTableEntry(DecoderTableInfo &TableInfo) const {
+  TableInfo.Table.push_back(MCD::OPC_ExtractField);
+  TableInfo.Table.push_back(StartBit);
+  TableInfo.Table.push_back(NumBits);
 
-  o.indent(Indentation) << "switch (fieldFromInstruction" << Owner->BitWidth
-                        << "(insn, " << StartBit << ", "
-                        << NumBits << ")) {\n";
+  // A new filter entry begins a new scope for fixup resolution.
+  TableInfo.FixupStack.push_back(FixupList());
 
   std::map<unsigned, const FilterChooser*>::const_iterator filterIterator;
 
-  bool DefaultCase = false;
+  DecoderTable &Table = TableInfo.Table;
+
+  size_t PrevFilter = 0;
+  bool HasFallthrough = false;
   for (filterIterator = FilterChooserMap.begin();
        filterIterator != FilterChooserMap.end();
        filterIterator++) {
-
     // Field value -1 implies a non-empty set of variable instructions.
     // See also recurse().
     if (filterIterator->first == (unsigned)-1) {
-      DefaultCase = true;
-
-      o.indent(Indentation) << "default:\n";
-      o.indent(Indentation) << "  break; // fallthrough\n";
-
-      // Closing curly brace for the switch statement.
-      // This is unconventional because we want the default processing to be
-      // performed for the fallthrough cases as well, i.e., when the "cases"
-      // did not prove a decoded instruction.
-      o.indent(Indentation) << "}\n";
-
-    } else
-      o.indent(Indentation) << "case " << filterIterator->first << ":\n";
+      HasFallthrough = true;
+
+      // Each scope should always have at least one filter value to check
+      // for.
+      assert(PrevFilter != 0 && "empty filter set!");
+      FixupList &CurScope = TableInfo.FixupStack.back();
+      // Resolve any NumToSkip fixups in the current scope.
+      resolveTableFixups(Table, CurScope, Table.size());
+      CurScope.clear();
+      PrevFilter = 0;  // Don't re-process the filter's fallthrough.
+    } else {
+      Table.push_back(MCD::OPC_FilterValue);
+      // Encode and emit the value to filter against.
+      uint8_t Buffer[8];
+      unsigned Len = encodeULEB128(filterIterator->first, Buffer);
+      Table.insert(Table.end(), Buffer, Buffer + Len);
+      // Reserve space for the NumToSkip entry. We'll backpatch the value
+      // later.
+      PrevFilter = Table.size();
+      Table.push_back(0);
+      Table.push_back(0);
+    }
 
     // We arrive at a category of instructions with the same segment value.
     // Now delegate to the sub filter chooser for further decodings.
     // The case may fallthrough, which happens if the remaining well-known
     // encoding bits do not match exactly.
-    if (!DefaultCase) { ++Indentation; ++Indentation; }
-
-    filterIterator->second->emit(o, Indentation);
-    // For top level default case, there's no need for a break statement.
-    if (Owner->isTopLevel() && DefaultCase)
-      break;
-    
-    o.indent(Indentation) << "break;\n";
-
-    if (!DefaultCase) { --Indentation; --Indentation; }
+    filterIterator->second->emitTableEntries(TableInfo);
+
+    // Now that we've emitted the body of the handler, update the NumToSkip
+    // of the filter itself to be able to skip forward when false. Subtract
+    // two as to account for the width of the NumToSkip field itself.
+    if (PrevFilter) {
+      uint32_t NumToSkip = Table.size() - PrevFilter - 2;
+      assert(NumToSkip < 65536U && "disassembler decoding table too large!");
+      Table[PrevFilter] = (uint8_t)NumToSkip;
+      Table[PrevFilter + 1] = (uint8_t)(NumToSkip >> 8);
+    }
   }
 
-  // If there is no default case, we still need to supply a closing brace.
-  if (!DefaultCase) {
-    // Closing curly brace for the switch statement.
-    o.indent(Indentation) << "}\n";
-  }
+  // Any remaining unresolved fixups bubble up to the parent fixup scope.
+  assert(TableInfo.FixupStack.size() > 1 && "fixup stack underflow!");
+  FixupScopeList::iterator Source = TableInfo.FixupStack.end() - 1;
+  FixupScopeList::iterator Dest = Source - 1;
+  Dest->insert(Dest->end(), Source->begin(), Source->end());
+  TableInfo.FixupStack.pop_back();
+
+  // If there is no fallthrough, then the final filter should get fixed
+  // up according to the enclosing scope rather than the current position.
+  if (!HasFallthrough)
+    TableInfo.FixupStack.back().push_back(PrevFilter);
 }
 
 // Returns the number of fanout produced by the filter.  More fanout implies
@@ -652,31 +717,205 @@ unsigned Filter::usefulness() const {
 //                              //
 //////////////////////////////////
 
-// Emit the top level typedef and decodeInstruction() function.
-void FilterChooser::emitTop(raw_ostream &o, unsigned Indentation,
-                            const std::string &Namespace) const {
-  o.indent(Indentation) <<
-    "static MCDisassembler::DecodeStatus decode" << Namespace << "Instruction"
-    << BitWidth << "(MCInst &MI, uint" << BitWidth
-    << "_t insn, uint64_t Address, "
-    << "const void *Decoder, const MCSubtargetInfo &STI) {\n";
-  o.indent(Indentation) << "  unsigned tmp = 0;\n";
-  o.indent(Indentation) << "  (void)tmp;\n";
-  o.indent(Indentation) << Emitter->Locals << "\n";
-  o.indent(Indentation) << "  uint64_t Bits = STI.getFeatureBits();\n";
-  o.indent(Indentation) << "  (void)Bits;\n";
-
-  ++Indentation; ++Indentation;
-  // Emits code to decode the instructions.
-  emit(o, Indentation);
-
-  o << '\n';
-  o.indent(Indentation) << "return " << Emitter->ReturnFail << ";\n";
-  --Indentation; --Indentation;
-
-  o.indent(Indentation) << "}\n";
-
-  o << '\n';
+// Emit the decoder state machine table.
+void FixedLenDecoderEmitter::emitTable(formatted_raw_ostream &OS,
+                                       DecoderTable &Table,
+                                       unsigned Indentation,
+                                       unsigned BitWidth,
+                                       StringRef Namespace) const {
+  OS.indent(Indentation) << "static const uint8_t DecoderTable" << Namespace
+    << BitWidth << "[] = {\n";
+
+  Indentation += 2;
+
+  // FIXME: We may be able to use the NumToSkip values to recover
+  // appropriate indentation levels.
+  DecoderTable::const_iterator I = Table.begin();
+  DecoderTable::const_iterator E = Table.end();
+  while (I != E) {
+    assert (I < E && "incomplete decode table entry!");
+
+    uint64_t Pos = I - Table.begin();
+    OS << "/* " << Pos << " */";
+    OS.PadToColumn(12);
+
+    switch (*I) {
+    default:
+      throw "invalid decode table opcode";
+    case MCD::OPC_ExtractField: {
+      ++I;
+      unsigned Start = *I++;
+      unsigned Len = *I++;
+      OS.indent(Indentation) << "MCD::OPC_ExtractField, " << Start << ", "
+        << Len << ",  // Inst{";
+      if (Len > 1)
+        OS << (Start + Len - 1) << "-";
+      OS << Start << "} ...\n";
+      break;
+    }
+    case MCD::OPC_FilterValue: {
+      ++I;
+      OS.indent(Indentation) << "MCD::OPC_FilterValue, ";
+      // The filter value is ULEB128 encoded.
+      while (*I >= 128)
+        OS << utostr(*I++) << ", ";
+      OS << utostr(*I++) << ", ";
+
+      // 16-bit numtoskip value.
+      uint8_t Byte = *I++;
+      uint32_t NumToSkip = Byte;
+      OS << utostr(Byte) << ", ";
+      Byte = *I++;
+      OS << utostr(Byte) << ", ";
+      NumToSkip |= Byte << 8;
+      OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n";
+      break;
+    }
+    case MCD::OPC_CheckField: {
+      ++I;
+      unsigned Start = *I++;
+      unsigned Len = *I++;
+      OS.indent(Indentation) << "MCD::OPC_CheckField, " << Start << ", "
+        << Len << ", ";// << Val << ", " << NumToSkip << ",\n";
+      // ULEB128 encoded field value.
+      for (; *I >= 128; ++I)
+        OS << utostr(*I) << ", ";
+      OS << utostr(*I++) << ", ";
+      // 16-bit numtoskip value.
+      uint8_t Byte = *I++;
+      uint32_t NumToSkip = Byte;
+      OS << utostr(Byte) << ", ";
+      Byte = *I++;
+      OS << utostr(Byte) << ", ";
+      NumToSkip |= Byte << 8;
+      OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n";
+      break;
+    }
+    case MCD::OPC_CheckPredicate: {
+      ++I;
+      OS.indent(Indentation) << "MCD::OPC_CheckPredicate, ";
+      for (; *I >= 128; ++I)
+        OS << utostr(*I) << ", ";
+      OS << utostr(*I++) << ", ";
+
+      // 16-bit numtoskip value.
+      uint8_t Byte = *I++;
+      uint32_t NumToSkip = Byte;
+      OS << utostr(Byte) << ", ";
+      Byte = *I++;
+      OS << utostr(Byte) << ", ";
+      NumToSkip |= Byte << 8;
+      OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n";
+      break;
+    }
+    case MCD::OPC_Decode: {
+      ++I;
+      // Extract the ULEB128 encoded Opcode to a buffer.
+      uint8_t Buffer[8], *p = Buffer;
+      while ((*p++ = *I++) >= 128)
+        assert((p - Buffer) <= (ptrdiff_t)sizeof(Buffer)
+               && "ULEB128 value too large!");
+      // Decode the Opcode value.
+      unsigned Opc = decodeULEB128(Buffer);
+      OS.indent(Indentation) << "MCD::OPC_Decode, ";
+      for (p = Buffer; *p >= 128; ++p)
+        OS << utostr(*p) << ", ";
+      OS << utostr(*p) << ", ";
+
+      // Decoder index.
+      for (; *I >= 128; ++I)
+        OS << utostr(*I) << ", ";
+      OS << utostr(*I++) << ", ";
+
+      OS << "// Opcode: "
+         << NumberedInstructions->at(Opc)->TheDef->getName() << "\n";
+      break;
+    }
+    case MCD::OPC_SoftFail: {
+      ++I;
+      OS.indent(Indentation) << "MCD::OPC_SoftFail";
+      // Positive mask
+      uint64_t Value = 0;
+      unsigned Shift = 0;
+      do {
+        OS << ", " << utostr(*I);
+        Value += (*I & 0x7f) << Shift;
+        Shift += 7;
+      } while (*I++ >= 128);
+      if (Value > 127)
+        OS << " /* 0x" << utohexstr(Value) << " */";
+      // Negative mask
+      Value = 0;
+      Shift = 0;
+      do {
+        OS << ", " << utostr(*I);
+        Value += (*I & 0x7f) << Shift;
+        Shift += 7;
+      } while (*I++ >= 128);
+      if (Value > 127)
+        OS << " /* 0x" << utohexstr(Value) << " */";
+      OS << ",\n";
+      break;
+    }
+    case MCD::OPC_Fail: {
+      ++I;
+      OS.indent(Indentation) << "MCD::OPC_Fail,\n";
+      break;
+    }
+    }
+  }
+  OS.indent(Indentation) << "0\n";
+
+  Indentation -= 2;
+
+  OS.indent(Indentation) << "};\n\n";
+}
+
+void FixedLenDecoderEmitter::
+emitPredicateFunction(formatted_raw_ostream &OS, PredicateSet &Predicates,
+                      unsigned Indentation) const {
+  // The predicate function is just a big switch statement based on the
+  // input predicate index.
+  OS.indent(Indentation) << "static bool checkDecoderPredicate(unsigned Idx, "
+    << "uint64_t Bits) {\n";
+  Indentation += 2;
+  OS.indent(Indentation) << "switch (Idx) {\n";
+  OS.indent(Indentation) << "default: llvm_unreachable(\"Invalid index!\");\n";
+  unsigned Index = 0;
+  for (PredicateSet::const_iterator I = Predicates.begin(), E = Predicates.end();
+       I != E; ++I, ++Index) {
+    OS.indent(Indentation) << "case " << Index << ":\n";
+    OS.indent(Indentation+2) << "return (" << *I << ");\n";
+  }
+  OS.indent(Indentation) << "}\n";
+  Indentation -= 2;
+  OS.indent(Indentation) << "}\n\n";
+}
+
+void FixedLenDecoderEmitter::
+emitDecoderFunction(formatted_raw_ostream &OS, DecoderSet &Decoders,
+                    unsigned Indentation) const {
+  // The decoder function is just a big switch statement based on the
+  // input decoder index.
+  OS.indent(Indentation) << "template<typename InsnType>\n";
+  OS.indent(Indentation) << "static DecodeStatus decodeToMCInst(DecodeStatus S,"
+    << " unsigned Idx, InsnType insn, MCInst &MI,\n";
+  OS.indent(Indentation) << "                                   uint64_t "
+    << "Address, const void *Decoder) {\n";
+  Indentation += 2;
+  OS.indent(Indentation) << "InsnType tmp;\n";
+  OS.indent(Indentation) << "switch (Idx) {\n";
+  OS.indent(Indentation) << "default: llvm_unreachable(\"Invalid index!\");\n";
+  unsigned Index = 0;
+  for (DecoderSet::const_iterator I = Decoders.begin(), E = Decoders.end();
+       I != E; ++I, ++Index) {
+    OS.indent(Indentation) << "case " << Index << ":\n";
+    OS << *I;
+    OS.indent(Indentation+2) << "return S;\n";
+  }
+  OS.indent(Indentation) << "}\n";
+  Indentation -= 2;
+  OS.indent(Indentation) << "}\n\n";
 }
 
 // Populates the field of the insn given the start position and the number of
@@ -703,9 +942,7 @@ bool FilterChooser::fieldFromInsn(uint64_t &Field, insn_t &Insn,
 /// filter array as a series of chars.
 void FilterChooser::dumpFilterArray(raw_ostream &o,
                                  const std::vector<bit_value_t> &filter) const {
-  unsigned bitIndex;
-
-  for (bitIndex = BitWidth; bitIndex > 0; bitIndex--) {
+  for (unsigned bitIndex = BitWidth; bitIndex > 0; bitIndex--) {
     switch (filter[bitIndex - 1]) {
     case BIT_UNFILTERED:
       o << ".";
@@ -827,26 +1064,71 @@ void FilterChooser::emitBinaryParser(raw_ostream &o, unsigned &Indentation,
 
   if (OpInfo.numFields() == 1) {
     OperandInfo::const_iterator OI = OpInfo.begin();
-    o.indent(Indentation) << "  tmp = fieldFromInstruction" << BitWidth
-                            << "(insn, " << OI->Base << ", " << OI->Width
-                            << ");\n";
+    o.indent(Indentation) << "tmp = fieldFromInstruction"
+                          << "(insn, " << OI->Base << ", " << OI->Width
+                          << ");\n";
   } else {
-    o.indent(Indentation) << "  tmp = 0;\n";
+    o.indent(Indentation) << "tmp = 0;\n";
     for (OperandInfo::const_iterator OI = OpInfo.begin(), OE = OpInfo.end();
          OI != OE; ++OI) {
-      o.indent(Indentation) << "  tmp |= (fieldFromInstruction" << BitWidth
+      o.indent(Indentation) << "tmp |= (fieldFromInstruction"
                             << "(insn, " << OI->Base << ", " << OI->Width
                             << ") << " << OI->Offset << ");\n";
     }
   }
 
   if (Decoder != "")
-    o.indent(Indentation) << "  " << Emitter->GuardPrefix << Decoder
+    o.indent(Indentation) << Emitter->GuardPrefix << Decoder
                           << "(MI, tmp, Address, Decoder)"
                           << Emitter->GuardPostfix << "\n";
   else
-    o.indent(Indentation) << "  MI.addOperand(MCOperand::CreateImm(tmp));\n";
+    o.indent(Indentation) << "MI.addOperand(MCOperand::CreateImm(tmp));\n";
+
+}
+
+void FilterChooser::emitDecoder(raw_ostream &OS, unsigned Indentation,
+                                unsigned Opc) const {
+  std::map<unsigned, std::vector<OperandInfo> >::const_iterator OpIter =
+    Operands.find(Opc);
+  const std::vector<OperandInfo>& InsnOperands = OpIter->second;
+  for (std::vector<OperandInfo>::const_iterator
+       I = InsnOperands.begin(), E = InsnOperands.end(); I != E; ++I) {
+    // If a custom instruction decoder was specified, use that.
+    if (I->numFields() == 0 && I->Decoder.size()) {
+      OS.indent(Indentation) << Emitter->GuardPrefix << I->Decoder
+        << "(MI, insn, Address, Decoder)"
+        << Emitter->GuardPostfix << "\n";
+      break;
+    }
 
+    emitBinaryParser(OS, Indentation, *I);
+  }
+}
+
+unsigned FilterChooser::getDecoderIndex(DecoderSet &Decoders,
+                                        unsigned Opc) const {
+  // Build up the predicate string.
+  SmallString<256> Decoder;
+  // FIXME: emitDecoder() function can take a buffer directly rather than
+  // a stream.
+  raw_svector_ostream S(Decoder);
+  unsigned I = 4;
+  emitDecoder(S, I, Opc);
+  S.flush();
+
+  // Using the full decoder string as the key value here is a bit
+  // heavyweight, but is effective. If the string comparisons become a
+  // performance concern, we can implement a mangling of the predicate
+  // data easilly enough with a map back to the actual string. That's
+  // overkill for now, though.
+
+  // Make sure the predicate is in the table.
+  Decoders.insert(Decoder.str());
+  // Now figure out the index for when we write out the table.
+  DecoderSet::const_iterator P = std::find(Decoders.begin(),
+                                           Decoders.end(),
+                                           Decoder.str());
+  return (unsigned)(P - Decoders.begin());
 }
 
 static void emitSinglePredicateMatch(raw_ostream &o, StringRef str,
@@ -887,8 +1169,74 @@ bool FilterChooser::emitPredicateMatch(raw_ostream &o, unsigned &Indentation,
   return Predicates->getSize() > 0;
 }
 
-void FilterChooser::emitSoftFailCheck(raw_ostream &o, unsigned Indentation,
-                                      unsigned Opc) const {
+bool FilterChooser::doesOpcodeNeedPredicate(unsigned Opc) const {
+  ListInit *Predicates =
+    AllInstructions[Opc]->TheDef->getValueAsListInit("Predicates");
+  for (unsigned i = 0; i < Predicates->getSize(); ++i) {
+    Record *Pred = Predicates->getElementAsRecord(i);
+    if (!Pred->getValue("AssemblerMatcherPredicate"))
+      continue;
+
+    std::string P = Pred->getValueAsString("AssemblerCondString");
+
+    if (!P.length())
+      continue;
+
+    return true;
+  }
+  return false;
+}
+
+unsigned FilterChooser::getPredicateIndex(DecoderTableInfo &TableInfo,
+                                          StringRef Predicate) const {
+  // Using the full predicate string as the key value here is a bit
+  // heavyweight, but is effective. If the string comparisons become a
+  // performance concern, we can implement a mangling of the predicate
+  // data easilly enough with a map back to the actual string. That's
+  // overkill for now, though.
+
+  // Make sure the predicate is in the table.
+  TableInfo.Predicates.insert(Predicate.str());
+  // Now figure out the index for when we write out the table.
+  PredicateSet::const_iterator P = std::find(TableInfo.Predicates.begin(),
+                                             TableInfo.Predicates.end(),
+                                             Predicate.str());
+  return (unsigned)(P - TableInfo.Predicates.begin());
+}
+
+void FilterChooser::emitPredicateTableEntry(DecoderTableInfo &TableInfo,
+                                            unsigned Opc) const {
+  if (!doesOpcodeNeedPredicate(Opc))
+    return;
+
+  // Build up the predicate string.
+  SmallString<256> Predicate;
+  // FIXME: emitPredicateMatch() functions can take a buffer directly rather
+  // than a stream.
+  raw_svector_ostream PS(Predicate);
+  unsigned I = 0;
+  emitPredicateMatch(PS, I, Opc);
+
+  // Figure out the index into the predicate table for the predicate just
+  // computed.
+  unsigned PIdx = getPredicateIndex(TableInfo, PS.str());
+  SmallString<16> PBytes;
+  raw_svector_ostream S(PBytes);
+  encodeULEB128(PIdx, S);
+  S.flush();
+
+  TableInfo.Table.push_back(MCD::OPC_CheckPredicate);
+  // Predicate index
+  for (unsigned i = 0, e = PBytes.size(); i != e; ++i)
+    TableInfo.Table.push_back(PBytes[i]);
+  // Push location for NumToSkip backpatching.
+  TableInfo.FixupStack.back().push_back(TableInfo.Table.size());
+  TableInfo.Table.push_back(0);
+  TableInfo.Table.push_back(0);
+}
+
+void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo,
+                                           unsigned Opc) const {
   BitsInit *SFBits =
     AllInstructions[Opc]->TheDef->getValueAsBitsInit("SoftFail");
   if (!SFBits) return;
@@ -914,13 +1262,11 @@ void FilterChooser::emitSoftFailCheck(raw_ostream &o, unsigned Indentation,
     default:
       // The bit is not set; this must be an error!
       StringRef Name = AllInstructions[Opc]->TheDef->getName();
-      errs() << "SoftFail Conflict: bit SoftFail{" << i << "} in "
-             << Name
-             << " is set but Inst{" << i <<"} is unset!\n"
+      errs() << "SoftFail Conflict: bit SoftFail{" << i << "} in " << Name
+             << " is set but Inst{" << i << "} is unset!\n"
              << "  - You can only mark a bit as SoftFail if it is fully defined"
              << " (1/0 - not '?') in Inst\n";
-      o << "#error SoftFail Conflict, " << Name << "::SoftFail{" << i 
-        << "} set but Inst{" << i << "} undefined!\n";
+      return;
     }
   }
 
@@ -930,27 +1276,31 @@ void FilterChooser::emitSoftFailCheck(raw_ostream &o, unsigned Indentation,
   if (!NeedPositiveMask && !NeedNegativeMask)
     return;
 
-  std::string PositiveMaskStr = PositiveMask.toString(16, /*signed=*/false);
-  std::string NegativeMaskStr = NegativeMask.toString(16, /*signed=*/false);
-  StringRef BitExt = "";
-  if (BitWidth > 32)
-    BitExt = "ULL";
-
-  o.indent(Indentation) << "if (";
-  if (NeedPositiveMask)
-    o << "insn & 0x" << PositiveMaskStr << BitExt;
-  if (NeedPositiveMask && NeedNegativeMask)
-    o << " || ";
-  if (NeedNegativeMask)
-    o << "~insn & 0x" << NegativeMaskStr << BitExt;
-  o << ")\n";
-  o.indent(Indentation+2) << "S = MCDisassembler::SoftFail;\n";
+  TableInfo.Table.push_back(MCD::OPC_SoftFail);
+
+  SmallString<16> MaskBytes;
+  raw_svector_ostream S(MaskBytes);
+  if (NeedPositiveMask) {
+    encodeULEB128(PositiveMask.getZExtValue(), S);
+    S.flush();
+    for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i)
+      TableInfo.Table.push_back(MaskBytes[i]);
+  } else
+    TableInfo.Table.push_back(0);
+  if (NeedNegativeMask) {
+    MaskBytes.clear();
+    S.resync();
+    encodeULEB128(NegativeMask.getZExtValue(), S);
+    S.flush();
+    for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i)
+      TableInfo.Table.push_back(MaskBytes[i]);
+  } else
+    TableInfo.Table.push_back(0);
 }
 
-// Emits code to decode the singleton.  Return true if we have matched all the
-// well-known bits.
-bool FilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
-                                         unsigned Opc) const {
+// Emits table entries to decode the singleton.
+void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
+                                            unsigned Opc) const {
   std::vector<unsigned> StartBits;
   std::vector<unsigned> EndBits;
   std::vector<uint64_t> FieldVals;
@@ -961,107 +1311,70 @@ bool FilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
   getIslands(StartBits, EndBits, FieldVals, Insn);
 
   unsigned Size = StartBits.size();
-  unsigned I, NumBits;
 
-  // If we have matched all the well-known bits, just issue a return.
-  if (Size == 0) {
-    o.indent(Indentation) << "if (";
-    if (!emitPredicateMatch(o, Indentation, Opc))
-      o << "1";
-    o << ") {\n";
-    emitSoftFailCheck(o, Indentation+2, Opc);
-    o.indent(Indentation) << "  MI.setOpcode(" << Opc << ");\n";
-    std::map<unsigned, std::vector<OperandInfo> >::const_iterator OpIter =
-      Operands.find(Opc);
-    const std::vector<OperandInfo>& InsnOperands = OpIter->second;
-    for (std::vector<OperandInfo>::const_iterator
-         I = InsnOperands.begin(), E = InsnOperands.end(); I != E; ++I) {
-      // If a custom instruction decoder was specified, use that.
-      if (I->numFields() == 0 && I->Decoder.size()) {
-        o.indent(Indentation) << "  " << Emitter->GuardPrefix << I->Decoder
-                              << "(MI, insn, Address, Decoder)"
-                              << Emitter->GuardPostfix << "\n";
-        break;
-      }
-
-      emitBinaryParser(o, Indentation, *I);
-    }
-
-    o.indent(Indentation) << "  return " << Emitter->ReturnOK << "; // "
-                          << nameWithID(Opc) << '\n';
-    o.indent(Indentation) << "}\n"; // Closing predicate block.
-    return true;
-  }
-
-  // Otherwise, there are more decodings to be done!
-
-  // Emit code to match the island(s) for the singleton.
-  o.indent(Indentation) << "// Check ";
-
-  for (I = Size; I != 0; --I) {
-    o << "Inst{" << EndBits[I-1] << '-' << StartBits[I-1] << "} ";
-    if (I > 1)
-      o << " && ";
-    else
-      o << "for singleton decoding...\n";
-  }
-
-  o.indent(Indentation) << "if (";
-  if (emitPredicateMatch(o, Indentation, Opc)) {
-    o << " &&\n";
-    o.indent(Indentation+4);
+  // Emit the predicate table entry if one is needed.
+  emitPredicateTableEntry(TableInfo, Opc);
+
+  // Check any additional encoding fields needed.
+  for (unsigned I = Size; I != 0; --I) {
+    unsigned NumBits = EndBits[I-1] - StartBits[I-1] + 1;
+    TableInfo.Table.push_back(MCD::OPC_CheckField);
+    TableInfo.Table.push_back(StartBits[I-1]);
+    TableInfo.Table.push_back(NumBits);
+    uint8_t Buffer[8], *p;
+    encodeULEB128(FieldVals[I-1], Buffer);
+    for (p = Buffer; *p >= 128 ; ++p)
+      TableInfo.Table.push_back(*p);
+    TableInfo.Table.push_back(*p);
+    // Push location for NumToSkip backpatching.
+    TableInfo.FixupStack.back().push_back(TableInfo.Table.size());
+    // The fixup is always 16-bits, so go ahead and allocate the space
+    // in the table so all our relative position calculations work OK even
+    // before we fully resolve the real value here.
+    TableInfo.Table.push_back(0);
+    TableInfo.Table.push_back(0);
   }
 
-  for (I = Size; I != 0; --I) {
-    NumBits = EndBits[I-1] - StartBits[I-1] + 1;
-    o << "fieldFromInstruction" << BitWidth << "(insn, "
-      << StartBits[I-1] << ", " << NumBits
-      << ") == " << FieldVals[I-1];
-    if (I > 1)
-      o << " && ";
-    else
-      o << ") {\n";
-  }
-  emitSoftFailCheck(o, Indentation+2, Opc);
-  o.indent(Indentation) << "  MI.setOpcode(" << Opc << ");\n";
-  std::map<unsigned, std::vector<OperandInfo> >::const_iterator OpIter =
-    Operands.find(Opc);
-  const std::vector<OperandInfo>& InsnOperands = OpIter->second;
-  for (std::vector<OperandInfo>::const_iterator
-       I = InsnOperands.begin(), E = InsnOperands.end(); I != E; ++I) {
-    // If a custom instruction decoder was specified, use that.
-    if (I->numFields() == 0 && I->Decoder.size()) {
-      o.indent(Indentation) << "  " << Emitter->GuardPrefix << I->Decoder
-                            << "(MI, insn, Address, Decoder)"
-                            << Emitter->GuardPostfix << "\n";
-      break;
-    }
-
-    emitBinaryParser(o, Indentation, *I);
-  }
-  o.indent(Indentation) << "  return " << Emitter->ReturnOK << "; // "
-                        << nameWithID(Opc) << '\n';
-  o.indent(Indentation) << "}\n";
-
-  return false;
+  // Check for soft failure of the match.
+  emitSoftFailTableEntry(TableInfo, Opc);
+
+  TableInfo.Table.push_back(MCD::OPC_Decode);
+  uint8_t Buffer[8], *p;
+  encodeULEB128(Opc, Buffer);
+  for (p = Buffer; *p >= 128 ; ++p)
+    TableInfo.Table.push_back(*p);
+  TableInfo.Table.push_back(*p);
+
+  unsigned DIdx = getDecoderIndex(TableInfo.Decoders, Opc);
+  SmallString<16> Bytes;
+  raw_svector_ostream S(Bytes);
+  encodeULEB128(DIdx, S);
+  S.flush();
+
+  // Decoder index
+  for (unsigned i = 0, e = Bytes.size(); i != e; ++i)
+    TableInfo.Table.push_back(Bytes[i]);
 }
 
-// Emits code to decode the singleton, and then to decode the rest.
-void FilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
-                                         const Filter &Best) const {
-
+// Emits table entries to decode the singleton, and then to decode the rest.
+void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
+                                            const Filter &Best) const {
   unsigned Opc = Best.getSingletonOpc();
 
-  emitSingletonDecoder(o, Indentation, Opc);
+  // complex singletons need predicate checks from the first singleton
+  // to refer forward to the variable filterchooser that follows.
+  TableInfo.FixupStack.push_back(FixupList());
 
-  // Emit code for the rest.
-  o.indent(Indentation) << "else\n";
+  emitSingletonTableEntry(TableInfo, Opc);
 
-  Indentation += 2;
-  Best.getVariableFC().emit(o, Indentation);
-  Indentation -= 2;
+  resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(),
+                     TableInfo.Table.size());
+  TableInfo.FixupStack.pop_back();
+
+  Best.getVariableFC().emitTableEntries(TableInfo);
 }
 
+
 // Assign a single filter and run with it.  Top level API client can initialize
 // with a single filter to start the filtering process.
 void FilterChooser::runSingleFilter(unsigned startBit, unsigned numBit,
@@ -1119,7 +1432,7 @@ bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) {
     }
   }
 
-  unsigned BitIndex, InsnIndex;
+  unsigned BitIndex;
 
   // We maintain BIT_WIDTH copies of the bitAttrs automaton.
   // The automaton consumes the corresponding bit from each
@@ -1149,7 +1462,7 @@ bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) {
     else
       bitAttrs.push_back(ATTR_NONE);
 
-  for (InsnIndex = 0; InsnIndex < numInstructions; ++InsnIndex) {
+  for (unsigned InsnIndex = 0; InsnIndex < numInstructions; ++InsnIndex) {
     insn_t insn;
 
     insnWithID(insn, Opcodes[InsnIndex]);
@@ -1200,7 +1513,7 @@ bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) {
   bitAttr_t RA = ATTR_NONE;
   unsigned StartBit = 0;
 
-  for (BitIndex = 0; BitIndex < BitWidth; BitIndex++) {
+  for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) {
     bitAttr_t bitAttr = bitAttrs[BitIndex];
 
     assert(bitAttr != ATTR_NONE && "Bit without attributes");
@@ -1341,36 +1654,29 @@ void FilterChooser::doFilter() {
   BestIndex = -1;
 }
 
-// Emits code to decode our share of instructions.  Returns true if the
-// emitted code causes a return, which occurs if we know how to decode
-// the instruction at this level or the instruction is not decodeable.
-bool FilterChooser::emit(raw_ostream &o, unsigned &Indentation) const {
-  if (Opcodes.size() == 1)
+// emitTableEntries - Emit state machine entries to decode our share of
+// instructions.
+void FilterChooser::emitTableEntries(DecoderTableInfo &TableInfo) const {
+  if (Opcodes.size() == 1) {
     // There is only one instruction in the set, which is great!
     // Call emitSingletonDecoder() to see whether there are any remaining
     // encodings bits.
-    return emitSingletonDecoder(o, Indentation, Opcodes[0]);
+    emitSingletonTableEntry(TableInfo, Opcodes[0]);
+    return;
+  }
 
   // Choose the best filter to do the decodings!
   if (BestIndex != -1) {
     const Filter &Best = Filters[BestIndex];
     if (Best.getNumFiltered() == 1)
-      emitSingletonDecoder(o, Indentation, Best);
+      emitSingletonTableEntry(TableInfo, Best);
     else
-      Best.emit(o, Indentation);
-    return false;
+      Best.emitTableEntry(TableInfo);
+    return;
   }
 
-  // We don't know how to decode these instructions!  Return 0 and dump the
-  // conflict set!
-  o.indent(Indentation) << "return 0;" << " // Conflict set: ";
-  for (int i = 0, N = Opcodes.size(); i < N; ++i) {
-    o << nameWithID(Opcodes[i]);
-    if (i < (N - 1))
-      o << ", ";
-    else
-      o << '\n';
-  }
+  // We don't know how to decode these instructions!  Dump the
+  // conflict set and bail.
 
   // Print out useful conflict information for postmortem analysis.
   errs() << "Decoding Conflict:\n";
@@ -1385,8 +1691,6 @@ bool FilterChooser::emit(raw_ostream &o, unsigned &Indentation) const {
              getBitsField(*AllInstructions[Opcodes[i]]->TheDef, "Inst"));
     errs() << '\n';
   }
-
-  return true;
 }
 
 static bool populateInstruction(const CodeGenInstruction &CGI, unsigned Opc,
@@ -1549,62 +1853,168 @@ static bool populateInstruction(const CodeGenInstruction &CGI, unsigned Opc,
   return true;
 }
 
-static void emitHelper(llvm::raw_ostream &o, unsigned BitWidth) {
-  unsigned Indentation = 0;
-  std::string WidthStr = "uint" + utostr(BitWidth) + "_t";
-
-  o << '\n';
-
-  o.indent(Indentation) << "static " << WidthStr <<
-    " fieldFromInstruction" << BitWidth <<
-    "(" << WidthStr <<" insn, unsigned startBit, unsigned numBits)\n";
-
-  o.indent(Indentation) << "{\n";
-
-  ++Indentation; ++Indentation;
-  o.indent(Indentation) << "assert(startBit + numBits <= " << BitWidth
-                        << " && \"Instruction field out of bounds!\");\n";
-  o << '\n';
-  o.indent(Indentation) << WidthStr << " fieldMask;\n";
-  o << '\n';
-  o.indent(Indentation) << "if (numBits == " << BitWidth << ")\n";
-
-  ++Indentation; ++Indentation;
-  o.indent(Indentation) << "fieldMask = (" << WidthStr << ")-1;\n";
-  --Indentation; --Indentation;
-
-  o.indent(Indentation) << "else\n";
-
-  ++Indentation; ++Indentation;
-  o.indent(Indentation) << "fieldMask = ((1 << numBits) - 1) << startBit;\n";
-  --Indentation; --Indentation;
-
-  o << '\n';
-  o.indent(Indentation) << "return (insn & fieldMask) >> startBit;\n";
-  --Indentation; --Indentation;
-
-  o.indent(Indentation) << "}\n";
+// emitFieldFromInstruction - Emit the templated helper function
+// fieldFromInstruction().
+static void emitFieldFromInstruction(formatted_raw_ostream &OS) {
+  OS << "// Helper function for extracting fields from encoded instructions.\n"
+     << "template<typename InsnType>\n"
+   << "static InsnType fieldFromInstruction(InsnType insn, unsigned startBit,\n"
+     << "                                     unsigned numBits) {\n"
+     << "    assert(startBit + numBits <= (sizeof(InsnType)*8) &&\n"
+     << "           \"Instruction field out of bounds!\");\n"
+     << "    InsnType fieldMask;\n"
+     << "    if (numBits == sizeof(InsnType)*8)\n"
+     << "      fieldMask = (InsnType)(-1LL);\n"
+     << "    else\n"
+     << "      fieldMask = ((1 << numBits) - 1) << startBit;\n"
+     << "    return (insn & fieldMask) >> startBit;\n"
+     << "}\n\n";
+}
 
-  o << '\n';
+// emitDecodeInstruction - Emit the templated helper function
+// decodeInstruction().
+static void emitDecodeInstruction(formatted_raw_ostream &OS) {
+  OS << "template<typename InsnType>\n"
+     << "static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,\n"
+     << "                                      InsnType insn, uint64_t Address,\n"
+     << "                                      const void *DisAsm,\n"
+     << "                                      const MCSubtargetInfo &STI) {\n"
+     << "  uint64_t Bits = STI.getFeatureBits();\n"
+     << "\n"
+     << "  const uint8_t *Ptr = DecodeTable;\n"
+     << "  uint32_t CurFieldValue;\n"
+     << "  DecodeStatus S = MCDisassembler::Success;\n"
+     << "  for (;;) {\n"
+     << "    ptrdiff_t Loc = Ptr - DecodeTable;\n"
+     << "    switch (*Ptr) {\n"
+     << "    default:\n"
+     << "      errs() << Loc << \": Unexpected decode table opcode!\\n\";\n"
+     << "      return MCDisassembler::Fail;\n"
+     << "    case MCD::OPC_ExtractField: {\n"
+     << "      unsigned Start = *++Ptr;\n"
+     << "      unsigned Len = *++Ptr;\n"
+     << "      ++Ptr;\n"
+     << "      CurFieldValue = fieldFromInstruction(insn, Start, Len);\n"
+     << "      DEBUG(dbgs() << Loc << \": OPC_ExtractField(\" << Start << \", \"\n"
+     << "                   << Len << \"): \" << CurFieldValue << \"\\n\");\n"
+     << "      break;\n"
+     << "    }\n"
+     << "    case MCD::OPC_FilterValue: {\n"
+     << "      // Decode the field value.\n"
+     << "      unsigned Len;\n"
+     << "      InsnType Val = decodeULEB128(++Ptr, &Len);\n"
+     << "      Ptr += Len;\n"
+     << "      // NumToSkip is a plain 16-bit integer.\n"
+     << "      unsigned NumToSkip = *Ptr++;\n"
+     << "      NumToSkip |= (*Ptr++) << 8;\n"
+     << "\n"
+     << "      // Perform the filter operation.\n"
+     << "      if (Val != CurFieldValue)\n"
+     << "        Ptr += NumToSkip;\n"
+     << "      DEBUG(dbgs() << Loc << \": OPC_FilterValue(\" << Val << \", \" << NumToSkip\n"
+     << "                   << \"): \" << ((Val != CurFieldValue) ? \"FAIL:\" : \"PASS:\")\n"
+     << "                   << \" continuing at \" << (Ptr - DecodeTable) << \"\\n\");\n"
+     << "\n"
+     << "      break;\n"
+     << "    }\n"
+     << "    case MCD::OPC_CheckField: {\n"
+     << "      unsigned Start = *++Ptr;\n"
+     << "      unsigned Len = *++Ptr;\n"
+     << "      InsnType FieldValue = fieldFromInstruction(insn, Start, Len);\n"
+     << "      // Decode the field value.\n"
+     << "      uint32_t ExpectedValue = decodeULEB128(++Ptr, &Len);\n"
+     << "      Ptr += Len;\n"
+     << "      // NumToSkip is a plain 16-bit integer.\n"
+     << "      unsigned NumToSkip = *Ptr++;\n"
+     << "      NumToSkip |= (*Ptr++) << 8;\n"
+     << "\n"
+     << "      // If the actual and expected values don't match, skip.\n"
+     << "      if (ExpectedValue != FieldValue)\n"
+     << "        Ptr += NumToSkip;\n"
+     << "      DEBUG(dbgs() << Loc << \": OPC_CheckField(\" << Start << \", \"\n"
+     << "                   << Len << \", \" << ExpectedValue << \", \" << NumToSkip\n"
+     << "                   << \"): FieldValue = \" << FieldValue << \", ExpectedValue = \"\n"
+     << "                   << ExpectedValue << \": \"\n"
+     << "                   << ((ExpectedValue == FieldValue) ? \"PASS\\n\" : \"FAIL\\n\"));\n"
+     << "      break;\n"
+     << "    }\n"
+     << "    case MCD::OPC_CheckPredicate: {\n"
+     << "      unsigned Len;\n"
+     << "      // Decode the Predicate Index value.\n"
+     << "      unsigned PIdx = decodeULEB128(++Ptr, &Len);\n"
+     << "      Ptr += Len;\n"
+     << "      // NumToSkip is a plain 16-bit integer.\n"
+     << "      unsigned NumToSkip = *Ptr++;\n"
+     << "      NumToSkip |= (*Ptr++) << 8;\n"
+     << "      // Check the predicate.\n"
+     << "      bool Pred;\n"
+     << "      if (!(Pred = checkDecoderPredicate(PIdx, Bits)))\n"
+     << "        Ptr += NumToSkip;\n"
+     << "      (void)Pred;\n"
+     << "      DEBUG(dbgs() << Loc << \": OPC_CheckPredicate(\" << PIdx << \"): \"\n"
+     << "            << (Pred ? \"PASS\\n\" : \"FAIL\\n\"));\n"
+     << "\n"
+     << "      break;\n"
+     << "    }\n"
+     << "    case MCD::OPC_Decode: {\n"
+     << "      unsigned Len;\n"
+     << "      // Decode the Opcode value.\n"
+     << "      unsigned Opc = decodeULEB128(++Ptr, &Len);\n"
+     << "      Ptr += Len;\n"
+     << "      unsigned DecodeIdx = decodeULEB128(Ptr, &Len);\n"
+     << "      Ptr += Len;\n"
+     << "      DEBUG(dbgs() << Loc << \": OPC_Decode: opcode \" << Opc\n"
+     << "                   << \", using decoder \" << DecodeIdx << \"\\n\" );\n"
+     << "      DEBUG(dbgs() << \"----- DECODE SUCCESSFUL -----\\n\");\n"
+     << "\n"
+     << "      MI.setOpcode(Opc);\n"
+     << "      return decodeToMCInst(S, DecodeIdx, insn, MI, Address, DisAsm);\n"
+     << "    }\n"
+     << "    case MCD::OPC_SoftFail: {\n"
+     << "      // Decode the mask values.\n"
+     << "      unsigned Len;\n"
+     << "      InsnType PositiveMask = decodeULEB128(++Ptr, &Len);\n"
+     << "      Ptr += Len;\n"
+     << "      InsnType NegativeMask = decodeULEB128(Ptr, &Len);\n"
+     << "      Ptr += Len;\n"
+     << "      bool Fail = (insn & PositiveMask) || (~insn & NegativeMask);\n"
+     << "      if (Fail)\n"
+     << "        S = MCDisassembler::SoftFail;\n"
+     << "      DEBUG(dbgs() << Loc << \": OPC_SoftFail: \" << (Fail ? \"FAIL\\n\":\"PASS\\n\"));\n"
+     << "      break;\n"
+     << "    }\n"
+     << "    case MCD::OPC_Fail: {\n"
+     << "      DEBUG(dbgs() << Loc << \": OPC_Fail\\n\");\n"
+     << "      return MCDisassembler::Fail;\n"
+     << "    }\n"
+     << "    }\n"
+     << "  }\n"
+     << "  llvm_unreachable(\"bogosity detected in disassembler state machine!\");\n"
+     << "}\n\n";
 }
 
 // Emits disassembler code for instruction decoding.
 void FixedLenDecoderEmitter::run(raw_ostream &o) {
-  o << "#include \"llvm/MC/MCInst.h\"\n";
-  o << "#include \"llvm/Support/DataTypes.h\"\n";
-  o << "#include <assert.h>\n";
-  o << '\n';
-  o << "namespace llvm {\n\n";
+  formatted_raw_ostream OS(o);
+  OS << "#include \"llvm/MC/MCInst.h\"\n";
+  OS << "#include \"llvm/Support/Debug.h\"\n";
+  OS << "#include \"llvm/Support/DataTypes.h\"\n";
+  OS << "#include \"llvm/Support/LEB128.h\"\n";
+  OS << "#include \"llvm/Support/raw_ostream.h\"\n";
+  OS << "#include <assert.h>\n";
+  OS << '\n';
+  OS << "namespace llvm {\n\n";
+
+  emitFieldFromInstruction(OS);
 
   // Parameterize the decoders based on namespace and instruction width.
-  const std::vector<const CodeGenInstruction*> &NumberedInstructions =
-    Target.getInstructionsByEnumValue();
+  NumberedInstructions = &Target.getInstructionsByEnumValue();
   std::map<std::pair<std::string, unsigned>,
            std::vector<unsigned> > OpcMap;
   std::map<unsigned, std::vector<OperandInfo> > Operands;
 
-  for (unsigned i = 0; i < NumberedInstructions.size(); ++i) {
-    const CodeGenInstruction *Inst = NumberedInstructions[i];
+  for (unsigned i = 0; i < NumberedInstructions->size(); ++i) {
+    const CodeGenInstruction *Inst = NumberedInstructions->at(i);
     const Record *Def = Inst->TheDef;
     unsigned Size = Def->getValueAsInt("Size");
     if (Def->getValueAsString("Namespace") == "TargetOpcode" ||
@@ -1622,24 +2032,48 @@ void FixedLenDecoderEmitter::run(raw_ostream &o) {
     }
   }
 
+  DecoderTableInfo TableInfo;
   std::set<unsigned> Sizes;
   for (std::map<std::pair<std::string, unsigned>,
                 std::vector<unsigned> >::const_iterator
        I = OpcMap.begin(), E = OpcMap.end(); I != E; ++I) {
-    // If we haven't visited this instruction width before, emit the
-    // helper method to extract fields.
-    if (!Sizes.count(I->first.second)) {
-      emitHelper(o, 8*I->first.second);
-      Sizes.insert(I->first.second);
-    }
-
     // Emit the decoder for this namespace+width combination.
-    FilterChooser FC(NumberedInstructions, I->second, Operands,
+    FilterChooser FC(*NumberedInstructions, I->second, Operands,
                      8*I->first.second, this);
-    FC.emitTop(o, 0, I->first.first);
+
+    // The decode table is cleared for each top level decoder function. The
+    // predicates and decoders themselves, however, are shared across all
+    // decoders to give more opportunities for uniqueing.
+    TableInfo.Table.clear();
+    TableInfo.FixupStack.clear();
+    TableInfo.Table.reserve(16384);
+    TableInfo.FixupStack.push_back(FixupList());
+    FC.emitTableEntries(TableInfo);
+    // Any NumToSkip fixups in the top level scope can resolve to the
+    // OPC_Fail at the end of the table.
+    assert(TableInfo.FixupStack.size() == 1 && "fixup stack phasing error!");
+    // Resolve any NumToSkip fixups in the current scope.
+    resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(),
+                       TableInfo.Table.size());
+    TableInfo.FixupStack.clear();
+
+    TableInfo.Table.push_back(MCD::OPC_Fail);
+
+    // Print the table to the output stream.
+    emitTable(OS, TableInfo.Table, 0, FC.getBitWidth(), I->first.first);
+    OS.flush();
   }
 
-  o << "\n} // End llvm namespace \n";
+  // Emit the predicate function.
+  emitPredicateFunction(OS, TableInfo.Predicates, 0);
+
+  // Emit the decoder function.
+  emitDecoderFunction(OS, TableInfo.Decoders, 0);
+
+  // Emit the main entry point for the decoder, decodeInstruction().
+  emitDecodeInstruction(OS);
+
+  OS << "\n} // End llvm namespace\n";
 }
 
 namespace llvm {
diff --git a/utils/TableGen/InstrInfoEmitter.cpp b/utils/TableGen/InstrInfoEmitter.cpp
index 3adb869..b41ad94 100644
--- a/utils/TableGen/InstrInfoEmitter.cpp
+++ b/utils/TableGen/InstrInfoEmitter.cpp
@@ -319,6 +319,7 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
   if (Inst.isCompare)          OS << "|(1<<MCID::Compare)";
   if (Inst.isMoveImm)          OS << "|(1<<MCID::MoveImm)";
   if (Inst.isBitcast)          OS << "|(1<<MCID::Bitcast)";
+  if (Inst.isSelect)           OS << "|(1<<MCID::Select)";
   if (Inst.isBarrier)          OS << "|(1<<MCID::Barrier)";
   if (Inst.hasDelaySlot)       OS << "|(1<<MCID::DelaySlot)";
   if (Inst.isCall)             OS << "|(1<<MCID::Call)";
diff --git a/utils/TableGen/RegisterInfoEmitter.cpp b/utils/TableGen/RegisterInfoEmitter.cpp
index 3d8d515..02546df 100644
--- a/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/utils/TableGen/RegisterInfoEmitter.cpp
@@ -145,9 +145,9 @@ void RegisterInfoEmitter::runEnums(raw_ostream &OS,
     if (!Namespace.empty())
       OS << "namespace " << Namespace << " {\n";
     OS << "enum {\n  NoSubRegister,\n";
-    for (unsigned i = 0, e = Bank.getNumNamedIndices(); i != e; ++i)
+    for (unsigned i = 0, e = SubRegIndices.size(); i != e; ++i)
       OS << "  " << SubRegIndices[i]->getName() << ",\t// " << i+1 << "\n";
-    OS << "  NUM_TARGET_NAMED_SUBREGS\n};\n";
+    OS << "  NUM_TARGET_SUBREGS\n};\n";
     if (!Namespace.empty())
       OS << "}\n";
   }
@@ -479,16 +479,12 @@ public:
   }
 };
 
-static void printRegister(raw_ostream &OS, const CodeGenRegister *Reg) {
-  OS << getQualifiedName(Reg->TheDef);
-}
-
 static void printSimpleValueType(raw_ostream &OS, MVT::SimpleValueType VT) {
   OS << getEnumName(VT);
 }
 
 static void printSubRegIndex(raw_ostream &OS, const CodeGenSubRegIndex *Idx) {
-  OS << Idx->getQualifiedName();
+  OS << Idx->EnumValue;
 }
 
 // Differentially encoded register and regunit lists allow for better
@@ -517,6 +513,19 @@ DiffVec &diffEncode(DiffVec &V, unsigned InitVal, ArrayRef<unsigned> List) {
   return V;
 }
 
+template<typename Iter>
+static
+DiffVec &diffEncode(DiffVec &V, unsigned InitVal, Iter Begin, Iter End) {
+  assert(V.empty() && "Clear DiffVec before diffEncode.");
+  uint16_t Val = uint16_t(InitVal);
+  for (Iter I = Begin; I != End; ++I) {
+    uint16_t Cur = (*I)->EnumValue;
+    V.push_back(Cur - Val);
+    Val = Cur;
+  }
+  return V;
+}
+
 static void printDiff16(raw_ostream &OS, uint16_t Val) {
   OS << Val;
 }
@@ -537,15 +546,21 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
   // The lists of sub-registers, super-registers, and overlaps all go in the
   // same array. That allows us to share suffixes.
   typedef std::vector<const CodeGenRegister*> RegVec;
-  SmallVector<RegVec, 4> SubRegLists(Regs.size());
-  SmallVector<RegVec, 4> OverlapLists(Regs.size());
-  SequenceToOffsetTable<RegVec, CodeGenRegister::Less> RegSeqs;
 
   // Differentially encoded lists.
   SequenceToOffsetTable<DiffVec> DiffSeqs;
+  SmallVector<DiffVec, 4> SubRegLists(Regs.size());
+  SmallVector<DiffVec, 4> SuperRegLists(Regs.size());
+  SmallVector<DiffVec, 4> OverlapLists(Regs.size());
   SmallVector<DiffVec, 4> RegUnitLists(Regs.size());
   SmallVector<unsigned, 4> RegUnitInitScale(Regs.size());
 
+  // Keep track of sub-register names as well. These are not differentially
+  // encoded.
+  typedef SmallVector<const CodeGenSubRegIndex*, 4> SubRegIdxVec;
+  SequenceToOffsetTable<SubRegIdxVec> SubRegIdxSeqs;
+  SmallVector<SubRegIdxVec, 4> SubRegIdxLists(Regs.size());
+
   SequenceToOffsetTable<std::string> RegStrings;
 
   // Precompute register lists for the SequenceToOffsetTable.
@@ -557,37 +572,29 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
     // Compute the ordered sub-register list.
     SetVector<const CodeGenRegister*> SR;
     Reg->addSubRegsPreOrder(SR, RegBank);
-    RegVec &SubRegList = SubRegLists[i];
-    SubRegList.assign(SR.begin(), SR.end());
-    RegSeqs.add(SubRegList);
+    diffEncode(SubRegLists[i], Reg->EnumValue, SR.begin(), SR.end());
+    DiffSeqs.add(SubRegLists[i]);
+
+    // Compute the corresponding sub-register indexes.
+    SubRegIdxVec &SRIs = SubRegIdxLists[i];
+    for (unsigned j = 0, je = SR.size(); j != je; ++j)
+      SRIs.push_back(Reg->getSubRegIndex(SR[j]));
+    SubRegIdxSeqs.add(SRIs);
 
     // Super-registers are already computed.
     const RegVec &SuperRegList = Reg->getSuperRegs();
-    RegSeqs.add(SuperRegList);
-
-    // The list of overlaps doesn't need to have any particular order, except
-    // Reg itself must be the first element. Pick an ordering that has one of
-    // the other lists as a suffix.
-    RegVec &OverlapList = OverlapLists[i];
-    const RegVec &Suffix = SubRegList.size() > SuperRegList.size() ?
-                           SubRegList : SuperRegList;
-    CodeGenRegister::Set Omit(Suffix.begin(), Suffix.end());
+    diffEncode(SuperRegLists[i], Reg->EnumValue,
+               SuperRegList.begin(), SuperRegList.end());
+    DiffSeqs.add(SuperRegLists[i]);
 
-    // First element is Reg itself.
-    OverlapList.push_back(Reg);
-    Omit.insert(Reg);
-
-    // Any elements not in Suffix.
+    // The list of overlaps doesn't need to have any particular order, and Reg
+    // itself must be omitted.
+    DiffVec &OverlapList = OverlapLists[i];
     CodeGenRegister::Set OSet;
     Reg->computeOverlaps(OSet, RegBank);
-    std::set_difference(OSet.begin(), OSet.end(),
-                        Omit.begin(), Omit.end(),
-                        std::back_inserter(OverlapList),
-                        CodeGenRegister::Less());
-
-    // Finally, Suffix itself.
-    OverlapList.insert(OverlapList.end(), Suffix.begin(), Suffix.end());
-    RegSeqs.add(OverlapList);
+    OSet.erase(Reg);
+    diffEncode(OverlapList, Reg->EnumValue, OSet.begin(), OSet.end());
+    DiffSeqs.add(OverlapList);
 
     // Differentially encode the register unit list, seeded by register number.
     // First compute a scale factor that allows more diff-lists to be reused:
@@ -616,23 +623,23 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
   }
 
   // Compute the final layout of the sequence table.
-  RegSeqs.layout();
   DiffSeqs.layout();
+  SubRegIdxSeqs.layout();
 
   OS << "namespace llvm {\n\n";
 
   const std::string &TargetName = Target.getName();
 
-  // Emit the shared table of register lists.
-  OS << "extern const uint16_t " << TargetName << "RegLists[] = {\n";
-  RegSeqs.emit(OS, printRegister);
-  OS << "};\n\n";
-
   // Emit the shared table of differential lists.
   OS << "extern const uint16_t " << TargetName << "RegDiffLists[] = {\n";
   DiffSeqs.emit(OS, printDiff16);
   OS << "};\n\n";
 
+  // Emit the table of sub-register indexes.
+  OS << "extern const uint16_t " << TargetName << "SubRegIdxLists[] = {\n";
+  SubRegIdxSeqs.emit(OS, printSubRegIndex);
+  OS << "};\n\n";
+
   // Emit the string table.
   RegStrings.layout();
   OS << "extern const char " << TargetName << "RegStrings[] = {\n";
@@ -641,15 +648,16 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
 
   OS << "extern const MCRegisterDesc " << TargetName
      << "RegDesc[] = { // Descriptors\n";
-  OS << "  { " << RegStrings.get("") << ", 0, 0, 0, 0 },\n";
+  OS << "  { " << RegStrings.get("") << ", 0, 0, 0, 0, 0 },\n";
 
   // Emit the register descriptors now.
   for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
     const CodeGenRegister *Reg = Regs[i];
     OS << "  { " << RegStrings.get(Reg->getName()) << ", "
-       << RegSeqs.get(OverlapLists[i]) << ", "
-       << RegSeqs.get(SubRegLists[i]) << ", "
-       << RegSeqs.get(Reg->getSuperRegs()) << ", "
+       << DiffSeqs.get(OverlapLists[i]) << ", "
+       << DiffSeqs.get(SubRegLists[i]) << ", "
+       << DiffSeqs.get(SuperRegLists[i]) << ", "
+       << SubRegIdxSeqs.get(SubRegIdxLists[i]) << ", "
        << (DiffSeqs.get(RegUnitLists[i])*16 + RegUnitInitScale[i]) << " },\n";
   }
   OS << "};\n\n";      // End of register descriptors...
@@ -729,37 +737,7 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
 
   OS << "};\n\n";
 
-  // Emit the data table for getSubReg().
   ArrayRef<CodeGenSubRegIndex*> SubRegIndices = RegBank.getSubRegIndices();
-  if (SubRegIndices.size()) {
-    OS << "const uint16_t " << TargetName << "SubRegTable[]["
-       << SubRegIndices.size() << "] = {\n";
-    for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
-      const CodeGenRegister::SubRegMap &SRM = Regs[i]->getSubRegs();
-      OS << "  /* " << Regs[i]->TheDef->getName() << " */\n";
-      if (SRM.empty()) {
-        OS << "  {0},\n";
-        continue;
-      }
-      OS << "  {";
-      for (unsigned j = 0, je = SubRegIndices.size(); j != je; ++j) {
-        // FIXME: We really should keep this to 80 columns...
-        CodeGenRegister::SubRegMap::const_iterator SubReg =
-          SRM.find(SubRegIndices[j]);
-        if (SubReg != SRM.end())
-          OS << getQualifiedName(SubReg->second->TheDef);
-        else
-          OS << "0";
-        if (j != je - 1)
-          OS << ", ";
-      }
-      OS << "}" << (i != e ? "," : "") << "\n";
-    }
-    OS << "};\n\n";
-    OS << "const uint16_t *get" << TargetName
-       << "SubRegTable() {\n  return (const uint16_t *)" << TargetName
-       << "SubRegTable;\n}\n\n";
-  }
 
   EmitRegMappingTables(OS, Regs, false);
 
@@ -783,22 +761,17 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
   // MCRegisterInfo initialization routine.
   OS << "static inline void Init" << TargetName
      << "MCRegisterInfo(MCRegisterInfo *RI, unsigned RA, "
-     << "unsigned DwarfFlavour = 0, unsigned EHFlavour = 0) {\n";
-  OS << "  RI->InitMCRegisterInfo(" << TargetName << "RegDesc, "
+     << "unsigned DwarfFlavour = 0, unsigned EHFlavour = 0) {\n"
+     << "  RI->InitMCRegisterInfo(" << TargetName << "RegDesc, "
      << Regs.size()+1 << ", RA, " << TargetName << "MCRegisterClasses, "
      << RegisterClasses.size() << ", "
      << TargetName << "RegUnitRoots, "
      << RegBank.getNumNativeRegUnits() << ", "
-     << TargetName << "RegLists, "
      << TargetName << "RegDiffLists, "
-     << TargetName << "RegStrings, ";
-  if (SubRegIndices.size() != 0)
-    OS << "(uint16_t*)" << TargetName << "SubRegTable, "
-       << SubRegIndices.size() << ",\n";
-  else
-    OS << "NULL, 0,\n";
-
-  OS << "  " << TargetName << "RegEncodingTable);\n\n";
+     << TargetName << "RegStrings, "
+     << TargetName << "SubRegIdxLists, "
+     << SubRegIndices.size() << ",\n"
+     << "  " << TargetName << "RegEncodingTable);\n\n";
 
   EmitRegMapping(OS, Regs, false);
 
@@ -912,17 +885,6 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
   }
   OS << "\" };\n\n";
 
-  // Emit names of the anonymous subreg indices.
-  unsigned NamedIndices = RegBank.getNumNamedIndices();
-  if (SubRegIndices.size() > NamedIndices) {
-    OS << "  enum {";
-    for (unsigned i = NamedIndices, e = SubRegIndices.size(); i != e; ++i) {
-      OS << "\n    " << SubRegIndices[i]->getName() << " = " << i+1;
-      if (i+1 != e)
-        OS << ',';
-    }
-    OS << "\n  };\n\n";
-  }
   OS << "\n";
 
   // Now that all of the structs have been emitted, emit the instances.
@@ -1148,13 +1110,10 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
 
   // Emit the constructor of the class...
   OS << "extern const MCRegisterDesc " << TargetName << "RegDesc[];\n";
-  OS << "extern const uint16_t " << TargetName << "RegLists[];\n";
   OS << "extern const uint16_t " << TargetName << "RegDiffLists[];\n";
   OS << "extern const char " << TargetName << "RegStrings[];\n";
   OS << "extern const uint16_t " << TargetName << "RegUnitRoots[][2];\n";
-  if (SubRegIndices.size() != 0)
-    OS << "extern const uint16_t *get" << TargetName
-       << "SubRegTable();\n";
+  OS << "extern const uint16_t " << TargetName << "SubRegIdxLists[];\n";
   OS << "extern const uint16_t " << TargetName << "RegEncodingTable[];\n";
 
   EmitRegMappingTables(OS, Regs, true);
@@ -1169,17 +1128,11 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
      << "MCRegisterClasses, " << RegisterClasses.size() << ",\n"
      << "                     " << TargetName << "RegUnitRoots,\n"
      << "                     " << RegBank.getNumNativeRegUnits() << ",\n"
-     << "                     " << TargetName << "RegLists,\n"
      << "                     " << TargetName << "RegDiffLists,\n"
      << "                     " << TargetName << "RegStrings,\n"
-     << "                     ";
-  if (SubRegIndices.size() != 0)
-    OS << "get" << TargetName << "SubRegTable(), "
-       << SubRegIndices.size() << ",\n";
-  else
-    OS << "NULL, 0,\n";
-
-  OS << "                     " << TargetName << "RegEncodingTable);\n\n";
+     << "                     " << TargetName << "SubRegIdxLists,\n"
+     << "                     " << SubRegIndices.size() << ",\n"
+     << "                     " << TargetName << "RegEncodingTable);\n\n";
 
   EmitRegMapping(OS, Regs, true);
 
diff --git a/utils/TableGen/StringToOffsetTable.h b/utils/TableGen/StringToOffsetTable.h
index 803f5bd..a098d7d 100644
--- a/utils/TableGen/StringToOffsetTable.h
+++ b/utils/TableGen/StringToOffsetTable.h
@@ -14,6 +14,7 @@
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/raw_ostream.h"
+#include <cctype>
 
 namespace llvm {
 
diff --git a/utils/TableGen/SubtargetEmitter.cpp b/utils/TableGen/SubtargetEmitter.cpp
index b3bf4aa..3472343 100644
--- a/utils/TableGen/SubtargetEmitter.cpp
+++ b/utils/TableGen/SubtargetEmitter.cpp
@@ -590,6 +590,7 @@ void SubtargetEmitter::EmitProcessorModels(raw_ostream &OS) {
     EmitProcessorProp(OS, PI->ModelDef, "MinLatency", ',');
     EmitProcessorProp(OS, PI->ModelDef, "LoadLatency", ',');
     EmitProcessorProp(OS, PI->ModelDef, "HighLatency", ',');
+    EmitProcessorProp(OS, PI->ModelDef, "MispredictPenalty", ',');
     if (SchedModels.hasItineraryClasses())
       OS << "  " << PI->ItinsDef->getName();
     else
diff --git a/utils/TableGen/X86DisassemblerShared.h b/utils/TableGen/X86DisassemblerShared.h
index 0417e9d..c13a0cc 100644
--- a/utils/TableGen/X86DisassemblerShared.h
+++ b/utils/TableGen/X86DisassemblerShared.h
@@ -14,6 +14,7 @@
 #include <string.h>
 
 #define INSTRUCTION_SPECIFIER_FIELDS       \
+  struct OperandSpecifier operands[X86_MAX_OPERANDS]; \
   bool                    filtered;        \
   InstructionContext      insnContext;     \
   std::string             name;            \
diff --git a/utils/TableGen/X86DisassemblerTables.cpp b/utils/TableGen/X86DisassemblerTables.cpp
index 2875168..f3bd373 100644
--- a/utils/TableGen/X86DisassemblerTables.cpp
+++ b/utils/TableGen/X86DisassemblerTables.cpp
@@ -21,10 +21,11 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
+#include <map>
 
 using namespace llvm;
 using namespace X86Disassembler;
-  
+
 /// inheritsFrom - Indicates whether all instructions in one class also belong
 ///   to another class.
 ///
@@ -36,7 +37,7 @@ static inline bool inheritsFrom(InstructionContext child,
                                 bool VEX_LIG = false) {
   if (child == parent)
     return true;
-  
+
   switch (parent) {
   case IC:
     return(inheritsFrom(child, IC_64BIT) ||
@@ -117,17 +118,17 @@ static inline bool inheritsFrom(InstructionContext child,
 /// @param upper  - The class that may be preferable
 /// @param lower  - The class that may be less preferable
 /// @return       - True if upper is to be preferred, false otherwise.
-static inline bool outranks(InstructionContext upper, 
+static inline bool outranks(InstructionContext upper,
                             InstructionContext lower) {
   assert(upper < IC_max);
   assert(lower < IC_max);
-  
+
 #define ENUM_ENTRY(n, r, d) r,
   static int ranks[IC_max] = {
     INSTRUCTION_CONTEXTS
   };
 #undef ENUM_ENTRY
-  
+
   return (ranks[upper] > ranks[lower]);
 }
 
@@ -170,24 +171,22 @@ static inline const char* stringForOperandEncoding(OperandEncoding encoding) {
   }
 }
 
-void DisassemblerTables::emitOneID(raw_ostream &o,
-                                   uint32_t &i,
-                                   InstrUID id,
+void DisassemblerTables::emitOneID(raw_ostream &o, unsigned &i, InstrUID id,
                                    bool addComma) const {
   if (id)
     o.indent(i * 2) << format("0x%hx", id);
   else
     o.indent(i * 2) << 0;
-  
+
   if (addComma)
     o << ", ";
   else
     o << "  ";
-  
+
   o << "/* ";
   o << InstructionSpecifiers[id].name;
   o << "*/";
-  
+
   o << "\n";
 }
 
@@ -197,8 +196,7 @@ void DisassemblerTables::emitOneID(raw_ostream &o,
 ///
 /// @param o        - The output stream on which to emit the table.
 /// @param i        - The indentation level for that output stream.
-static void emitEmptyTable(raw_ostream &o, uint32_t &i)
-{
+static void emitEmptyTable(raw_ostream &o, unsigned &i) {
   o.indent(i * 2) << "0x0, /* EmptyTable */\n";
 }
 
@@ -207,15 +205,12 @@ static void emitEmptyTable(raw_ostream &o, uint32_t &i)
 ///
 /// @param decision - The decision to be compacted.
 /// @return         - The compactest available representation for the decision.
-static ModRMDecisionType getDecisionType(ModRMDecision &decision)
-{
+static ModRMDecisionType getDecisionType(ModRMDecision &decision) {
   bool satisfiesOneEntry = true;
   bool satisfiesSplitRM = true;
   bool satisfiesSplitReg = true;
 
-  uint16_t index;
-
-  for (index = 0; index < 256; ++index) {
+  for (unsigned index = 0; index < 256; ++index) {
     if (decision.instructionIDs[index] != decision.instructionIDs[0])
       satisfiesOneEntry = false;
 
@@ -252,27 +247,25 @@ static ModRMDecisionType getDecisionType(ModRMDecision &decision)
 ///   to a particular decision type.
 ///
 /// @param dt - The decision type.
-/// @return   - A pointer to the statically-allocated string (e.g., 
+/// @return   - A pointer to the statically-allocated string (e.g.,
 ///             "MODRM_ONEENTRY" for MODRM_ONEENTRY).
-static const char* stringForDecisionType(ModRMDecisionType dt)
-{
+static const char* stringForDecisionType(ModRMDecisionType dt) {
 #define ENUM_ENTRY(n) case n: return #n;
   switch (dt) {
     default:
-      llvm_unreachable("Unknown decision type");  
+      llvm_unreachable("Unknown decision type");
     MODRMTYPES
-  };  
+  };
 #undef ENUM_ENTRY
 }
-  
+
 /// stringForModifierType - Returns a statically-allocated string corresponding
 ///   to an opcode modifier type.
 ///
 /// @param mt - The modifier type.
 /// @return   - A pointer to the statically-allocated string (e.g.,
 ///             "MODIFIER_NONE" for MODIFIER_NONE).
-static const char* stringForModifierType(ModifierType mt)
-{
+static const char* stringForModifierType(ModifierType mt) {
 #define ENUM_ENTRY(n) case n: return #n;
   switch(mt) {
     default:
@@ -281,35 +274,31 @@ static const char* stringForModifierType(ModifierType mt)
   };
 #undef ENUM_ENTRY
 }
-  
+
 DisassemblerTables::DisassemblerTables() {
   unsigned i;
-  
+
   for (i = 0; i < array_lengthof(Tables); i++) {
     Tables[i] = new ContextDecision;
     memset(Tables[i], 0, sizeof(ContextDecision));
   }
-  
+
   HasConflicts = false;
 }
-  
+
 DisassemblerTables::~DisassemblerTables() {
   unsigned i;
-  
+
   for (i = 0; i < array_lengthof(Tables); i++)
     delete Tables[i];
 }
-  
-void DisassemblerTables::emitModRMDecision(raw_ostream &o1,
-                                           raw_ostream &o2,
-                                           uint32_t &i1,
-                                           uint32_t &i2,
-                                           ModRMDecision &decision)
-  const {
-  static uint64_t sTableNumber = 0;
-  static uint64_t sEntryNumber = 1;
+
+void DisassemblerTables::emitModRMDecision(raw_ostream &o1, raw_ostream &o2,
+                                           unsigned &i1, unsigned &i2,
+                                           ModRMDecision &decision) const {
+  static uint32_t sTableNumber = 0;
+  static uint32_t sEntryNumber = 1;
   ModRMDecisionType dt = getDecisionType(decision);
-  uint16_t index;
 
   if (dt == MODRM_ONEENTRY && decision.instructionIDs[0] == 0)
   {
@@ -338,13 +327,13 @@ void DisassemblerTables::emitModRMDecision(raw_ostream &o1,
       emitOneID(o1, i1, decision.instructionIDs[0xc0], true); // mod = 0b11
       break;
     case MODRM_SPLITREG:
-      for (index = 0; index < 64; index += 8)
+      for (unsigned index = 0; index < 64; index += 8)
         emitOneID(o1, i1, decision.instructionIDs[index], true);
-      for (index = 0xc0; index < 256; index += 8)
+      for (unsigned index = 0xc0; index < 256; index += 8)
         emitOneID(o1, i1, decision.instructionIDs[index], true);
       break;
     case MODRM_FULL:
-      for (index = 0; index < 256; ++index)
+      for (unsigned index = 0; index < 256; ++index)
         emitOneID(o1, i1, decision.instructionIDs[index], true);
       break;
   }
@@ -380,20 +369,15 @@ void DisassemblerTables::emitModRMDecision(raw_ostream &o1,
   ++sTableNumber;
 }
 
-void DisassemblerTables::emitOpcodeDecision(
-  raw_ostream &o1,
-  raw_ostream &o2,
-  uint32_t &i1,
-  uint32_t &i2,
-  OpcodeDecision &decision) const {
-  uint16_t index;
-
+void DisassemblerTables::emitOpcodeDecision(raw_ostream &o1, raw_ostream &o2,
+                                            unsigned &i1, unsigned &i2,
+                                            OpcodeDecision &decision) const {
   o2.indent(i2) << "{ /* struct OpcodeDecision */" << "\n";
   i2++;
   o2.indent(i2) << "{" << "\n";
   i2++;
 
-  for (index = 0; index < 256; ++index) {
+  for (unsigned index = 0; index < 256; ++index) {
     o2.indent(i2);
 
     o2 << "/* 0x" << format("%02hhx", index) << " */" << "\n";
@@ -412,21 +396,16 @@ void DisassemblerTables::emitOpcodeDecision(
   o2.indent(i2) << "}" << "\n";
 }
 
-void DisassemblerTables::emitContextDecision(
-  raw_ostream &o1,
-  raw_ostream &o2,
-  uint32_t &i1,
-  uint32_t &i2,
-  ContextDecision &decision,
-  const char* name) const {
+void DisassemblerTables::emitContextDecision(raw_ostream &o1, raw_ostream &o2,
+                                             unsigned &i1, unsigned &i2,
+                                             ContextDecision &decision,
+                                             const char* name) const {
   o2.indent(i2) << "static const struct ContextDecision " << name << " = {\n";
   i2++;
   o2.indent(i2) << "{ /* opcodeDecisions */" << "\n";
   i2++;
 
-  unsigned index;
-
-  for (index = 0; index < IC_max; ++index) {
+  for (unsigned index = 0; index < IC_max; ++index) {
     o2.indent(i2) << "/* ";
     o2 << stringForContext((InstructionContext)index);
     o2 << " */";
@@ -444,58 +423,81 @@ void DisassemblerTables::emitContextDecision(
   o2.indent(i2) << "};" << "\n";
 }
 
-void DisassemblerTables::emitInstructionInfo(raw_ostream &o, uint32_t &i) 
-  const {
+void DisassemblerTables::emitInstructionInfo(raw_ostream &o,
+                                             unsigned &i) const {
+  unsigned NumInstructions = InstructionSpecifiers.size();
+
+  o << "static const struct OperandSpecifier x86OperandSets[]["
+    << X86_MAX_OPERANDS << "] = {\n";
+
+  typedef std::vector<std::pair<const char *, const char *> > OperandListTy;
+  std::map<OperandListTy, unsigned> OperandSets;
+
+  unsigned OperandSetNum = 0;
+  for (unsigned Index = 0; Index < NumInstructions; ++Index) {
+    OperandListTy OperandList;
+
+    for (unsigned OperandIndex = 0; OperandIndex < X86_MAX_OPERANDS;
+         ++OperandIndex) {
+      const char *Encoding =
+        stringForOperandEncoding((OperandEncoding)InstructionSpecifiers[Index]
+                                 .operands[OperandIndex].encoding);
+      const char *Type =
+        stringForOperandType((OperandType)InstructionSpecifiers[Index]
+                             .operands[OperandIndex].type);
+      OperandList.push_back(std::make_pair(Encoding, Type));
+    }
+    unsigned &N = OperandSets[OperandList];
+    if (N != 0) continue;
+
+    N = ++OperandSetNum;
+
+    o << "  { /* " << (OperandSetNum - 1) << " */\n";
+    for (unsigned i = 0, e = OperandList.size(); i != e; ++i) {
+      o << "    { " << OperandList[i].first << ", "
+        << OperandList[i].second << " },\n";
+    }
+    o << "  },\n";
+  }
+  o << "};" << "\n\n";
+
   o.indent(i * 2) << "static const struct InstructionSpecifier ";
   o << INSTRUCTIONS_STR "[" << InstructionSpecifiers.size() << "] = {\n";
-  
-  i++;
 
-  uint16_t numInstructions = InstructionSpecifiers.size();
-  uint16_t index, operandIndex;
+  i++;
 
-  for (index = 0; index < numInstructions; ++index) {
+  for (unsigned index = 0; index < NumInstructions; ++index) {
     o.indent(i * 2) << "{ /* " << index << " */" << "\n";
     i++;
 
     o.indent(i * 2) << stringForModifierType(
                        (ModifierType)InstructionSpecifiers[index].modifierType);
-    o << "," << "\n";
+    o << ",\n";
 
     o.indent(i * 2) << "0x";
     o << format("%02hhx", (uint16_t)InstructionSpecifiers[index].modifierBase);
-    o << "," << "\n";
-
-    o.indent(i * 2) << "{" << "\n";
-    i++;
-
-    for (operandIndex = 0; operandIndex < X86_MAX_OPERANDS; ++operandIndex) {
-      o.indent(i * 2) << "{ ";
-      o <<stringForOperandEncoding((OperandEncoding)InstructionSpecifiers[index]
-                                   .operands[operandIndex]
-                                   .encoding);
-      o << ", ";
-      o << stringForOperandType((OperandType)InstructionSpecifiers[index]
-                                .operands[operandIndex]
-                                .type);
-      o << " }";
-
-      if (operandIndex < X86_MAX_OPERANDS - 1)
-        o << ",";
-
-      o << "\n";
+    o << ",\n";
+
+    OperandListTy OperandList;
+    for (unsigned OperandIndex = 0; OperandIndex < X86_MAX_OPERANDS;
+         ++OperandIndex) {
+      const char *Encoding =
+        stringForOperandEncoding((OperandEncoding)InstructionSpecifiers[index]
+                                 .operands[OperandIndex].encoding);
+      const char *Type =
+        stringForOperandType((OperandType)InstructionSpecifiers[index]
+                             .operands[OperandIndex].type);
+      OperandList.push_back(std::make_pair(Encoding, Type));
     }
+    o.indent(i * 2) << (OperandSets[OperandList] - 1) << ",\n";
 
-    i--;
-    o.indent(i * 2) << "}," << "\n";
-    
     o.indent(i * 2) << "/* " << InstructionSpecifiers[index].name << " */";
     o << "\n";
 
     i--;
     o.indent(i * 2) << "}";
 
-    if (index + 1 < numInstructions)
+    if (index + 1 < NumInstructions)
       o << ",";
 
     o << "\n";
@@ -505,14 +507,12 @@ void DisassemblerTables::emitInstructionInfo(raw_ostream &o, uint32_t &i)
   o.indent(i * 2) << "};" << "\n";
 }
 
-void DisassemblerTables::emitContextTable(raw_ostream &o, uint32_t &i) const {
-  uint16_t index;
-
-  o.indent(i * 2) << "static const InstructionContext " CONTEXTS_STR
+void DisassemblerTables::emitContextTable(raw_ostream &o, unsigned &i) const {
+  o.indent(i * 2) << "static const uint8_t " CONTEXTS_STR
                      "[256] = {\n";
   i++;
 
-  for (index = 0; index < 256; ++index) {
+  for (unsigned index = 0; index < 256; ++index) {
     o.indent(i * 2);
 
     if ((index & ATTR_VEXL) && (index & ATTR_REXW) && (index & ATTR_OPSIZE))
@@ -545,7 +545,7 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, uint32_t &i) const {
       o << "IC_64BIT_REXW_XS";
     else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XD))
       o << "IC_64BIT_REXW_XD";
-    else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && 
+    else if ((index & ATTR_64BIT) && (index & ATTR_REXW) &&
              (index & ATTR_OPSIZE))
       o << "IC_64BIT_REXW_OPSIZE";
     else if ((index & ATTR_64BIT) && (index & ATTR_XD) && (index & ATTR_OPSIZE))
@@ -593,11 +593,8 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, uint32_t &i) const {
   o.indent(i * 2) << "};" << "\n";
 }
 
-void DisassemblerTables::emitContextDecisions(raw_ostream &o1,
-                                            raw_ostream &o2,
-                                            uint32_t &i1,
-                                            uint32_t &i2)
-  const {
+void DisassemblerTables::emitContextDecisions(raw_ostream &o1, raw_ostream &o2,
+                                             unsigned &i1, unsigned &i2) const {
   emitContextDecision(o1, o2, i1, i2, *Tables[0], ONEBYTE_STR);
   emitContextDecision(o1, o2, i1, i2, *Tables[1], TWOBYTE_STR);
   emitContextDecision(o1, o2, i1, i2, *Tables[2], THREEBYTE38_STR);
@@ -607,15 +604,15 @@ void DisassemblerTables::emitContextDecisions(raw_ostream &o1,
 }
 
 void DisassemblerTables::emit(raw_ostream &o) const {
-  uint32_t i1 = 0;
-  uint32_t i2 = 0;
-  
+  unsigned i1 = 0;
+  unsigned i2 = 0;
+
   std::string s1;
   std::string s2;
-  
+
   raw_string_ostream o1(s1);
   raw_string_ostream o2(s2);
-  
+
   emitInstructionInfo(o, i2);
   o << "\n";
 
@@ -641,9 +638,7 @@ void DisassemblerTables::setTableFields(ModRMDecision     &decision,
                                         const ModRMFilter &filter,
                                         InstrUID          uid,
                                         uint8_t           opcode) {
-  unsigned index;
-
-  for (index = 0; index < 256; ++index) {
+  for (unsigned index = 0; index < 256; ++index) {
     if (filter.accepts(index)) {
       if (decision.instructionIDs[index] == uid)
         continue;
@@ -653,10 +648,10 @@ void DisassemblerTables::setTableFields(ModRMDecision     &decision,
           InstructionSpecifiers[uid];
         InstructionSpecifier &previousInfo =
           InstructionSpecifiers[decision.instructionIDs[index]];
-        
+
         if(newInfo.filtered)
           continue; // filtered instructions get lowest priority
-        
+
         if(previousInfo.name == "NOOP" && (newInfo.name == "XCHG16ar" ||
                                            newInfo.name == "XCHG32ar" ||
                                            newInfo.name == "XCHG32ar64" ||
@@ -665,7 +660,7 @@ void DisassemblerTables::setTableFields(ModRMDecision     &decision,
 
         if (outranks(previousInfo.insnContext, newInfo.insnContext))
           continue;
-        
+
         if (previousInfo.insnContext == newInfo.insnContext &&
             !previousInfo.filtered) {
           errs() << "Error: Primary decode conflict: ";
@@ -690,17 +685,15 @@ void DisassemblerTables::setTableFields(OpcodeType          type,
                                         InstrUID            uid,
                                         bool                is32bit,
                                         bool                ignoresVEX_L) {
-  unsigned index;
-  
   ContextDecision &decision = *Tables[type];
 
-  for (index = 0; index < IC_max; ++index) {
+  for (unsigned index = 0; index < IC_max; ++index) {
     if (is32bit && inheritsFrom((InstructionContext)index, IC_64BIT))
       continue;
 
-    if (inheritsFrom((InstructionContext)index, 
+    if (inheritsFrom((InstructionContext)index,
                      InstructionSpecifiers[uid].insnContext, ignoresVEX_L))
-      setTableFields(decision.opcodeDecisions[index].modRMDecisions[opcode], 
+      setTableFields(decision.opcodeDecisions[index].modRMDecisions[opcode],
                      filter,
                      uid,
                      opcode);
diff --git a/utils/TableGen/X86DisassemblerTables.h b/utils/TableGen/X86DisassemblerTables.h
index e148cd2..ea006c0 100644
--- a/utils/TableGen/X86DisassemblerTables.h
+++ b/utils/TableGen/X86DisassemblerTables.h
@@ -42,13 +42,13 @@ private:
   /// [4] three-byte opcodes of the form 0f a6 __
   /// [5] three-byte opcodes of the form 0f a7 __
   ContextDecision* Tables[6];
-  
+
   /// The instruction information table
   std::vector<InstructionSpecifier> InstructionSpecifiers;
-  
+
   /// True if there are primary decode conflicts in the instruction set
   bool HasConflicts;
-  
+
   /// emitOneID - Emits a table entry for a single instruction entry, at the
   ///   innermost level of the structure hierarchy.  The entry is printed out
   ///   in the format "nnnn, /* MNEMONIC */" where nnnn is the ID in decimal,
@@ -64,7 +64,7 @@ private:
                  uint32_t &i,
                  InstrUID id,
                  bool addComma) const;
-  
+
   /// emitModRMDecision - Emits a table of entries corresponding to a single
   ///   ModR/M decision.  Compacts the ModR/M decision if possible.  ModR/M
   ///   decisions are printed as:
@@ -77,12 +77,12 @@ private:
   ///   where nnnn is a unique ID for the corresponding table of IDs.
   ///   TYPE indicates whether the table has one entry that is the same
   ///   regardless of ModR/M byte, two entries - one for bytes 0x00-0xbf and one
-  ///   for bytes 0xc0-0xff -, or 256 entries, one for each possible byte.  
+  ///   for bytes 0xc0-0xff -, or 256 entries, one for each possible byte.
   ///   nnnn is the number of a table for looking up these values.  The tables
   ///   are written separately so that tables consisting entirely of zeros will
   ///   not be duplicated.  (These all have the name modRMEmptyTable.)  A table
   ///   is printed as:
-  ///   
+  ///
   ///   InstrUID modRMTablennnn[k] = {
   ///     nnnn, /* MNEMONIC */
   ///     ...
@@ -100,7 +100,7 @@ private:
                          uint32_t &i1,
                          uint32_t &i2,
                          ModRMDecision &decision) const;
-  
+
   /// emitOpcodeDecision - Emits an OpcodeDecision and all its subsidiary ModR/M
   ///   decisions.  An OpcodeDecision is printed as:
   ///
@@ -129,8 +129,8 @@ private:
                           uint32_t &i1,
                           uint32_t &i2,
                           OpcodeDecision &decision) const;
-  
-  /// emitContextDecision - Emits a ContextDecision and all its subsidiary 
+
+  /// emitContextDecision - Emits a ContextDecision and all its subsidiary
   ///   Opcode and ModRMDecisions.  A ContextDecision is printed as:
   ///
   ///   struct ContextDecision NAME = {
@@ -163,10 +163,10 @@ private:
   void emitContextDecision(raw_ostream &o1,
                            raw_ostream &o2,
                            uint32_t &i1,
-                           uint32_t &i2,                           
+                           uint32_t &i2,
                            ContextDecision &decision,
                            const char* name) const;
-  
+
   /// emitInstructionInfo - Prints the instruction specifier table, which has
   ///   one entry for each instruction, and contains name and operand
   ///   information.  This table is printed as:
@@ -187,17 +187,17 @@ private:
   ///   };
   ///
   ///   k is the total number of instructions.
-  ///   nnnn is the ID of the current instruction (0-based).  This table 
+  ///   nnnn is the ID of the current instruction (0-based).  This table
   ///   includes entries for non-instructions like PHINODE.
   ///   0xnn is the lowest possible opcode for the current instruction, used for
   ///   AddRegFrm instructions to compute the operand's value.
   ///   ENCODING and TYPE describe the encoding and type for a single operand.
   ///
-  /// @param o  - The output stream to which the instruction table should be 
+  /// @param o  - The output stream to which the instruction table should be
   ///             written.
   /// @param i  - The indent level for use with the stream.
   void emitInstructionInfo(raw_ostream &o, uint32_t &i) const;
-  
+
   /// emitContextTable - Prints the table that is used to translate from an
   ///   instruction attribute mask to an instruction context.  This table is
   ///   printed as:
@@ -213,7 +213,7 @@ private:
   /// @param o  - The output stream to which the context table should be written.
   /// @param i  - The indent level for use with the stream.
   void emitContextTable(raw_ostream &o, uint32_t &i) const;
-  
+
   /// emitContextDecisions - Prints all four ContextDecision structures using
   ///   emitContextDecision().
   ///
@@ -225,7 +225,7 @@ private:
   void emitContextDecisions(raw_ostream &o1,
                             raw_ostream &o2,
                             uint32_t &i1,
-                            uint32_t &i2) const; 
+                            uint32_t &i2) const;
 
   /// setTableFields - Uses a ModRMFilter to set the appropriate entries in a
   ///   ModRMDecision to refer to a particular instruction ID.
@@ -241,14 +241,14 @@ private:
 public:
   /// Constructor - Allocates space for the class decisions and clears them.
   DisassemblerTables();
-  
+
   ~DisassemblerTables();
-  
+
   /// emit - Emits the instruction table, context table, and class decisions.
   ///
   /// @param o  - The output stream to print the tables to.
   void emit(raw_ostream &o) const;
-  
+
   /// setTableFields - Uses the opcode type, instruction context, opcode, and a
   ///   ModRMFilter as criteria to set a particular set of entries in the
   ///   decode tables to point to a specific uid.
@@ -268,24 +268,24 @@ public:
                       const ModRMFilter &filter,
                       InstrUID uid,
                       bool is32bit,
-                      bool ignoresVEX_L);  
-  
+                      bool ignoresVEX_L);
+
   /// specForUID - Returns the instruction specifier for a given unique
   ///   instruction ID.  Used when resolving collisions.
   ///
   /// @param uid  - The unique ID of the instruction.
-  /// @return     - A reference to the instruction specifier. 
+  /// @return     - A reference to the instruction specifier.
   InstructionSpecifier& specForUID(InstrUID uid) {
     if (uid >= InstructionSpecifiers.size())
       InstructionSpecifiers.resize(uid + 1);
-    
+
     return InstructionSpecifiers[uid];
   }
-  
+
   // hasConflicts - Reports whether there were primary decode conflicts
   //   from any instructions added to the tables.
   // @return  - true if there were; false otherwise.
-  
+
   bool hasConflicts() {
     return HasConflicts;
   }
diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp
index 6a685ff..7ac2336 100644
--- a/utils/TableGen/X86RecognizableInstr.cpp
+++ b/utils/TableGen/X86RecognizableInstr.cpp
@@ -57,7 +57,7 @@ namespace X86Local {
     MRMDestMem  = 4,
     MRMSrcReg   = 5,
     MRMSrcMem   = 6,
-    MRM0r = 16, MRM1r = 17, MRM2r = 18, MRM3r = 19, 
+    MRM0r = 16, MRM1r = 17, MRM2r = 18, MRM3r = 19,
     MRM4r = 20, MRM5r = 21, MRM6r = 22, MRM7r = 23,
     MRM0m = 24, MRM1m = 25, MRM2m = 26, MRM3m = 27,
     MRM4m = 28, MRM5m = 29, MRM6m = 30, MRM7m = 31,
@@ -69,7 +69,7 @@ namespace X86Local {
 #undef MAP
     lastMRM
   };
-  
+
   enum {
     TB  = 1,
     REP = 2,
@@ -82,17 +82,17 @@ namespace X86Local {
 }
 
 // If rows are added to the opcode extension tables, then corresponding entries
-// must be added here.  
+// must be added here.
 //
 // If the row corresponds to a single byte (i.e., 8f), then add an entry for
 // that byte to ONE_BYTE_EXTENSION_TABLES.
 //
-// If the row corresponds to two bytes where the first is 0f, add an entry for 
+// If the row corresponds to two bytes where the first is 0f, add an entry for
 // the second byte to TWO_BYTE_EXTENSION_TABLES.
 //
 // If the row corresponds to some other set of bytes, you will need to modify
 // the code in RecognizableInstr::emitDecodePath() as well, and add new prefixes
-// to the X86 TD files, except in two cases: if the first two bytes of such a 
+// to the X86 TD files, except in two cases: if the first two bytes of such a
 // new combination are 0f 38 or 0f 3a, you just have to add maps called
 // THREE_BYTE_38_EXTENSION_TABLES and THREE_BYTE_3A_EXTENSION_TABLES and add a
 // switch(Opcode) just below the case X86Local::T8: or case X86Local::TA: line
@@ -116,7 +116,7 @@ namespace X86Local {
   EXTENSION_TABLE(f7)             \
   EXTENSION_TABLE(fe)             \
   EXTENSION_TABLE(ff)
-  
+
 #define TWO_BYTE_EXTENSION_TABLES \
   EXTENSION_TABLE(00)             \
   EXTENSION_TABLE(01)             \
@@ -134,7 +134,7 @@ namespace X86Local {
 using namespace X86Disassembler;
 
 /// needsModRMForDecode - Indicates whether a particular instruction requires a
-///   ModR/M byte for the instruction to be properly decoded.  For example, a 
+///   ModR/M byte for the instruction to be properly decoded.  For example, a
 ///   MRMDestReg instruction needs the Mod field in the ModR/M byte to be set to
 ///   0b11.
 ///
@@ -213,17 +213,17 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables,
   Rec = insn.TheDef;
   Name = Rec->getName();
   Spec = &tables.specForUID(UID);
-  
+
   if (!Rec->isSubClassOf("X86Inst")) {
     ShouldBeEmitted = false;
     return;
   }
-  
+
   Prefix   = byteFromRec(Rec, "Prefix");
   Opcode   = byteFromRec(Rec, "Opcode");
   Form     = byteFromRec(Rec, "FormBits");
   SegOvr   = byteFromRec(Rec, "SegOvrBits");
-  
+
   HasOpSizePrefix  = Rec->getValueAsBit("hasOpSizePrefix");
   HasAdSizePrefix  = Rec->getValueAsBit("hasAdSizePrefix");
   HasREX_WPrefix   = Rec->getValueAsBit("hasREX_WPrefix");
@@ -235,12 +235,12 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables,
   IgnoresVEX_L     = Rec->getValueAsBit("ignoresVEX_L");
   HasLockPrefix    = Rec->getValueAsBit("hasLockPrefix");
   IsCodeGenOnly    = Rec->getValueAsBit("isCodeGenOnly");
-  
+
   Name      = Rec->getName();
   AsmString = Rec->getValueAsString("AsmString");
-  
+
   Operands = &insn.Operands.OperandList;
-  
+
   IsSSE            = (HasOpSizePrefix && (Name.find("16") == Name.npos)) ||
                      (Name.find("CRC32") != Name.npos);
   HasFROperands    = hasFROperands();
@@ -262,20 +262,20 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables,
     }
   }
   // FIXME: These instructions aren't marked as 64-bit in any way
-  Is64Bit |= Rec->getName() == "JMP64pcrel32" || 
-             Rec->getName() == "MASKMOVDQU64" || 
-             Rec->getName() == "POPFS64" || 
-             Rec->getName() == "POPGS64" || 
-             Rec->getName() == "PUSHFS64" || 
+  Is64Bit |= Rec->getName() == "JMP64pcrel32" ||
+             Rec->getName() == "MASKMOVDQU64" ||
+             Rec->getName() == "POPFS64" ||
+             Rec->getName() == "POPGS64" ||
+             Rec->getName() == "PUSHFS64" ||
              Rec->getName() == "PUSHGS64" ||
              Rec->getName() == "REX64_PREFIX" ||
-             Rec->getName().find("MOV64") != Name.npos || 
+             Rec->getName().find("MOV64") != Name.npos ||
              Rec->getName().find("PUSH64") != Name.npos ||
              Rec->getName().find("POP64") != Name.npos;
 
   ShouldBeEmitted  = true;
 }
-  
+
 void RecognizableInstr::processInstr(DisassemblerTables &tables,
                                      const CodeGenInstruction &insn,
                                      InstrUID uid)
@@ -283,11 +283,11 @@ void RecognizableInstr::processInstr(DisassemblerTables &tables,
   // Ignore "asm parser only" instructions.
   if (insn.TheDef->getValueAsBit("isAsmParserOnly"))
     return;
-  
+
   RecognizableInstr recogInstr(tables, insn, uid);
-  
+
   recogInstr.emitInstructionSpecifier(tables);
-  
+
   if (recogInstr.shouldBeEmitted())
     recogInstr.emitDecodePath(tables);
 }
@@ -386,55 +386,40 @@ InstructionContext RecognizableInstr::insnContext() const {
 
   return insnContext;
 }
-  
+
 RecognizableInstr::filter_ret RecognizableInstr::filter() const {
   ///////////////////
   // FILTER_STRONG
   //
-    
+
   // Filter out intrinsics
-  
-  if (!Rec->isSubClassOf("X86Inst"))
-    return FILTER_STRONG;
-  
+
+  assert(Rec->isSubClassOf("X86Inst") && "Can only filter X86 instructions");
+
   if (Form == X86Local::Pseudo ||
       (IsCodeGenOnly && Name.find("_REV") == Name.npos))
     return FILTER_STRONG;
-  
-  if (Form == X86Local::MRMInitReg)
-    return FILTER_STRONG;
-    
-    
+
+
   // Filter out artificial instructions but leave in the LOCK_PREFIX so it is
   // printed as a separate "instruction".
-    
+
   if (Name.find("_Int") != Name.npos       ||
-      Name.find("Int_") != Name.npos       ||
-      Name.find("_NOREX") != Name.npos     ||
-      Name.find("2SDL") != Name.npos)
+      Name.find("Int_") != Name.npos)
     return FILTER_STRONG;
 
   // Filter out instructions with segment override prefixes.
   // They're too messy to handle now and we'll special case them if needed.
-    
+
   if (SegOvr)
     return FILTER_STRONG;
-    
-  // Filter out instructions that can't be printed.
-    
-  if (AsmString.size() == 0)
-    return FILTER_STRONG;
-   
-  // Filter out instructions with subreg operands.
-   
-  if (AsmString.find("subreg") != AsmString.npos)
-    return FILTER_STRONG;
+
 
   /////////////////
   // FILTER_WEAK
   //
 
-    
+
   // Filter out instructions with a LOCK prefix;
   //   prefer forms that do not have the prefix
   if (HasLockPrefix)
@@ -474,9 +459,9 @@ RecognizableInstr::filter_ret RecognizableInstr::filter() const {
     return FILTER_WEAK;
 
   if (HasFROperands && Name.find("MOV") != Name.npos &&
-     ((Name.find("2") != Name.npos && Name.find("32") == Name.npos) || 
+     ((Name.find("2") != Name.npos && Name.find("32") == Name.npos) ||
       (Name.find("to") != Name.npos)))
-    return FILTER_WEAK;
+    return FILTER_STRONG;
 
   return FILTER_NORMAL;
 }
@@ -487,7 +472,7 @@ bool RecognizableInstr::hasFROperands() const {
 
   for (unsigned operandIndex = 0; operandIndex < numOperands; ++operandIndex) {
     const std::string &recName = OperandList[operandIndex].Rec->getName();
-      
+
     if (recName.find("FR") != recName.npos)
       return true;
   }
@@ -497,17 +482,17 @@ bool RecognizableInstr::hasFROperands() const {
 bool RecognizableInstr::has256BitOperands() const {
   const std::vector<CGIOperandList::OperandInfo> &OperandList = *Operands;
   unsigned numOperands = OperandList.size();
-    
+
   for (unsigned operandIndex = 0; operandIndex < numOperands; ++operandIndex) {
     const std::string &recName = OperandList[operandIndex].Rec->getName();
-       
-    if (!recName.compare("VR256") || !recName.compare("f256mem")) {
+
+    if (!recName.compare("VR256")) {
       return true;
     }
   }
   return false;
 }
-  
+
 void RecognizableInstr::handleOperand(bool optional, unsigned &operandIndex,
                                       unsigned &physicalOperandIndex,
                                       unsigned &numPhysicalOperands,
@@ -521,33 +506,33 @@ void RecognizableInstr::handleOperand(bool optional, unsigned &operandIndex,
   } else {
     assert(physicalOperandIndex < numPhysicalOperands);
   }
-  
+
   while (operandMapping[operandIndex] != operandIndex) {
     Spec->operands[operandIndex].encoding = ENCODING_DUP;
     Spec->operands[operandIndex].type =
       (OperandType)(TYPE_DUP0 + operandMapping[operandIndex]);
     ++operandIndex;
   }
-  
+
   const std::string &typeName = (*Operands)[operandIndex].Rec->getName();
 
   Spec->operands[operandIndex].encoding = encodingFromString(typeName,
                                                               HasOpSizePrefix);
-  Spec->operands[operandIndex].type = typeFromString(typeName, 
+  Spec->operands[operandIndex].type = typeFromString(typeName,
                                                      IsSSE,
                                                      HasREX_WPrefix,
                                                      HasOpSizePrefix);
-  
+
   ++operandIndex;
   ++physicalOperandIndex;
 }
 
 void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) {
   Spec->name       = Name;
-    
-  if (!Rec->isSubClassOf("X86Inst"))
+
+  if (!ShouldBeEmitted)
     return;
-  
+
   switch (filter()) {
   case FILTER_WEAK:
     Spec->filtered = true;
@@ -558,22 +543,19 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) {
   case FILTER_NORMAL:
     break;
   }
-  
+
   Spec->insnContext = insnContext();
-    
+
   const std::vector<CGIOperandList::OperandInfo> &OperandList = *Operands;
-  
+
   unsigned numOperands = OperandList.size();
   unsigned numPhysicalOperands = 0;
-  
+
   // operandMapping maps from operands in OperandList to their originals.
   // If operandMapping[i] != i, then the entry is a duplicate.
   unsigned operandMapping[X86_MAX_OPERANDS];
-  
-  bool hasFROperands = false;
-  
   assert(numOperands <= X86_MAX_OPERANDS && "X86_MAX_OPERANDS is not large enough");
-  
+
   for (unsigned operandIndex = 0; operandIndex < numOperands; ++operandIndex) {
     if (OperandList[operandIndex].Constraints.size()) {
       const CGIOperandList::ConstraintInfo &Constraint =
@@ -589,20 +571,7 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) {
       ++numPhysicalOperands;
       operandMapping[operandIndex] = operandIndex;
     }
-
-    const std::string &recName = OperandList[operandIndex].Rec->getName();
-
-    if (recName.find("FR") != recName.npos)
-      hasFROperands = true;
   }
-  
-  if (hasFROperands && Name.find("MOV") != Name.npos &&
-     ((Name.find("2") != Name.npos && Name.find("32") == Name.npos) ||
-      (Name.find("to") != Name.npos)))
-    ShouldBeEmitted = false;
-  
-  if (!ShouldBeEmitted)
-    return;
 
 #define HANDLE_OPERAND(class)               \
   handleOperand(false,                      \
@@ -611,7 +580,7 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) {
                 numPhysicalOperands,        \
                 operandMapping,             \
                 class##EncodingFromString);
-  
+
 #define HANDLE_OPTIONAL(class)              \
   handleOperand(true,                       \
                 operandIndex,               \
@@ -619,17 +588,17 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) {
                 numPhysicalOperands,        \
                 operandMapping,             \
                 class##EncodingFromString);
-  
+
   // operandIndex should always be < numOperands
   unsigned operandIndex = 0;
   // physicalOperandIndex should always be < numPhysicalOperands
   unsigned physicalOperandIndex = 0;
-    
+
   switch (Form) {
   case X86Local::RawFrm:
     // Operand 1 (optional) is an address or immediate.
     // Operand 2 (optional) is an immediate.
-    assert(numPhysicalOperands <= 2 && 
+    assert(numPhysicalOperands <= 2 &&
            "Unexpected number of operands for RawFrm");
     HANDLE_OPTIONAL(relocation)
     HANDLE_OPTIONAL(immediate)
@@ -653,14 +622,14 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) {
     else
       assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 3 &&
              "Unexpected number of operands for MRMDestRegFrm");
-  
+
     HANDLE_OPERAND(rmRegister)
 
     if (HasVEX_4VPrefix)
       // FIXME: In AVX, the register below becomes the one encoded
       // in ModRMVEX and the one above the one in the VEX.VVVV field
       HANDLE_OPERAND(vvvvRegister)
-          
+
     HANDLE_OPERAND(roRegister)
     HANDLE_OPTIONAL(immediate)
     break;
@@ -681,7 +650,7 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) {
       // FIXME: In AVX, the register below becomes the one encoded
       // in ModRMVEX and the one above the one in the VEX.VVVV field
       HANDLE_OPERAND(vvvvRegister)
-          
+
     HANDLE_OPERAND(roRegister)
     HANDLE_OPTIONAL(immediate)
     break;
@@ -694,11 +663,11 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) {
 
     if (HasVEX_4VPrefix || HasVEX_4VOp3Prefix)
       assert(numPhysicalOperands >= 3 && numPhysicalOperands <= 5 &&
-             "Unexpected number of operands for MRMSrcRegFrm with VEX_4V"); 
+             "Unexpected number of operands for MRMSrcRegFrm with VEX_4V");
     else
       assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 4 &&
              "Unexpected number of operands for MRMSrcRegFrm");
-  
+
     HANDLE_OPERAND(roRegister)
 
     if (HasVEX_4VPrefix)
@@ -727,11 +696,11 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) {
 
     if (HasVEX_4VPrefix || HasVEX_4VOp3Prefix)
       assert(numPhysicalOperands >= 3 && numPhysicalOperands <= 5 &&
-             "Unexpected number of operands for MRMSrcMemFrm with VEX_4V"); 
+             "Unexpected number of operands for MRMSrcMemFrm with VEX_4V");
     else
       assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 3 &&
              "Unexpected number of operands for MRMSrcMemFrm");
-    
+
     HANDLE_OPERAND(roRegister)
 
     if (HasVEX_4VPrefix)
@@ -813,7 +782,7 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) {
     // Ignored.
     break;
   }
-  
+
   #undef HANDLE_OPERAND
   #undef HANDLE_OPTIONAL
 }
@@ -827,8 +796,8 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const {
     break;
 
   OpcodeType    opcodeType  = (OpcodeType)-1;
-  
-  ModRMFilter*  filter      = NULL; 
+
+  ModRMFilter*  filter      = NULL;
   uint8_t       opcodeToSet = 0;
 
   switch (Prefix) {
@@ -1026,26 +995,26 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const {
     if(Spec->modifierType != MODIFIER_MODRM) {
       assert(opcodeToSet < 0xf9 &&
              "Not enough room for all ADDREG_FRM operands");
-    
+
       uint8_t currentOpcode;
 
       for (currentOpcode = opcodeToSet;
            currentOpcode < opcodeToSet + 8;
            ++currentOpcode)
-        tables.setTableFields(opcodeType, 
-                              insnContext(), 
-                              currentOpcode, 
-                              *filter, 
+        tables.setTableFields(opcodeType,
+                              insnContext(),
+                              currentOpcode,
+                              *filter,
                               UID, Is32Bit, IgnoresVEX_L);
-    
+
       Spec->modifierType = MODIFIER_OPCODE;
       Spec->modifierBase = opcodeToSet;
     } else {
       // modifierBase was set where MODIFIER_MODRM was set
-      tables.setTableFields(opcodeType, 
-                            insnContext(), 
-                            opcodeToSet, 
-                            *filter, 
+      tables.setTableFields(opcodeType,
+                            insnContext(),
+                            opcodeToSet,
+                            *filter,
                             UID, Is32Bit, IgnoresVEX_L);
     }
   } else {
@@ -1054,13 +1023,13 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const {
                           opcodeToSet,
                           *filter,
                           UID, Is32Bit, IgnoresVEX_L);
-    
+
     Spec->modifierType = MODIFIER_NONE;
     Spec->modifierBase = opcodeToSet;
   }
-  
+
   delete filter;
-  
+
 #undef MAP
 }
 
@@ -1070,7 +1039,7 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
                                               bool hasREX_WPrefix,
                                               bool hasOpSizePrefix) {
   if (isSSE) {
-    // For SSE instructions, we ignore the OpSize prefix and force operand 
+    // For SSE instructions, we ignore the OpSize prefix and force operand
     // sizes.
     TYPE("GR16",              TYPE_R16)
     TYPE("GR32",              TYPE_R32)
diff --git a/utils/UpdateCMakeLists.pl b/utils/UpdateCMakeLists.pl
index 8f53514..d92a767 100755
--- a/utils/UpdateCMakeLists.pl
+++ b/utils/UpdateCMakeLists.pl
@@ -68,8 +68,7 @@ sub UpdateCMake {
   while(<IN>) {
     if (!$foundLibrary) {
       print OUT $_;
-      if (/^add_clang_library\(/ || /^add_llvm_library\(/ || /^add_llvm_target\(/
-          || /^add_executable\(/) {
+      if (/^add_[^_]+_library\(/ || /^add_llvm_target\(/ || /^add_executable\(/) {
         $foundLibrary = 1;
         EmitCMakeList($dir);
       }
diff --git a/utils/buildit/build_llvm b/utils/buildit/build_llvm
index 994fb06..6aee831 100755
--- a/utils/buildit/build_llvm
+++ b/utils/buildit/build_llvm
@@ -133,7 +133,7 @@ if [ \! -f Makefile.config ]; then
     || exit 1
 fi
 
-SUBVERSION=`echo $RC_ProjectSourceVersion | sed -e 's/[^.]*\.\([0-9]*\).*/\1/'`
+SUBVERSION=`echo $RC_ProjectSourceVersion | sed -e 's/.*\.\([0-9]*\).*/\1/'`
 
 if [ "x$SUBVERSION" != "x$RC_ProjectSourceVersion" ]; then
     LLVM_SUBMIT_SUBVERSION=`printf "%02d" $SUBVERSION`
diff --git a/utils/lit/lit/main.py b/utils/lit/lit/main.py
index 039868d..25bbcbd 100755
--- a/utils/lit/lit/main.py
+++ b/utils/lit/lit/main.py
@@ -566,6 +566,9 @@ def main(builtinParameters = {}):    # Bump the GIL check interval, its more imp
     if opts.maxTests is not None:
         tests = tests[:opts.maxTests]
 
+    # Don't create more threads than tests.
+    opts.numThreads = min(len(tests), opts.numThreads)
+
     extra = ''
     if len(tests) != numTotalTests:
         extra = ' of %d' % numTotalTests
@@ -589,9 +592,6 @@ def main(builtinParameters = {}):    # Bump the GIL check interval, its more imp
         else:
             print header
 
-    # Don't create more threads than tests.
-    opts.numThreads = min(len(tests), opts.numThreads)
-
     startTime = time.time()
     display = TestingProgressDisplay(opts, len(tests), progressBar)
     provider = TestProvider(tests, opts.maxTime)
diff --git a/utils/llvm.grm b/utils/llvm.grm
index 322036b..ad2799f 100644
--- a/utils/llvm.grm
+++ b/utils/llvm.grm
@@ -175,6 +175,7 @@ FuncAttr      ::= noreturn
  | returns_twice
  | nonlazybind
  | address_safety
+ | ia_nsdialect
  ;
 
 OptFuncAttrs  ::= + _ | OptFuncAttrs FuncAttr ;
diff --git a/utils/obj2yaml/coff2yaml.cpp b/utils/obj2yaml/coff2yaml.cpp
index 2dbd531..c9a7159 100644
--- a/utils/obj2yaml/coff2yaml.cpp
+++ b/utils/obj2yaml/coff2yaml.cpp
@@ -276,7 +276,8 @@ static llvm::raw_ostream &yamlCOFFSections(llvm::object::COFFObjectFile &Obj,
     Obj.getSectionContents(sect, sectionData);    
     Out << "    SectionData: ";
     yaml::writeHexStream(Out, sectionData) << endl;
-    
+    if (iter->begin_relocations() != iter->end_relocations())
+      Out << "    Relocations:\n";
     for (llvm::object::relocation_iterator rIter = iter->begin_relocations();
                        rIter != iter->end_relocations(); rIter.increment(ec)) {
       const llvm::object::coff_relocation *reloc = Obj.getCOFFRelocation(rIter);
diff --git a/utils/yaml2obj/CMakeLists.txt b/utils/yaml2obj/CMakeLists.txt
new file mode 100644
index 0000000..f8b1197
--- /dev/null
+++ b/utils/yaml2obj/CMakeLists.txt
@@ -0,0 +1,5 @@
+add_llvm_utility(yaml2obj
+  yaml2obj.cpp
+  )
+
+target_link_libraries(yaml2obj LLVMSupport)
diff --git a/utils/yaml2obj/Makefile b/utils/yaml2obj/Makefile
new file mode 100644
index 0000000..e746d85
--- /dev/null
+++ b/utils/yaml2obj/Makefile
@@ -0,0 +1,20 @@
+##===- utils/yaml2obj/Makefile ----------------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+TOOLNAME = yaml2obj
+USEDLIBS = LLVMSupport.a
+
+# This tool has no plugins, optimize startup time.
+TOOL_NO_EXPORTS = 1
+
+# Don't install this utility
+NO_INSTALL = 1
+
+include $(LEVEL)/Makefile.common
diff --git a/utils/yaml2obj/yaml2obj.cpp b/utils/yaml2obj/yaml2obj.cpp
new file mode 100644
index 0000000..c3b3e54
--- /dev/null
+++ b/utils/yaml2obj/yaml2obj.cpp
@@ -0,0 +1,879 @@
+//===- yaml2obj - Convert YAML to a binary object file --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This program takes a YAML description of an object file and outputs the
+// binary equivalent.
+//
+// This is used for writing tests that require binary files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/system_error.h"
+#include "llvm/Support/YAMLParser.h"
+
+#include <vector>
+
+using namespace llvm;
+
+static cl::opt<std::string>
+  Input(cl::Positional, cl::desc("<input>"), cl::init("-"));
+
+template<class T>
+typename llvm::enable_if_c<std::numeric_limits<T>::is_integer, bool>::type
+getAs(const llvm::yaml::ScalarNode *SN, T &Result) {
+  SmallString<4> Storage;
+  StringRef Value = SN->getValue(Storage);
+  if (Value.getAsInteger(0, Result))
+    return false;
+  return true;
+}
+
+// Given a container with begin and end with ::value_type of a character type.
+// Iterate through pairs of characters in the the set of [a-fA-F0-9] ignoring
+// all other characters.
+struct hex_pair_iterator {
+  StringRef::const_iterator Current, End;
+  typedef SmallVector<char, 2> value_type;
+  value_type Pair;
+  bool IsDone;
+
+  hex_pair_iterator(StringRef C)
+    : Current(C.begin()), End(C.end()), IsDone(false) {
+    // Initalize Pair.
+    ++*this;
+  }
+
+  // End iterator.
+  hex_pair_iterator() : Current(), End(), IsDone(true) {}
+
+  value_type operator *() const {
+    return Pair;
+  }
+
+  hex_pair_iterator operator ++() {
+    // We're at the end of the input.
+    if (Current == End) {
+      IsDone = true;
+      return *this;
+    }
+    Pair = value_type();
+    for (; Current != End && Pair.size() != 2; ++Current) {
+      // Is a valid hex digit.
+      if ((*Current >= '0' && *Current <= '9') ||
+          (*Current >= 'a' && *Current <= 'f') ||
+          (*Current >= 'A' && *Current <= 'F'))
+        Pair.push_back(*Current);
+    }
+    // Hit the end without getting 2 hex digits. Pair is invalid.
+    if (Pair.size() != 2)
+      IsDone = true;
+    return *this;
+  }
+
+  bool operator ==(const hex_pair_iterator Other) {
+    return (IsDone == Other.IsDone) ||
+           (Current == Other.Current && End == Other.End);
+  }
+
+  bool operator !=(const hex_pair_iterator Other) {
+    return !(*this == Other);
+  }
+};
+
+template <class ContainerOut>
+static bool hexStringToByteArray(StringRef Str, ContainerOut &Out) {
+  for (hex_pair_iterator I(Str), E; I != E; ++I) {
+    typename hex_pair_iterator::value_type Pair = *I;
+    typename ContainerOut::value_type Byte;
+    if (StringRef(Pair.data(), 2).getAsInteger(16, Byte))
+      return false;
+    Out.push_back(Byte);
+  }
+  return true;
+}
+
+/// This parses a yaml stream that represents a COFF object file.
+/// See docs/yaml2obj for the yaml scheema.
+struct COFFParser {
+  COFFParser(yaml::Stream &Input) : YS(Input) {
+    std::memset(&Header, 0, sizeof(Header));
+    // A COFF string table always starts with a 4 byte size field. Offsets into
+    // it include this size, so allocate it now.
+    StringTable.append(4, 0);
+  }
+
+  bool parseHeader(yaml::Node *HeaderN) {
+    yaml::MappingNode *MN = dyn_cast<yaml::MappingNode>(HeaderN);
+    if (!MN) {
+      YS.printError(HeaderN, "header's value must be a mapping node");
+      return false;
+    }
+    for (yaml::MappingNode::iterator i = MN->begin(), e = MN->end();
+                                     i != e; ++i) {
+      yaml::ScalarNode *Key = dyn_cast<yaml::ScalarNode>(i->getKey());
+      if (!Key) {
+        YS.printError(i->getKey(), "Keys must be scalar values");
+        return false;
+      }
+      SmallString<32> Storage;
+      StringRef KeyValue = Key->getValue(Storage);
+      if (KeyValue == "Characteristics") {
+        if (!parseHeaderCharacteristics(i->getValue()))
+          return false;
+      } else {
+        yaml::ScalarNode *Value = dyn_cast<yaml::ScalarNode>(i->getValue());
+        if (!Value) {
+          YS.printError(Value,
+            Twine(KeyValue) + " must be a scalar value");
+          return false;
+        }
+        if (KeyValue == "Machine") {
+          uint16_t Machine;
+          if (!getAs(Value, Machine)) {
+            // It's not a raw number, try matching the string.
+            StringRef ValueValue = Value->getValue(Storage);
+            Machine = StringSwitch<COFF::MachineTypes>(ValueValue)
+              .Case( "IMAGE_FILE_MACHINE_UNKNOWN"
+                   , COFF::IMAGE_FILE_MACHINE_UNKNOWN)
+              .Case( "IMAGE_FILE_MACHINE_AM33"
+                   , COFF::IMAGE_FILE_MACHINE_AM33)
+              .Case( "IMAGE_FILE_MACHINE_AMD64"
+                   , COFF::IMAGE_FILE_MACHINE_AMD64)
+              .Case( "IMAGE_FILE_MACHINE_ARM"
+                   , COFF::IMAGE_FILE_MACHINE_ARM)
+              .Case( "IMAGE_FILE_MACHINE_ARMV7"
+                   , COFF::IMAGE_FILE_MACHINE_ARMV7)
+              .Case( "IMAGE_FILE_MACHINE_EBC"
+                   , COFF::IMAGE_FILE_MACHINE_EBC)
+              .Case( "IMAGE_FILE_MACHINE_I386"
+                   , COFF::IMAGE_FILE_MACHINE_I386)
+              .Case( "IMAGE_FILE_MACHINE_IA64"
+                   , COFF::IMAGE_FILE_MACHINE_IA64)
+              .Case( "IMAGE_FILE_MACHINE_M32R"
+                   , COFF::IMAGE_FILE_MACHINE_M32R)
+              .Case( "IMAGE_FILE_MACHINE_MIPS16"
+                   , COFF::IMAGE_FILE_MACHINE_MIPS16)
+              .Case( "IMAGE_FILE_MACHINE_MIPSFPU"
+                   , COFF::IMAGE_FILE_MACHINE_MIPSFPU)
+              .Case( "IMAGE_FILE_MACHINE_MIPSFPU16"
+                   , COFF::IMAGE_FILE_MACHINE_MIPSFPU16)
+              .Case( "IMAGE_FILE_MACHINE_POWERPC"
+                   , COFF::IMAGE_FILE_MACHINE_POWERPC)
+              .Case( "IMAGE_FILE_MACHINE_POWERPCFP"
+                   , COFF::IMAGE_FILE_MACHINE_POWERPCFP)
+              .Case( "IMAGE_FILE_MACHINE_R4000"
+                   , COFF::IMAGE_FILE_MACHINE_R4000)
+              .Case( "IMAGE_FILE_MACHINE_SH3"
+                   , COFF::IMAGE_FILE_MACHINE_SH3)
+              .Case( "IMAGE_FILE_MACHINE_SH3DSP"
+                   , COFF::IMAGE_FILE_MACHINE_SH3DSP)
+              .Case( "IMAGE_FILE_MACHINE_SH4"
+                   , COFF::IMAGE_FILE_MACHINE_SH4)
+              .Case( "IMAGE_FILE_MACHINE_SH5"
+                   , COFF::IMAGE_FILE_MACHINE_SH5)
+              .Case( "IMAGE_FILE_MACHINE_THUMB"
+                   , COFF::IMAGE_FILE_MACHINE_THUMB)
+              .Case( "IMAGE_FILE_MACHINE_WCEMIPSV2"
+                   , COFF::IMAGE_FILE_MACHINE_WCEMIPSV2)
+              .Default(COFF::MT_Invalid);
+            if (Machine == COFF::MT_Invalid) {
+              YS.printError(Value, "Invalid value for Machine");
+              return false;
+            }
+          }
+          Header.Machine = Machine;
+        } else if (KeyValue == "NumberOfSections") {
+          if (!getAs(Value, Header.NumberOfSections)) {
+              YS.printError(Value, "Invalid value for NumberOfSections");
+              return false;
+          }
+        } else if (KeyValue == "TimeDateStamp") {
+          if (!getAs(Value, Header.TimeDateStamp)) {
+              YS.printError(Value, "Invalid value for TimeDateStamp");
+              return false;
+          }
+        } else if (KeyValue == "PointerToSymbolTable") {
+          if (!getAs(Value, Header.PointerToSymbolTable)) {
+              YS.printError(Value, "Invalid value for PointerToSymbolTable");
+              return false;
+          }
+        } else if (KeyValue == "NumberOfSymbols") {
+          if (!getAs(Value, Header.NumberOfSymbols)) {
+              YS.printError(Value, "Invalid value for NumberOfSymbols");
+              return false;
+          }
+        } else if (KeyValue == "SizeOfOptionalHeader") {
+          if (!getAs(Value, Header.SizeOfOptionalHeader)) {
+              YS.printError(Value, "Invalid value for SizeOfOptionalHeader");
+              return false;
+          }
+        } else {
+          YS.printError(Key, "Unrecognized key in header");
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
+  bool parseHeaderCharacteristics(yaml::Node *Characteristics) {
+    yaml::ScalarNode *Value = dyn_cast<yaml::ScalarNode>(Characteristics);
+    yaml::SequenceNode *SeqValue
+      = dyn_cast<yaml::SequenceNode>(Characteristics);
+    if (!Value && !SeqValue) {
+      YS.printError(Characteristics,
+        "Characteristics must either be a number or sequence");
+      return false;
+    }
+    if (Value) {
+      if (!getAs(Value, Header.Characteristics)) {
+        YS.printError(Value, "Invalid value for Characteristics");
+        return false;
+      }
+    } else {
+      for (yaml::SequenceNode::iterator ci = SeqValue->begin(),
+                                        ce = SeqValue->end();
+                                        ci != ce; ++ci) {
+        yaml::ScalarNode *CharValue = dyn_cast<yaml::ScalarNode>(&*ci);
+        if (!CharValue) {
+          YS.printError(CharValue,
+            "Characteristics must be scalar values");
+          return false;
+        }
+        SmallString<32> Storage;
+        StringRef Char = CharValue->getValue(Storage);
+        uint16_t Characteristic = StringSwitch<COFF::Characteristics>(Char)
+          .Case( "IMAGE_FILE_RELOCS_STRIPPED"
+                , COFF::IMAGE_FILE_RELOCS_STRIPPED)
+          .Case( "IMAGE_FILE_EXECUTABLE_IMAGE"
+                , COFF::IMAGE_FILE_EXECUTABLE_IMAGE)
+          .Case( "IMAGE_FILE_LINE_NUMS_STRIPPED"
+                , COFF::IMAGE_FILE_LINE_NUMS_STRIPPED)
+          .Case( "IMAGE_FILE_LOCAL_SYMS_STRIPPED"
+                , COFF::IMAGE_FILE_LOCAL_SYMS_STRIPPED)
+          .Case( "IMAGE_FILE_AGGRESSIVE_WS_TRIM"
+                , COFF::IMAGE_FILE_AGGRESSIVE_WS_TRIM)
+          .Case( "IMAGE_FILE_LARGE_ADDRESS_AWARE"
+                , COFF::IMAGE_FILE_LARGE_ADDRESS_AWARE)
+          .Case( "IMAGE_FILE_BYTES_REVERSED_LO"
+                , COFF::IMAGE_FILE_BYTES_REVERSED_LO)
+          .Case( "IMAGE_FILE_32BIT_MACHINE"
+                , COFF::IMAGE_FILE_32BIT_MACHINE)
+          .Case( "IMAGE_FILE_DEBUG_STRIPPED"
+                , COFF::IMAGE_FILE_DEBUG_STRIPPED)
+          .Case( "IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP"
+                , COFF::IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP)
+          .Case( "IMAGE_FILE_SYSTEM"
+                , COFF::IMAGE_FILE_SYSTEM)
+          .Case( "IMAGE_FILE_DLL"
+                , COFF::IMAGE_FILE_DLL)
+          .Case( "IMAGE_FILE_UP_SYSTEM_ONLY"
+                , COFF::IMAGE_FILE_UP_SYSTEM_ONLY)
+          .Default(COFF::C_Invalid);
+        if (Characteristic == COFF::C_Invalid) {
+          // TODO: Typo-correct.
+          YS.printError(CharValue,
+            "Invalid value for Characteristic");
+          return false;
+        }
+        Header.Characteristics |= Characteristic;
+      }
+    }
+    return true;
+  }
+
+  bool parseSections(yaml::Node *SectionsN) {
+    yaml::SequenceNode *SN = dyn_cast<yaml::SequenceNode>(SectionsN);
+    if (!SN) {
+      YS.printError(SectionsN, "Sections must be a sequence");
+      return false;
+    }
+    for (yaml::SequenceNode::iterator i = SN->begin(), e = SN->end();
+                                      i != e; ++i) {
+      Section Sec;
+      std::memset(&Sec.Header, 0, sizeof(Sec.Header));
+      yaml::MappingNode *SecMap = dyn_cast<yaml::MappingNode>(&*i);
+      if (!SecMap) {
+        YS.printError(&*i, "Section entry must be a map");
+        return false;
+      }
+      for (yaml::MappingNode::iterator si = SecMap->begin(), se = SecMap->end();
+                                       si != se; ++si) {
+        yaml::ScalarNode *Key = dyn_cast<yaml::ScalarNode>(si->getKey());
+        if (!Key) {
+          YS.printError(si->getKey(), "Keys must be scalar values");
+          return false;
+        }
+        SmallString<32> Storage;
+        StringRef KeyValue = Key->getValue(Storage);
+
+        yaml::ScalarNode *Value = dyn_cast<yaml::ScalarNode>(si->getValue());
+        if (KeyValue == "Name") {
+          // If the name is less than 8 bytes, store it in place, otherwise
+          // store it in the string table.
+          StringRef Name = Value->getValue(Storage);
+          std::fill_n(Sec.Header.Name, unsigned(COFF::NameSize), 0);
+          if (Name.size() <= COFF::NameSize) {
+            std::copy(Name.begin(), Name.end(), Sec.Header.Name);
+          } else {
+            // Add string to the string table and format the index for output.
+            unsigned Index = getStringIndex(Name);
+            std::string str = utostr(Index);
+            if (str.size() > 7) {
+              YS.printError(Value, "String table got too large");
+              return false;
+            }
+            Sec.Header.Name[0] = '/';
+            std::copy(str.begin(), str.end(), Sec.Header.Name + 1);
+          }
+        } else if (KeyValue == "VirtualSize") {
+          if (!getAs(Value, Sec.Header.VirtualSize)) {
+            YS.printError(Value, "Invalid value for VirtualSize");
+            return false;
+          }
+        } else if (KeyValue == "VirtualAddress") {
+          if (!getAs(Value, Sec.Header.VirtualAddress)) {
+            YS.printError(Value, "Invalid value for VirtualAddress");
+            return false;
+          }
+        } else if (KeyValue == "SizeOfRawData") {
+          if (!getAs(Value, Sec.Header.SizeOfRawData)) {
+            YS.printError(Value, "Invalid value for SizeOfRawData");
+            return false;
+          }
+        } else if (KeyValue == "PointerToRawData") {
+          if (!getAs(Value, Sec.Header.PointerToRawData)) {
+            YS.printError(Value, "Invalid value for PointerToRawData");
+            return false;
+          }
+        } else if (KeyValue == "PointerToRelocations") {
+          if (!getAs(Value, Sec.Header.PointerToRelocations)) {
+            YS.printError(Value, "Invalid value for PointerToRelocations");
+            return false;
+          }
+        } else if (KeyValue == "PointerToLineNumbers") {
+          if (!getAs(Value, Sec.Header.PointerToLineNumbers)) {
+            YS.printError(Value, "Invalid value for PointerToLineNumbers");
+            return false;
+          }
+        } else if (KeyValue == "NumberOfRelocations") {
+          if (!getAs(Value, Sec.Header.NumberOfRelocations)) {
+            YS.printError(Value, "Invalid value for NumberOfRelocations");
+            return false;
+          }
+        } else if (KeyValue == "NumberOfLineNumbers") {
+          if (!getAs(Value, Sec.Header.NumberOfLineNumbers)) {
+            YS.printError(Value, "Invalid value for NumberOfLineNumbers");
+            return false;
+          }
+        } else if (KeyValue == "Characteristics") {
+          yaml::SequenceNode *SeqValue
+            = dyn_cast<yaml::SequenceNode>(si->getValue());
+          if (!Value && !SeqValue) {
+            YS.printError(si->getValue(),
+              "Characteristics must either be a number or sequence");
+            return false;
+          }
+          if (Value) {
+            if (!getAs(Value, Sec.Header.Characteristics)) {
+              YS.printError(Value, "Invalid value for Characteristics");
+              return false;
+            }
+          } else {
+            for (yaml::SequenceNode::iterator ci = SeqValue->begin(),
+                                              ce = SeqValue->end();
+                                              ci != ce; ++ci) {
+              yaml::ScalarNode *CharValue = dyn_cast<yaml::ScalarNode>(&*ci);
+              if (!CharValue) {
+                YS.printError(CharValue, "Invalid value for Characteristics");
+                return false;
+              }
+              StringRef Char = CharValue->getValue(Storage);
+              uint32_t Characteristic =
+                StringSwitch<COFF::SectionCharacteristics>(Char)
+                .Case( "IMAGE_SCN_TYPE_NO_PAD"
+                     , COFF::IMAGE_SCN_TYPE_NO_PAD)
+                .Case( "IMAGE_SCN_CNT_CODE"
+                     , COFF::IMAGE_SCN_CNT_CODE)
+                .Case( "IMAGE_SCN_CNT_INITIALIZED_DATA"
+                     , COFF::IMAGE_SCN_CNT_INITIALIZED_DATA)
+                .Case( "IMAGE_SCN_CNT_UNINITIALIZED_DATA"
+                     , COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA)
+                .Case( "IMAGE_SCN_LNK_OTHER"
+                     , COFF::IMAGE_SCN_LNK_OTHER)
+                .Case( "IMAGE_SCN_LNK_INFO"
+                     , COFF::IMAGE_SCN_LNK_INFO)
+                .Case( "IMAGE_SCN_LNK_REMOVE"
+                     , COFF::IMAGE_SCN_LNK_REMOVE)
+                .Case( "IMAGE_SCN_LNK_COMDAT"
+                     , COFF::IMAGE_SCN_LNK_COMDAT)
+                .Case( "IMAGE_SCN_GPREL"
+                     , COFF::IMAGE_SCN_GPREL)
+                .Case( "IMAGE_SCN_MEM_PURGEABLE"
+                     , COFF::IMAGE_SCN_MEM_PURGEABLE)
+                .Case( "IMAGE_SCN_MEM_16BIT"
+                     , COFF::IMAGE_SCN_MEM_16BIT)
+                .Case( "IMAGE_SCN_MEM_LOCKED"
+                     , COFF::IMAGE_SCN_MEM_LOCKED)
+                .Case( "IMAGE_SCN_MEM_PRELOAD"
+                     , COFF::IMAGE_SCN_MEM_PRELOAD)
+                .Case( "IMAGE_SCN_ALIGN_1BYTES"
+                     , COFF::IMAGE_SCN_ALIGN_1BYTES)
+                .Case( "IMAGE_SCN_ALIGN_2BYTES"
+                     , COFF::IMAGE_SCN_ALIGN_2BYTES)
+                .Case( "IMAGE_SCN_ALIGN_4BYTES"
+                     , COFF::IMAGE_SCN_ALIGN_4BYTES)
+                .Case( "IMAGE_SCN_ALIGN_8BYTES"
+                     , COFF::IMAGE_SCN_ALIGN_8BYTES)
+                .Case( "IMAGE_SCN_ALIGN_16BYTES"
+                     , COFF::IMAGE_SCN_ALIGN_16BYTES)
+                .Case( "IMAGE_SCN_ALIGN_32BYTES"
+                     , COFF::IMAGE_SCN_ALIGN_32BYTES)
+                .Case( "IMAGE_SCN_ALIGN_64BYTES"
+                     , COFF::IMAGE_SCN_ALIGN_64BYTES)
+                .Case( "IMAGE_SCN_ALIGN_128BYTES"
+                     , COFF::IMAGE_SCN_ALIGN_128BYTES)
+                .Case( "IMAGE_SCN_ALIGN_256BYTES"
+                     , COFF::IMAGE_SCN_ALIGN_256BYTES)
+                .Case( "IMAGE_SCN_ALIGN_512BYTES"
+                     , COFF::IMAGE_SCN_ALIGN_512BYTES)
+                .Case( "IMAGE_SCN_ALIGN_1024BYTES"
+                     , COFF::IMAGE_SCN_ALIGN_1024BYTES)
+                .Case( "IMAGE_SCN_ALIGN_2048BYTES"
+                     , COFF::IMAGE_SCN_ALIGN_2048BYTES)
+                .Case( "IMAGE_SCN_ALIGN_4096BYTES"
+                     , COFF::IMAGE_SCN_ALIGN_4096BYTES)
+                .Case( "IMAGE_SCN_ALIGN_8192BYTES"
+                     , COFF::IMAGE_SCN_ALIGN_8192BYTES)
+                .Case( "IMAGE_SCN_LNK_NRELOC_OVFL"
+                     , COFF::IMAGE_SCN_LNK_NRELOC_OVFL)
+                .Case( "IMAGE_SCN_MEM_DISCARDABLE"
+                     , COFF::IMAGE_SCN_MEM_DISCARDABLE)
+                .Case( "IMAGE_SCN_MEM_NOT_CACHED"
+                     , COFF::IMAGE_SCN_MEM_NOT_CACHED)
+                .Case( "IMAGE_SCN_MEM_NOT_PAGED"
+                     , COFF::IMAGE_SCN_MEM_NOT_PAGED)
+                .Case( "IMAGE_SCN_MEM_SHARED"
+                     , COFF::IMAGE_SCN_MEM_SHARED)
+                .Case( "IMAGE_SCN_MEM_EXECUTE"
+                     , COFF::IMAGE_SCN_MEM_EXECUTE)
+                .Case( "IMAGE_SCN_MEM_READ"
+                     , COFF::IMAGE_SCN_MEM_READ)
+                .Case( "IMAGE_SCN_MEM_WRITE"
+                     , COFF::IMAGE_SCN_MEM_WRITE)
+                .Default(COFF::SC_Invalid);
+              if (Characteristic == COFF::SC_Invalid) {
+                YS.printError(CharValue, "Invalid value for Characteristic");
+                return false;
+              }
+              Sec.Header.Characteristics |= Characteristic;
+            }
+          }
+        } else if (KeyValue == "SectionData") {
+          yaml::ScalarNode *Value = dyn_cast<yaml::ScalarNode>(si->getValue());
+          SmallString<32> Storage;
+          StringRef Data = Value->getValue(Storage);
+          if (!hexStringToByteArray(Data, Sec.Data)) {
+            YS.printError(Value, "SectionData must be a collection of pairs of"
+                                 "hex bytes");
+            return false;
+          }
+        } else
+          si->skip();
+      }
+      Sections.push_back(Sec);
+    }
+    return true;
+  }
+
+  bool parseSymbols(yaml::Node *SymbolsN) {
+    yaml::SequenceNode *SN = dyn_cast<yaml::SequenceNode>(SymbolsN);
+    if (!SN) {
+      YS.printError(SymbolsN, "Symbols must be a sequence");
+      return false;
+    }
+    for (yaml::SequenceNode::iterator i = SN->begin(), e = SN->end();
+                                      i != e; ++i) {
+      Symbol Sym;
+      std::memset(&Sym.Header, 0, sizeof(Sym.Header));
+      yaml::MappingNode *SymMap = dyn_cast<yaml::MappingNode>(&*i);
+      if (!SymMap) {
+        YS.printError(&*i, "Symbol must be a map");
+        return false;
+      }
+      for (yaml::MappingNode::iterator si = SymMap->begin(), se = SymMap->end();
+                                       si != se; ++si) {
+        yaml::ScalarNode *Key = dyn_cast<yaml::ScalarNode>(si->getKey());
+        if (!Key) {
+          YS.printError(si->getKey(), "Keys must be scalar values");
+          return false;
+        }
+        SmallString<32> Storage;
+        StringRef KeyValue = Key->getValue(Storage);
+
+        yaml::ScalarNode *Value = dyn_cast<yaml::ScalarNode>(si->getValue());
+        if (!Value) {
+          YS.printError(si->getValue(), "Must be a scalar value");
+          return false;
+        }
+        if (KeyValue == "Name") {
+          // If the name is less than 8 bytes, store it in place, otherwise
+          // store it in the string table.
+          StringRef Name = Value->getValue(Storage);
+          std::fill_n(Sym.Header.Name, unsigned(COFF::NameSize), 0);
+          if (Name.size() <= COFF::NameSize) {
+            std::copy(Name.begin(), Name.end(), Sym.Header.Name);
+          } else {
+            // Add string to the string table and format the index for output.
+            unsigned Index = getStringIndex(Name);
+            *reinterpret_cast<support::aligned_ulittle32_t*>(
+              Sym.Header.Name + 4) = Index;
+          }
+        } else if (KeyValue == "Value") {
+          if (!getAs(Value, Sym.Header.Value)) {
+            YS.printError(Value, "Invalid value for Value");
+            return false;
+          }
+        } else if (KeyValue == "SimpleType") {
+          Sym.Header.Type |= StringSwitch<COFF::SymbolBaseType>(
+            Value->getValue(Storage))
+            .Case("IMAGE_SYM_TYPE_NULL", COFF::IMAGE_SYM_TYPE_NULL)
+            .Case("IMAGE_SYM_TYPE_VOID", COFF::IMAGE_SYM_TYPE_VOID)
+            .Case("IMAGE_SYM_TYPE_CHAR", COFF::IMAGE_SYM_TYPE_CHAR)
+            .Case("IMAGE_SYM_TYPE_SHORT", COFF::IMAGE_SYM_TYPE_SHORT)
+            .Case("IMAGE_SYM_TYPE_INT", COFF::IMAGE_SYM_TYPE_INT)
+            .Case("IMAGE_SYM_TYPE_LONG", COFF::IMAGE_SYM_TYPE_LONG)
+            .Case("IMAGE_SYM_TYPE_FLOAT", COFF::IMAGE_SYM_TYPE_FLOAT)
+            .Case("IMAGE_SYM_TYPE_DOUBLE", COFF::IMAGE_SYM_TYPE_DOUBLE)
+            .Case("IMAGE_SYM_TYPE_STRUCT", COFF::IMAGE_SYM_TYPE_STRUCT)
+            .Case("IMAGE_SYM_TYPE_UNION", COFF::IMAGE_SYM_TYPE_UNION)
+            .Case("IMAGE_SYM_TYPE_ENUM", COFF::IMAGE_SYM_TYPE_ENUM)
+            .Case("IMAGE_SYM_TYPE_MOE", COFF::IMAGE_SYM_TYPE_MOE)
+            .Case("IMAGE_SYM_TYPE_BYTE", COFF::IMAGE_SYM_TYPE_BYTE)
+            .Case("IMAGE_SYM_TYPE_WORD", COFF::IMAGE_SYM_TYPE_WORD)
+            .Case("IMAGE_SYM_TYPE_UINT", COFF::IMAGE_SYM_TYPE_UINT)
+            .Case("IMAGE_SYM_TYPE_DWORD", COFF::IMAGE_SYM_TYPE_DWORD)
+            .Default(COFF::IMAGE_SYM_TYPE_NULL);
+        } else if (KeyValue == "ComplexType") {
+          Sym.Header.Type |= StringSwitch<COFF::SymbolComplexType>(
+            Value->getValue(Storage))
+            .Case("IMAGE_SYM_DTYPE_NULL", COFF::IMAGE_SYM_DTYPE_NULL)
+            .Case("IMAGE_SYM_DTYPE_POINTER", COFF::IMAGE_SYM_DTYPE_POINTER)
+            .Case("IMAGE_SYM_DTYPE_FUNCTION", COFF::IMAGE_SYM_DTYPE_FUNCTION)
+            .Case("IMAGE_SYM_DTYPE_ARRAY", COFF::IMAGE_SYM_DTYPE_ARRAY)
+            .Default(COFF::IMAGE_SYM_DTYPE_NULL)
+            << COFF::SCT_COMPLEX_TYPE_SHIFT;
+        } else if (KeyValue == "StorageClass") {
+          Sym.Header.StorageClass = StringSwitch<COFF::SymbolStorageClass>(
+            Value->getValue(Storage))
+            .Case( "IMAGE_SYM_CLASS_END_OF_FUNCTION"
+                 , COFF::IMAGE_SYM_CLASS_END_OF_FUNCTION)
+            .Case( "IMAGE_SYM_CLASS_NULL"
+                 , COFF::IMAGE_SYM_CLASS_NULL)
+            .Case( "IMAGE_SYM_CLASS_AUTOMATIC"
+                 , COFF::IMAGE_SYM_CLASS_AUTOMATIC)
+            .Case( "IMAGE_SYM_CLASS_EXTERNAL"
+                 , COFF::IMAGE_SYM_CLASS_EXTERNAL)
+            .Case( "IMAGE_SYM_CLASS_STATIC"
+                 , COFF::IMAGE_SYM_CLASS_STATIC)
+            .Case( "IMAGE_SYM_CLASS_REGISTER"
+                 , COFF::IMAGE_SYM_CLASS_REGISTER)
+            .Case( "IMAGE_SYM_CLASS_EXTERNAL_DEF"
+                 , COFF::IMAGE_SYM_CLASS_EXTERNAL_DEF)
+            .Case( "IMAGE_SYM_CLASS_LABEL"
+                 , COFF::IMAGE_SYM_CLASS_LABEL)
+            .Case( "IMAGE_SYM_CLASS_UNDEFINED_LABEL"
+                 , COFF::IMAGE_SYM_CLASS_UNDEFINED_LABEL)
+            .Case( "IMAGE_SYM_CLASS_MEMBER_OF_STRUCT"
+                 , COFF::IMAGE_SYM_CLASS_MEMBER_OF_STRUCT)
+            .Case( "IMAGE_SYM_CLASS_ARGUMENT"
+                 , COFF::IMAGE_SYM_CLASS_ARGUMENT)
+            .Case( "IMAGE_SYM_CLASS_STRUCT_TAG"
+                 , COFF::IMAGE_SYM_CLASS_STRUCT_TAG)
+            .Case( "IMAGE_SYM_CLASS_MEMBER_OF_UNION"
+                 , COFF::IMAGE_SYM_CLASS_MEMBER_OF_UNION)
+            .Case( "IMAGE_SYM_CLASS_UNION_TAG"
+                 , COFF::IMAGE_SYM_CLASS_UNION_TAG)
+            .Case( "IMAGE_SYM_CLASS_TYPE_DEFINITION"
+                 , COFF::IMAGE_SYM_CLASS_TYPE_DEFINITION)
+            .Case( "IMAGE_SYM_CLASS_UNDEFINED_STATIC"
+                 , COFF::IMAGE_SYM_CLASS_UNDEFINED_STATIC)
+            .Case( "IMAGE_SYM_CLASS_ENUM_TAG"
+                 , COFF::IMAGE_SYM_CLASS_ENUM_TAG)
+            .Case( "IMAGE_SYM_CLASS_MEMBER_OF_ENUM"
+                 , COFF::IMAGE_SYM_CLASS_MEMBER_OF_ENUM)
+            .Case( "IMAGE_SYM_CLASS_REGISTER_PARAM"
+                 , COFF::IMAGE_SYM_CLASS_REGISTER_PARAM)
+            .Case( "IMAGE_SYM_CLASS_BIT_FIELD"
+                 , COFF::IMAGE_SYM_CLASS_BIT_FIELD)
+            .Case( "IMAGE_SYM_CLASS_BLOCK"
+                 , COFF::IMAGE_SYM_CLASS_BLOCK)
+            .Case( "IMAGE_SYM_CLASS_FUNCTION"
+                 , COFF::IMAGE_SYM_CLASS_FUNCTION)
+            .Case( "IMAGE_SYM_CLASS_END_OF_STRUCT"
+                 , COFF::IMAGE_SYM_CLASS_END_OF_STRUCT)
+            .Case( "IMAGE_SYM_CLASS_FILE"
+                 , COFF::IMAGE_SYM_CLASS_FILE)
+            .Case( "IMAGE_SYM_CLASS_SECTION"
+                 , COFF::IMAGE_SYM_CLASS_SECTION)
+            .Case( "IMAGE_SYM_CLASS_WEAK_EXTERNAL"
+                 , COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL)
+            .Case( "IMAGE_SYM_CLASS_CLR_TOKEN"
+                 , COFF::IMAGE_SYM_CLASS_CLR_TOKEN)
+            .Default(COFF::SSC_Invalid);
+          if (Sym.Header.StorageClass == COFF::SSC_Invalid) {
+            YS.printError(Value, "Invalid value for StorageClass");
+            return false;
+          }
+        } else if (KeyValue == "SectionNumber") {
+          if (!getAs(Value, Sym.Header.SectionNumber)) {
+              YS.printError(Value, "Invalid value for SectionNumber");
+              return false;
+          }
+        } else if (KeyValue == "AuxillaryData") {
+          StringRef Data = Value->getValue(Storage);
+          if (!hexStringToByteArray(Data, Sym.AuxSymbols)) {
+            YS.printError(Value, "AuxillaryData must be a collection of pairs"
+                                 "of hex bytes");
+            return false;
+          }
+        } else
+          si->skip();
+      }
+      Symbols.push_back(Sym);
+    }
+    return true;
+  }
+
+  bool parse() {
+    yaml::Document &D = *YS.begin();
+    yaml::MappingNode *Root = dyn_cast<yaml::MappingNode>(D.getRoot());
+    if (!Root) {
+      YS.printError(D.getRoot(), "Root node must be a map");
+      return false;
+    }
+    for (yaml::MappingNode::iterator i = Root->begin(), e = Root->end();
+                                     i != e; ++i) {
+      yaml::ScalarNode *Key = dyn_cast<yaml::ScalarNode>(i->getKey());
+      if (!Key) {
+        YS.printError(i->getKey(), "Keys must be scalar values");
+        return false;
+      }
+      SmallString<32> Storage;
+      StringRef KeyValue = Key->getValue(Storage);
+      if (KeyValue == "header") {
+        if (!parseHeader(i->getValue()))
+          return false;
+      } else if (KeyValue == "sections") {
+        if (!parseSections(i->getValue()))
+          return false;
+      } else if (KeyValue == "symbols") {
+        if (!parseSymbols(i->getValue()))
+          return false;
+      }
+    }
+    return !YS.failed();
+  }
+
+  unsigned getStringIndex(StringRef Str) {
+    StringMap<unsigned>::iterator i = StringTableMap.find(Str);
+    if (i == StringTableMap.end()) {
+      unsigned Index = StringTable.size();
+      StringTable.append(Str.begin(), Str.end());
+      StringTable.push_back(0);
+      StringTableMap[Str] = Index;
+      return Index;
+    }
+    return i->second;
+  }
+
+  yaml::Stream &YS;
+  COFF::header Header;
+
+  struct Section {
+    COFF::section Header;
+    std::vector<uint8_t> Data;
+    std::vector<COFF::relocation> Relocations;
+  };
+
+  struct Symbol {
+    COFF::symbol Header;
+    std::vector<uint8_t> AuxSymbols;
+  };
+
+  std::vector<Section> Sections;
+  std::vector<Symbol> Symbols;
+  StringMap<unsigned> StringTableMap;
+  std::string StringTable;
+};
+
+// Take a CP and assign addresses and sizes to everything. Returns false if the
+// layout is not valid to do.
+static bool layoutCOFF(COFFParser &CP) {
+  uint32_t SectionTableStart = 0;
+  uint32_t SectionTableSize  = 0;
+
+  // The section table starts immediately after the header, including the
+  // optional header.
+  SectionTableStart = sizeof(COFF::header) + CP.Header.SizeOfOptionalHeader;
+  SectionTableSize = sizeof(COFF::section) * CP.Sections.size();
+
+  uint32_t CurrentSectionDataOffset = SectionTableStart + SectionTableSize;
+
+  // Assign each section data address consecutively.
+  for (std::vector<COFFParser::Section>::iterator i = CP.Sections.begin(),
+                                                  e = CP.Sections.end();
+                                                  i != e; ++i) {
+    if (!i->Data.empty()) {
+      i->Header.SizeOfRawData = i->Data.size();
+      i->Header.PointerToRawData = CurrentSectionDataOffset;
+      CurrentSectionDataOffset += i->Header.SizeOfRawData;
+      // TODO: Handle alignment.
+    } else {
+      i->Header.SizeOfRawData = 0;
+      i->Header.PointerToRawData = 0;
+    }
+  }
+
+  uint32_t SymbolTableStart = CurrentSectionDataOffset;
+
+  // Calculate number of symbols.
+  uint32_t NumberOfSymbols = 0;
+  for (std::vector<COFFParser::Symbol>::iterator i = CP.Symbols.begin(),
+                                                 e = CP.Symbols.end();
+                                                 i != e; ++i) {
+    if (i->AuxSymbols.size() % COFF::SymbolSize != 0) {
+      errs() << "AuxillaryData size not a multiple of symbol size!\n";
+      return false;
+    }
+    i->Header.NumberOfAuxSymbols = i->AuxSymbols.size() / COFF::SymbolSize;
+    NumberOfSymbols += 1 + i->Header.NumberOfAuxSymbols;
+  }
+
+  // Store all the allocated start addresses in the header.
+  CP.Header.NumberOfSections = CP.Sections.size();
+  CP.Header.NumberOfSymbols = NumberOfSymbols;
+  CP.Header.PointerToSymbolTable = SymbolTableStart;
+
+  *reinterpret_cast<support::ulittle32_t *>(&CP.StringTable[0])
+    = CP.StringTable.size();
+
+  return true;
+}
+
+template <typename value_type>
+struct binary_le_impl {
+  value_type Value;
+  binary_le_impl(value_type V) : Value(V) {}
+};
+
+template <typename value_type>
+raw_ostream &operator <<( raw_ostream &OS
+                        , const binary_le_impl<value_type> &BLE) {
+  char Buffer[sizeof(BLE.Value)];
+  support::endian::write_le<value_type, support::unaligned>(Buffer, BLE.Value);
+  OS.write(Buffer, sizeof(BLE.Value));
+  return OS;
+}
+
+template <typename value_type>
+binary_le_impl<value_type> binary_le(value_type V) {
+  return binary_le_impl<value_type>(V);
+}
+
+void writeCOFF(COFFParser &CP, raw_ostream &OS) {
+  OS << binary_le(CP.Header.Machine)
+     << binary_le(CP.Header.NumberOfSections)
+     << binary_le(CP.Header.TimeDateStamp)
+     << binary_le(CP.Header.PointerToSymbolTable)
+     << binary_le(CP.Header.NumberOfSymbols)
+     << binary_le(CP.Header.SizeOfOptionalHeader)
+     << binary_le(CP.Header.Characteristics);
+
+  // Output section table.
+  for (std::vector<COFFParser::Section>::const_iterator i = CP.Sections.begin(),
+                                                        e = CP.Sections.end();
+                                                        i != e; ++i) {
+    OS.write(i->Header.Name, COFF::NameSize);
+    OS << binary_le(i->Header.VirtualSize)
+       << binary_le(i->Header.VirtualAddress)
+       << binary_le(i->Header.SizeOfRawData)
+       << binary_le(i->Header.PointerToRawData)
+       << binary_le(i->Header.PointerToRelocations)
+       << binary_le(i->Header.PointerToLineNumbers)
+       << binary_le(i->Header.NumberOfRelocations)
+       << binary_le(i->Header.NumberOfLineNumbers)
+       << binary_le(i->Header.Characteristics);
+  }
+
+  // Output section data.
+  for (std::vector<COFFParser::Section>::const_iterator i = CP.Sections.begin(),
+                                                        e = CP.Sections.end();
+                                                        i != e; ++i) {
+    if (!i->Data.empty())
+      OS.write(reinterpret_cast<const char*>(&i->Data[0]), i->Data.size());
+  }
+
+  // Output symbol table.
+
+  for (std::vector<COFFParser::Symbol>::const_iterator i = CP.Symbols.begin(),
+                                                       e = CP.Symbols.end();
+                                                       i != e; ++i) {
+    OS.write(i->Header.Name, COFF::NameSize);
+    OS << binary_le(i->Header.Value)
+       << binary_le(i->Header.SectionNumber)
+       << binary_le(i->Header.Type)
+       << binary_le(i->Header.StorageClass)
+       << binary_le(i->Header.NumberOfAuxSymbols);
+    if (!i->AuxSymbols.empty())
+      OS.write( reinterpret_cast<const char*>(&i->AuxSymbols[0])
+              , i->AuxSymbols.size());
+  }
+
+  // Output string table.
+  OS.write(&CP.StringTable[0], CP.StringTable.size());
+}
+
+int main(int argc, char **argv) {
+  cl::ParseCommandLineOptions(argc, argv);
+  sys::PrintStackTraceOnErrorSignal();
+  PrettyStackTraceProgram X(argc, argv);
+  llvm_shutdown_obj Y;  // Call llvm_shutdown() on exit.
+
+  OwningPtr<MemoryBuffer> Buf;
+  if (MemoryBuffer::getFileOrSTDIN(Input, Buf))
+    return 1;
+
+  SourceMgr SM;
+  yaml::Stream S(Buf->getBuffer(), SM);
+  COFFParser CP(S);
+  if (!CP.parse()) {
+    errs() << "yaml2obj: Failed to parse YAML file!\n";
+    return 1;
+  }
+  if (!layoutCOFF(CP)) {
+    errs() << "yaml2obj: Failed to layout COFF file!\n";
+    return 1;
+  }
+  writeCOFF(CP, outs());
+}