diff options
Diffstat (limited to 'utils')
24 files changed, 2015 insertions, 765 deletions
diff --git a/utils/Makefile b/utils/Makefile index ecb30be..7a3c17d 100644 --- a/utils/Makefile +++ b/utils/Makefile @@ -9,7 +9,7 @@ LEVEL = .. PARALLEL_DIRS := FileCheck FileUpdate TableGen PerfectShuffle \ - count fpcmp llvm-lit not unittest + count fpcmp llvm-lit not unittest yaml2obj EXTRA_DIST := check-each-file codegen-diff countloc.sh \ DSAclean.py DSAextract.py emacs findsym.pl GenLibDeps.pl \ diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp index 0a8ae46..abcec8f 100644 --- a/utils/TableGen/AsmMatcherEmitter.cpp +++ b/utils/TableGen/AsmMatcherEmitter.cpp @@ -2447,7 +2447,9 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { emitSubtargetFeatureFlagEnumeration(Info, OS); // Emit the function to match a register name to number. - emitMatchRegisterName(Target, AsmParser, OS); + // This should be omitted for Mips target + if (AsmParser->getValueAsBit("ShouldEmitMatchRegisterName")) + emitMatchRegisterName(Target, AsmParser, OS); OS << "#endif // GET_REGISTER_MATCHER\n\n"; @@ -2649,7 +2651,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { OS << " for (unsigned i = 0; i != " << MaxNumOperands << "; ++i) {\n"; OS << " if (i + 1 >= Operands.size()) {\n"; OS << " OperandsValid = (it->Classes[i] == " <<"InvalidMatchClass);\n"; - OS << " if (!OperandsValid) ErrorInfo = i + 1;\n;"; + OS << " if (!OperandsValid) ErrorInfo = i + 1;\n"; OS << " break;\n"; OS << " }\n"; OS << " unsigned Diag = validateOperandClass(Operands[i+1],\n"; @@ -2716,8 +2718,8 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { OS << " }\n\n"; OS << " // Okay, we had no match. Try to return a useful error code.\n"; - OS << " if (HadMatchOtherThanPredicate || !HadMatchOtherThanFeatures)"; - OS << " return RetCode;\n"; + OS << " if (HadMatchOtherThanPredicate || !HadMatchOtherThanFeatures)\n"; + OS << " return RetCode;\n\n"; OS << " // Missing feature matches return which features were missing\n"; OS << " ErrorInfo = MissingFeatures;\n"; OS << " return Match_MissingFeature;\n"; diff --git a/utils/TableGen/AsmWriterEmitter.cpp b/utils/TableGen/AsmWriterEmitter.cpp index bd153a8..57979b3 100644 --- a/utils/TableGen/AsmWriterEmitter.cpp +++ b/utils/TableGen/AsmWriterEmitter.cpp @@ -14,8 +14,8 @@ #include "AsmWriterInst.h" #include "CodeGenTarget.h" -#include "StringToOffsetTable.h" #include "SequenceToOffsetTable.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" diff --git a/utils/TableGen/CodeGenInstruction.cpp b/utils/TableGen/CodeGenInstruction.cpp index 33381e9..12e153a 100644 --- a/utils/TableGen/CodeGenInstruction.cpp +++ b/utils/TableGen/CodeGenInstruction.cpp @@ -297,6 +297,7 @@ CodeGenInstruction::CodeGenInstruction(Record *R) : TheDef(R), Operands(R) { isCompare = R->getValueAsBit("isCompare"); isMoveImm = R->getValueAsBit("isMoveImm"); isBitcast = R->getValueAsBit("isBitcast"); + isSelect = R->getValueAsBit("isSelect"); isBarrier = R->getValueAsBit("isBarrier"); isCall = R->getValueAsBit("isCall"); canFoldAsLoad = R->getValueAsBit("canFoldAsLoad"); diff --git a/utils/TableGen/CodeGenInstruction.h b/utils/TableGen/CodeGenInstruction.h index 3ba9f24..95b572d 100644 --- a/utils/TableGen/CodeGenInstruction.h +++ b/utils/TableGen/CodeGenInstruction.h @@ -222,6 +222,7 @@ namespace llvm { bool isCompare; bool isMoveImm; bool isBitcast; + bool isSelect; bool isBarrier; bool isCall; bool canFoldAsLoad; diff --git a/utils/TableGen/CodeGenRegisters.cpp b/utils/TableGen/CodeGenRegisters.cpp index 81bf9ed..011f4b7 100644 --- a/utils/TableGen/CodeGenRegisters.cpp +++ b/utils/TableGen/CodeGenRegisters.cpp @@ -28,19 +28,15 @@ using namespace llvm; //===----------------------------------------------------------------------===// CodeGenSubRegIndex::CodeGenSubRegIndex(Record *R, unsigned Enum) - : TheDef(R), - EnumValue(Enum) -{} - -std::string CodeGenSubRegIndex::getNamespace() const { - if (TheDef->getValue("Namespace")) - return TheDef->getValueAsString("Namespace"); - else - return ""; + : TheDef(R), EnumValue(Enum) { + Name = R->getName(); + if (R->getValue("Namespace")) + Namespace = R->getValueAsString("Namespace"); } -const std::string &CodeGenSubRegIndex::getName() const { - return TheDef->getName(); +CodeGenSubRegIndex::CodeGenSubRegIndex(StringRef N, StringRef Nspace, + unsigned Enum) + : TheDef(0), Name(N), Namespace(Nspace), EnumValue(Enum) { } std::string CodeGenSubRegIndex::getQualifiedName() const { @@ -52,16 +48,31 @@ std::string CodeGenSubRegIndex::getQualifiedName() const { } void CodeGenSubRegIndex::updateComponents(CodeGenRegBank &RegBank) { - std::vector<Record*> Comps = TheDef->getValueAsListOfDefs("ComposedOf"); - if (Comps.empty()) + if (!TheDef) return; - if (Comps.size() != 2) - throw TGError(TheDef->getLoc(), "ComposedOf must have exactly two entries"); - CodeGenSubRegIndex *A = RegBank.getSubRegIdx(Comps[0]); - CodeGenSubRegIndex *B = RegBank.getSubRegIdx(Comps[1]); - CodeGenSubRegIndex *X = A->addComposite(B, this); - if (X) - throw TGError(TheDef->getLoc(), "Ambiguous ComposedOf entries"); + + std::vector<Record*> Comps = TheDef->getValueAsListOfDefs("ComposedOf"); + if (!Comps.empty()) { + if (Comps.size() != 2) + throw TGError(TheDef->getLoc(), "ComposedOf must have exactly two entries"); + CodeGenSubRegIndex *A = RegBank.getSubRegIdx(Comps[0]); + CodeGenSubRegIndex *B = RegBank.getSubRegIdx(Comps[1]); + CodeGenSubRegIndex *X = A->addComposite(B, this); + if (X) + throw TGError(TheDef->getLoc(), "Ambiguous ComposedOf entries"); + } + + std::vector<Record*> Parts = + TheDef->getValueAsListOfDefs("CoveringSubRegIndices"); + if (!Parts.empty()) { + if (Parts.size() < 2) + throw TGError(TheDef->getLoc(), + "CoveredBySubRegs must have two or more entries"); + SmallVector<CodeGenSubRegIndex*, 8> IdxParts; + for (unsigned i = 0, e = Parts.size(); i != e; ++i) + IdxParts.push_back(RegBank.getSubRegIdx(Parts[i])); + RegBank.addConcatSubRegIndex(IdxParts, this); + } } void CodeGenSubRegIndex::cleanComposites() { @@ -187,10 +198,7 @@ bool CodeGenRegister::inheritRegUnits(CodeGenRegBank &RegBank) { unsigned OldNumUnits = RegUnits.size(); for (SubRegMap::const_iterator I = SubRegs.begin(), E = SubRegs.end(); I != E; ++I) { - // Strangely a register may have itself as a subreg (self-cycle) e.g. XMM. CodeGenRegister *SR = I->second; - if (SR == this) - continue; // Merge the subregister's units into this register's RegUnits. mergeRegUnits(RegUnits, SR->RegUnits); } @@ -260,44 +268,6 @@ CodeGenRegister::computeSubRegs(CodeGenRegBank &RegBank) { } } - // Process the composites. - ListInit *Comps = TheDef->getValueAsListInit("CompositeIndices"); - for (unsigned i = 0, e = Comps->size(); i != e; ++i) { - DagInit *Pat = dynamic_cast<DagInit*>(Comps->getElement(i)); - if (!Pat) - throw TGError(TheDef->getLoc(), "Invalid dag '" + - Comps->getElement(i)->getAsString() + - "' in CompositeIndices"); - DefInit *BaseIdxInit = dynamic_cast<DefInit*>(Pat->getOperator()); - if (!BaseIdxInit || !BaseIdxInit->getDef()->isSubClassOf("SubRegIndex")) - throw TGError(TheDef->getLoc(), "Invalid SubClassIndex in " + - Pat->getAsString()); - CodeGenSubRegIndex *BaseIdx = RegBank.getSubRegIdx(BaseIdxInit->getDef()); - - // Resolve list of subreg indices into R2. - CodeGenRegister *R2 = this; - for (DagInit::const_arg_iterator di = Pat->arg_begin(), - de = Pat->arg_end(); di != de; ++di) { - DefInit *IdxInit = dynamic_cast<DefInit*>(*di); - if (!IdxInit || !IdxInit->getDef()->isSubClassOf("SubRegIndex")) - throw TGError(TheDef->getLoc(), "Invalid SubClassIndex in " + - Pat->getAsString()); - CodeGenSubRegIndex *Idx = RegBank.getSubRegIdx(IdxInit->getDef()); - const SubRegMap &R2Subs = R2->computeSubRegs(RegBank); - SubRegMap::const_iterator ni = R2Subs.find(Idx); - if (ni == R2Subs.end()) - throw TGError(TheDef->getLoc(), "Composite " + Pat->getAsString() + - " refers to bad index in " + R2->getName()); - R2 = ni->second; - } - - // Insert composite index. Allow overriding inherited indices etc. - SubRegs[BaseIdx] = R2; - - // R2 is no longer an orphan. - Orphans.erase(R2); - } - // Now Orphans contains the inherited subregisters without a direct index. // Create inferred indexes for all missing entries. // Work backwards in the Indices vector in order to compose subregs bottom-up. @@ -327,14 +297,25 @@ CodeGenRegister::computeSubRegs(CodeGenRegBank &RegBank) { // Compute the inverse SubReg -> Idx map. for (SubRegMap::const_iterator SI = SubRegs.begin(), SE = SubRegs.end(); SI != SE; ++SI) { - // Ignore idempotent sub-register indices. - if (SI->second == this) + if (SI->second == this) { + SMLoc Loc; + if (TheDef) + Loc = TheDef->getLoc(); + throw TGError(Loc, "Register " + getName() + + " has itself as a sub-register"); + } + // Ensure that every sub-register has a unique name. + DenseMap<const CodeGenRegister*, CodeGenSubRegIndex*>::iterator Ins = + SubReg2Idx.insert(std::make_pair(SI->second, SI->first)).first; + if (Ins->second == SI->first) continue; - // Is is possible to have multiple names for the same sub-register. - // For example, XMM0 appears as sub_xmm, sub_sd, and sub_ss in YMM0. - // Eventually, this degeneration should go away, but for now we simply give - // precedence to the explicit sub-register index over the inherited ones. - SubReg2Idx.insert(std::make_pair(SI->second, SI->first)); + // Trouble: Two different names for SI->second. + SMLoc Loc; + if (TheDef) + Loc = TheDef->getLoc(); + throw TGError(Loc, "Sub-register can't have two names: " + + SI->second->getName() + " available as " + + SI->first->getName() + " and " + Ins->second->getName()); } // Derive possible names for sub-register concatenations from any explicit @@ -508,8 +489,6 @@ void CodeGenRegister::computeSuperRegs(CodeGenRegBank &RegBank) { Id.push_back(I->first->EnumValue); Id.push_back(I->second->TopoSig); - if (I->second == this) - continue; // Don't add duplicate entries. if (!I->second->SuperRegs.empty() && I->second->SuperRegs.back() == this) continue; @@ -530,8 +509,7 @@ CodeGenRegister::addSubRegsPreOrder(SetVector<const CodeGenRegister*> &OSet, // Add any secondary sub-registers that weren't part of the explicit tree. for (SubRegMap::const_iterator I = SubRegs.begin(), E = SubRegs.end(); I != E; ++I) - if (I->second != this) - OSet.insert(I->second); + OSet.insert(I->second); } // Compute overlapping registers. @@ -970,7 +948,7 @@ void CodeGenRegisterClass::buildRegUnitSet( // CodeGenRegBank //===----------------------------------------------------------------------===// -CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records) : Records(Records) { +CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records) { // Configure register Sets to understand register classes and tuples. Sets.addFieldExpander("RegisterClass", "MemberList"); Sets.addFieldExpander("CalleeSavedRegs", "SaveList"); @@ -980,7 +958,6 @@ CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records) : Records(Records) { // More indices will be synthesized later. std::vector<Record*> SRIs = Records.getAllDerivedDefinitions("SubRegIndex"); std::sort(SRIs.begin(), SRIs.end(), LessRecord()); - NumNamedIndices = SRIs.size(); for (unsigned i = 0, e = SRIs.size(); i != e; ++i) getSubRegIdx(SRIs[i]); // Build composite maps from ComposedOf fields. @@ -1048,6 +1025,15 @@ CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records) : Records(Records) { CodeGenRegisterClass::computeSubClasses(*this); } +// Create a synthetic CodeGenSubRegIndex without a corresponding Record. +CodeGenSubRegIndex* +CodeGenRegBank::createSubRegIndex(StringRef Name, StringRef Namespace) { + CodeGenSubRegIndex *Idx = new CodeGenSubRegIndex(Name, Namespace, + SubRegIndices.size() + 1); + SubRegIndices.push_back(Idx); + return Idx; +} + CodeGenSubRegIndex *CodeGenRegBank::getSubRegIdx(Record *Def) { CodeGenSubRegIndex *&Idx = Def2SubRegIdx[Def]; if (Idx) @@ -1112,7 +1098,7 @@ CodeGenRegBank::getCompositeSubRegIndex(CodeGenSubRegIndex *A, // None exists, synthesize one. std::string Name = A->getName() + "_then_" + B->getName(); - Comp = getSubRegIdx(new Record(Name, SMLoc(), Records)); + Comp = createSubRegIndex(Name, A->getNamespace()); A->addComposite(B, Comp); return Comp; } @@ -1132,7 +1118,7 @@ getConcatSubRegIndex(const SmallVector<CodeGenSubRegIndex*, 8> &Parts) { Name += '_'; Name += Parts[i]->getName(); } - return Idx = getSubRegIdx(new Record(Name, SMLoc(), Records)); + return Idx = createSubRegIndex(Name, Parts.front()->getNamespace()); } void CodeGenRegBank::computeComposites() { diff --git a/utils/TableGen/CodeGenRegisters.h b/utils/TableGen/CodeGenRegisters.h index eb6724e..827063e 100644 --- a/utils/TableGen/CodeGenRegisters.h +++ b/utils/TableGen/CodeGenRegisters.h @@ -35,14 +35,17 @@ namespace llvm { /// CodeGenSubRegIndex - Represents a sub-register index. class CodeGenSubRegIndex { Record *const TheDef; + std::string Name; + std::string Namespace; public: const unsigned EnumValue; CodeGenSubRegIndex(Record *R, unsigned Enum); + CodeGenSubRegIndex(StringRef N, StringRef Nspace, unsigned Enum); - const std::string &getName() const; - std::string getNamespace() const; + const std::string &getName() const { return Name; } + const std::string &getNamespace() const { return Namespace; } std::string getQualifiedName() const; // Order CodeGenSubRegIndex pointers by EnumValue. @@ -422,13 +425,13 @@ namespace llvm { // CodeGenRegBank - Represent a target's registers and the relations between // them. class CodeGenRegBank { - RecordKeeper &Records; SetTheory Sets; // SubRegIndices. std::vector<CodeGenSubRegIndex*> SubRegIndices; DenseMap<Record*, CodeGenSubRegIndex*> Def2SubRegIdx; - unsigned NumNamedIndices; + + CodeGenSubRegIndex *createSubRegIndex(StringRef Name, StringRef NameSpace); typedef std::map<SmallVector<CodeGenSubRegIndex*, 8>, CodeGenSubRegIndex*> ConcatIdxMap; @@ -495,7 +498,6 @@ namespace llvm { // in the .td files. The rest are synthesized such that all sub-registers // have a unique name. ArrayRef<CodeGenSubRegIndex*> getSubRegIndices() { return SubRegIndices; } - unsigned getNumNamedIndices() { return NumNamedIndices; } // Find a SubRegIndex form its Record def. CodeGenSubRegIndex *getSubRegIdx(Record*); diff --git a/utils/TableGen/FixedLenDecoderEmitter.cpp b/utils/TableGen/FixedLenDecoderEmitter.cpp index 2cdde55..e89c393 100644 --- a/utils/TableGen/FixedLenDecoderEmitter.cpp +++ b/utils/TableGen/FixedLenDecoderEmitter.cpp @@ -17,9 +17,15 @@ #include "CodeGenTarget.h" #include "llvm/TableGen/Record.h" #include "llvm/ADT/APInt.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/LEB128.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TableGen/TableGenBackend.h" @@ -35,9 +41,7 @@ struct EncodingField { EncodingField(unsigned B, unsigned W, unsigned O) : Base(B), Width(W), Offset(O) { } }; -} // End anonymous namespace -namespace { struct OperandInfo { std::vector<EncodingField> Fields; std::string Decoder; @@ -56,10 +60,25 @@ struct OperandInfo { const_iterator begin() const { return Fields.begin(); } const_iterator end() const { return Fields.end(); } }; + +typedef std::vector<uint8_t> DecoderTable; +typedef uint32_t DecoderFixup; +typedef std::vector<DecoderFixup> FixupList; +typedef std::vector<FixupList> FixupScopeList; +typedef SetVector<std::string> PredicateSet; +typedef SetVector<std::string> DecoderSet; +struct DecoderTableInfo { + DecoderTable Table; + FixupScopeList FixupStack; + PredicateSet Predicates; + DecoderSet Decoders; +}; + } // End anonymous namespace namespace { class FixedLenDecoderEmitter { + const std::vector<const CodeGenInstruction*> *NumberedInstructions; public: // Defaults preserved here for documentation, even though they aren't @@ -77,6 +96,17 @@ public: GuardPrefix(GPrefix), GuardPostfix(GPostfix), ReturnOK(ROK), ReturnFail(RFail), Locals(L) {} + // Emit the decoder state machine table. + void emitTable(formatted_raw_ostream &o, DecoderTable &Table, + unsigned Indentation, unsigned BitWidth, + StringRef Namespace) const; + void emitPredicateFunction(formatted_raw_ostream &OS, + PredicateSet &Predicates, + unsigned Indentation) const; + void emitDecoderFunction(formatted_raw_ostream &OS, + DecoderSet &Decoders, + unsigned Indentation) const; + // run - Output the code emitter void run(raw_ostream &o); @@ -120,9 +150,7 @@ static bit_value_t bitFromBits(const BitsInit &bits, unsigned index) { } // Prints the bit value for each position. static void dumpBits(raw_ostream &o, const BitsInit &bits) { - unsigned index; - - for (index = bits.getNumBits(); index > 0; index--) { + for (unsigned index = bits.getNumBits(); index > 0; --index) { switch (bitFromBits(bits, index - 1)) { case BIT_TRUE: o << "1"; @@ -238,8 +266,9 @@ public: // match the remaining undecoded encoding bits against the singleton. void recurse(); - // Emit code to decode instructions given a segment or segments of bits. - void emit(raw_ostream &o, unsigned &Indentation) const; + // Emit table entries to decode instructions given a segment or segments of + // bits. + void emitTableEntry(DecoderTableInfo &TableInfo) const; // Returns the number of fanout produced by the filter. More fanout implies // the filter distinguishes more categories of instructions. @@ -338,12 +367,7 @@ public: doFilter(); } - // The top level filter chooser has NULL as its parent. - bool isTopLevel() const { return Parent == NULL; } - - // Emit the top level typedef and decodeInstruction() function. - void emitTop(raw_ostream &o, unsigned Indentation, - const std::string &Namespace) const; + unsigned getBitWidth() const { return BitWidth; } protected: // Populates the insn given the uid. @@ -414,21 +438,28 @@ protected: bool emitPredicateMatch(raw_ostream &o, unsigned &Indentation, unsigned Opc) const; - void emitSoftFailCheck(raw_ostream &o, unsigned Indentation, - unsigned Opc) const; + bool doesOpcodeNeedPredicate(unsigned Opc) const; + unsigned getPredicateIndex(DecoderTableInfo &TableInfo, StringRef P) const; + void emitPredicateTableEntry(DecoderTableInfo &TableInfo, + unsigned Opc) const; + + void emitSoftFailTableEntry(DecoderTableInfo &TableInfo, + unsigned Opc) const; - // Emits code to decode the singleton. Return true if we have matched all the - // well-known bits. - bool emitSingletonDecoder(raw_ostream &o, unsigned &Indentation, - unsigned Opc) const; + // Emits table entries to decode the singleton. + void emitSingletonTableEntry(DecoderTableInfo &TableInfo, + unsigned Opc) const; // Emits code to decode the singleton, and then to decode the rest. - void emitSingletonDecoder(raw_ostream &o, unsigned &Indentation, - const Filter &Best) const; + void emitSingletonTableEntry(DecoderTableInfo &TableInfo, + const Filter &Best) const; - void emitBinaryParser(raw_ostream &o , unsigned &Indentation, + void emitBinaryParser(raw_ostream &o, unsigned &Indentation, const OperandInfo &OpInfo) const; + void emitDecoder(raw_ostream &OS, unsigned Indentation, unsigned Opc) const; + unsigned getDecoderIndex(DecoderSet &Decoders, unsigned Opc) const; + // Assign a single filter and run with it. void runSingleFilter(unsigned startBit, unsigned numBit, bool mixed); @@ -447,10 +478,10 @@ protected: // dump the conflict set to the standard error. void doFilter(); - // Emits code to decode our share of instructions. Returns true if the - // emitted code causes a return, which occurs if we know how to decode - // the instruction at this level or the instruction is not decodeable. - bool emit(raw_ostream &o, unsigned &Indentation) const; +public: + // emitTableEntries - Emit state machine entries to decode our share of + // instructions. + void emitTableEntries(DecoderTableInfo &TableInfo) const; }; } // End anonymous namespace @@ -524,11 +555,9 @@ void Filter::recurse() { // Starts by inheriting our parent filter chooser's filter bit values. std::vector<bit_value_t> BitValueArray(Owner->FilterBitValues); - unsigned bitIndex; - if (VariableInstructions.size()) { // Conservatively marks each segment position as BIT_UNSET. - for (bitIndex = 0; bitIndex < NumBits; bitIndex++) + for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex) BitValueArray[StartBit + bitIndex] = BIT_UNSET; // Delegates to an inferior filter chooser for further processing on this @@ -544,7 +573,7 @@ void Filter::recurse() { } // No need to recurse for a singleton filtered instruction. - // See also Filter::emit(). + // See also Filter::emit*(). if (getNumFiltered() == 1) { //Owner->SingletonExists(LastOpcFiltered); assert(FilterChooserMap.size() == 1); @@ -557,7 +586,7 @@ void Filter::recurse() { mapIterator++) { // Marks all the segment positions with either BIT_TRUE or BIT_FALSE. - for (bitIndex = 0; bitIndex < NumBits; bitIndex++) { + for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex) { if (mapIterator->first & (1ULL << bitIndex)) BitValueArray[StartBit + bitIndex] = BIT_TRUE; else @@ -577,64 +606,100 @@ void Filter::recurse() { } } -// Emit code to decode instructions given a segment or segments of bits. -void Filter::emit(raw_ostream &o, unsigned &Indentation) const { - o.indent(Indentation) << "// Check Inst{"; - - if (NumBits > 1) - o << (StartBit + NumBits - 1) << '-'; +static void resolveTableFixups(DecoderTable &Table, const FixupList &Fixups, + uint32_t DestIdx) { + // Any NumToSkip fixups in the current scope can resolve to the + // current location. + for (FixupList::const_reverse_iterator I = Fixups.rbegin(), + E = Fixups.rend(); + I != E; ++I) { + // Calculate the distance from the byte following the fixup entry byte + // to the destination. The Target is calculated from after the 16-bit + // NumToSkip entry itself, so subtract two from the displacement here + // to account for that. + uint32_t FixupIdx = *I; + uint32_t Delta = DestIdx - FixupIdx - 2; + // Our NumToSkip entries are 16-bits. Make sure our table isn't too + // big. + assert(Delta < 65536U && "disassembler decoding table too large!"); + Table[FixupIdx] = (uint8_t)Delta; + Table[FixupIdx + 1] = (uint8_t)(Delta >> 8); + } +} - o << StartBit << "} ...\n"; +// Emit table entries to decode instructions given a segment or segments +// of bits. +void Filter::emitTableEntry(DecoderTableInfo &TableInfo) const { + TableInfo.Table.push_back(MCD::OPC_ExtractField); + TableInfo.Table.push_back(StartBit); + TableInfo.Table.push_back(NumBits); - o.indent(Indentation) << "switch (fieldFromInstruction" << Owner->BitWidth - << "(insn, " << StartBit << ", " - << NumBits << ")) {\n"; + // A new filter entry begins a new scope for fixup resolution. + TableInfo.FixupStack.push_back(FixupList()); std::map<unsigned, const FilterChooser*>::const_iterator filterIterator; - bool DefaultCase = false; + DecoderTable &Table = TableInfo.Table; + + size_t PrevFilter = 0; + bool HasFallthrough = false; for (filterIterator = FilterChooserMap.begin(); filterIterator != FilterChooserMap.end(); filterIterator++) { - // Field value -1 implies a non-empty set of variable instructions. // See also recurse(). if (filterIterator->first == (unsigned)-1) { - DefaultCase = true; - - o.indent(Indentation) << "default:\n"; - o.indent(Indentation) << " break; // fallthrough\n"; - - // Closing curly brace for the switch statement. - // This is unconventional because we want the default processing to be - // performed for the fallthrough cases as well, i.e., when the "cases" - // did not prove a decoded instruction. - o.indent(Indentation) << "}\n"; - - } else - o.indent(Indentation) << "case " << filterIterator->first << ":\n"; + HasFallthrough = true; + + // Each scope should always have at least one filter value to check + // for. + assert(PrevFilter != 0 && "empty filter set!"); + FixupList &CurScope = TableInfo.FixupStack.back(); + // Resolve any NumToSkip fixups in the current scope. + resolveTableFixups(Table, CurScope, Table.size()); + CurScope.clear(); + PrevFilter = 0; // Don't re-process the filter's fallthrough. + } else { + Table.push_back(MCD::OPC_FilterValue); + // Encode and emit the value to filter against. + uint8_t Buffer[8]; + unsigned Len = encodeULEB128(filterIterator->first, Buffer); + Table.insert(Table.end(), Buffer, Buffer + Len); + // Reserve space for the NumToSkip entry. We'll backpatch the value + // later. + PrevFilter = Table.size(); + Table.push_back(0); + Table.push_back(0); + } // We arrive at a category of instructions with the same segment value. // Now delegate to the sub filter chooser for further decodings. // The case may fallthrough, which happens if the remaining well-known // encoding bits do not match exactly. - if (!DefaultCase) { ++Indentation; ++Indentation; } - - filterIterator->second->emit(o, Indentation); - // For top level default case, there's no need for a break statement. - if (Owner->isTopLevel() && DefaultCase) - break; - - o.indent(Indentation) << "break;\n"; - - if (!DefaultCase) { --Indentation; --Indentation; } + filterIterator->second->emitTableEntries(TableInfo); + + // Now that we've emitted the body of the handler, update the NumToSkip + // of the filter itself to be able to skip forward when false. Subtract + // two as to account for the width of the NumToSkip field itself. + if (PrevFilter) { + uint32_t NumToSkip = Table.size() - PrevFilter - 2; + assert(NumToSkip < 65536U && "disassembler decoding table too large!"); + Table[PrevFilter] = (uint8_t)NumToSkip; + Table[PrevFilter + 1] = (uint8_t)(NumToSkip >> 8); + } } - // If there is no default case, we still need to supply a closing brace. - if (!DefaultCase) { - // Closing curly brace for the switch statement. - o.indent(Indentation) << "}\n"; - } + // Any remaining unresolved fixups bubble up to the parent fixup scope. + assert(TableInfo.FixupStack.size() > 1 && "fixup stack underflow!"); + FixupScopeList::iterator Source = TableInfo.FixupStack.end() - 1; + FixupScopeList::iterator Dest = Source - 1; + Dest->insert(Dest->end(), Source->begin(), Source->end()); + TableInfo.FixupStack.pop_back(); + + // If there is no fallthrough, then the final filter should get fixed + // up according to the enclosing scope rather than the current position. + if (!HasFallthrough) + TableInfo.FixupStack.back().push_back(PrevFilter); } // Returns the number of fanout produced by the filter. More fanout implies @@ -652,31 +717,205 @@ unsigned Filter::usefulness() const { // // ////////////////////////////////// -// Emit the top level typedef and decodeInstruction() function. -void FilterChooser::emitTop(raw_ostream &o, unsigned Indentation, - const std::string &Namespace) const { - o.indent(Indentation) << - "static MCDisassembler::DecodeStatus decode" << Namespace << "Instruction" - << BitWidth << "(MCInst &MI, uint" << BitWidth - << "_t insn, uint64_t Address, " - << "const void *Decoder, const MCSubtargetInfo &STI) {\n"; - o.indent(Indentation) << " unsigned tmp = 0;\n"; - o.indent(Indentation) << " (void)tmp;\n"; - o.indent(Indentation) << Emitter->Locals << "\n"; - o.indent(Indentation) << " uint64_t Bits = STI.getFeatureBits();\n"; - o.indent(Indentation) << " (void)Bits;\n"; - - ++Indentation; ++Indentation; - // Emits code to decode the instructions. - emit(o, Indentation); - - o << '\n'; - o.indent(Indentation) << "return " << Emitter->ReturnFail << ";\n"; - --Indentation; --Indentation; - - o.indent(Indentation) << "}\n"; - - o << '\n'; +// Emit the decoder state machine table. +void FixedLenDecoderEmitter::emitTable(formatted_raw_ostream &OS, + DecoderTable &Table, + unsigned Indentation, + unsigned BitWidth, + StringRef Namespace) const { + OS.indent(Indentation) << "static const uint8_t DecoderTable" << Namespace + << BitWidth << "[] = {\n"; + + Indentation += 2; + + // FIXME: We may be able to use the NumToSkip values to recover + // appropriate indentation levels. + DecoderTable::const_iterator I = Table.begin(); + DecoderTable::const_iterator E = Table.end(); + while (I != E) { + assert (I < E && "incomplete decode table entry!"); + + uint64_t Pos = I - Table.begin(); + OS << "/* " << Pos << " */"; + OS.PadToColumn(12); + + switch (*I) { + default: + throw "invalid decode table opcode"; + case MCD::OPC_ExtractField: { + ++I; + unsigned Start = *I++; + unsigned Len = *I++; + OS.indent(Indentation) << "MCD::OPC_ExtractField, " << Start << ", " + << Len << ", // Inst{"; + if (Len > 1) + OS << (Start + Len - 1) << "-"; + OS << Start << "} ...\n"; + break; + } + case MCD::OPC_FilterValue: { + ++I; + OS.indent(Indentation) << "MCD::OPC_FilterValue, "; + // The filter value is ULEB128 encoded. + while (*I >= 128) + OS << utostr(*I++) << ", "; + OS << utostr(*I++) << ", "; + + // 16-bit numtoskip value. + uint8_t Byte = *I++; + uint32_t NumToSkip = Byte; + OS << utostr(Byte) << ", "; + Byte = *I++; + OS << utostr(Byte) << ", "; + NumToSkip |= Byte << 8; + OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; + break; + } + case MCD::OPC_CheckField: { + ++I; + unsigned Start = *I++; + unsigned Len = *I++; + OS.indent(Indentation) << "MCD::OPC_CheckField, " << Start << ", " + << Len << ", ";// << Val << ", " << NumToSkip << ",\n"; + // ULEB128 encoded field value. + for (; *I >= 128; ++I) + OS << utostr(*I) << ", "; + OS << utostr(*I++) << ", "; + // 16-bit numtoskip value. + uint8_t Byte = *I++; + uint32_t NumToSkip = Byte; + OS << utostr(Byte) << ", "; + Byte = *I++; + OS << utostr(Byte) << ", "; + NumToSkip |= Byte << 8; + OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; + break; + } + case MCD::OPC_CheckPredicate: { + ++I; + OS.indent(Indentation) << "MCD::OPC_CheckPredicate, "; + for (; *I >= 128; ++I) + OS << utostr(*I) << ", "; + OS << utostr(*I++) << ", "; + + // 16-bit numtoskip value. + uint8_t Byte = *I++; + uint32_t NumToSkip = Byte; + OS << utostr(Byte) << ", "; + Byte = *I++; + OS << utostr(Byte) << ", "; + NumToSkip |= Byte << 8; + OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; + break; + } + case MCD::OPC_Decode: { + ++I; + // Extract the ULEB128 encoded Opcode to a buffer. + uint8_t Buffer[8], *p = Buffer; + while ((*p++ = *I++) >= 128) + assert((p - Buffer) <= (ptrdiff_t)sizeof(Buffer) + && "ULEB128 value too large!"); + // Decode the Opcode value. + unsigned Opc = decodeULEB128(Buffer); + OS.indent(Indentation) << "MCD::OPC_Decode, "; + for (p = Buffer; *p >= 128; ++p) + OS << utostr(*p) << ", "; + OS << utostr(*p) << ", "; + + // Decoder index. + for (; *I >= 128; ++I) + OS << utostr(*I) << ", "; + OS << utostr(*I++) << ", "; + + OS << "// Opcode: " + << NumberedInstructions->at(Opc)->TheDef->getName() << "\n"; + break; + } + case MCD::OPC_SoftFail: { + ++I; + OS.indent(Indentation) << "MCD::OPC_SoftFail"; + // Positive mask + uint64_t Value = 0; + unsigned Shift = 0; + do { + OS << ", " << utostr(*I); + Value += (*I & 0x7f) << Shift; + Shift += 7; + } while (*I++ >= 128); + if (Value > 127) + OS << " /* 0x" << utohexstr(Value) << " */"; + // Negative mask + Value = 0; + Shift = 0; + do { + OS << ", " << utostr(*I); + Value += (*I & 0x7f) << Shift; + Shift += 7; + } while (*I++ >= 128); + if (Value > 127) + OS << " /* 0x" << utohexstr(Value) << " */"; + OS << ",\n"; + break; + } + case MCD::OPC_Fail: { + ++I; + OS.indent(Indentation) << "MCD::OPC_Fail,\n"; + break; + } + } + } + OS.indent(Indentation) << "0\n"; + + Indentation -= 2; + + OS.indent(Indentation) << "};\n\n"; +} + +void FixedLenDecoderEmitter:: +emitPredicateFunction(formatted_raw_ostream &OS, PredicateSet &Predicates, + unsigned Indentation) const { + // The predicate function is just a big switch statement based on the + // input predicate index. + OS.indent(Indentation) << "static bool checkDecoderPredicate(unsigned Idx, " + << "uint64_t Bits) {\n"; + Indentation += 2; + OS.indent(Indentation) << "switch (Idx) {\n"; + OS.indent(Indentation) << "default: llvm_unreachable(\"Invalid index!\");\n"; + unsigned Index = 0; + for (PredicateSet::const_iterator I = Predicates.begin(), E = Predicates.end(); + I != E; ++I, ++Index) { + OS.indent(Indentation) << "case " << Index << ":\n"; + OS.indent(Indentation+2) << "return (" << *I << ");\n"; + } + OS.indent(Indentation) << "}\n"; + Indentation -= 2; + OS.indent(Indentation) << "}\n\n"; +} + +void FixedLenDecoderEmitter:: +emitDecoderFunction(formatted_raw_ostream &OS, DecoderSet &Decoders, + unsigned Indentation) const { + // The decoder function is just a big switch statement based on the + // input decoder index. + OS.indent(Indentation) << "template<typename InsnType>\n"; + OS.indent(Indentation) << "static DecodeStatus decodeToMCInst(DecodeStatus S," + << " unsigned Idx, InsnType insn, MCInst &MI,\n"; + OS.indent(Indentation) << " uint64_t " + << "Address, const void *Decoder) {\n"; + Indentation += 2; + OS.indent(Indentation) << "InsnType tmp;\n"; + OS.indent(Indentation) << "switch (Idx) {\n"; + OS.indent(Indentation) << "default: llvm_unreachable(\"Invalid index!\");\n"; + unsigned Index = 0; + for (DecoderSet::const_iterator I = Decoders.begin(), E = Decoders.end(); + I != E; ++I, ++Index) { + OS.indent(Indentation) << "case " << Index << ":\n"; + OS << *I; + OS.indent(Indentation+2) << "return S;\n"; + } + OS.indent(Indentation) << "}\n"; + Indentation -= 2; + OS.indent(Indentation) << "}\n\n"; } // Populates the field of the insn given the start position and the number of @@ -703,9 +942,7 @@ bool FilterChooser::fieldFromInsn(uint64_t &Field, insn_t &Insn, /// filter array as a series of chars. void FilterChooser::dumpFilterArray(raw_ostream &o, const std::vector<bit_value_t> &filter) const { - unsigned bitIndex; - - for (bitIndex = BitWidth; bitIndex > 0; bitIndex--) { + for (unsigned bitIndex = BitWidth; bitIndex > 0; bitIndex--) { switch (filter[bitIndex - 1]) { case BIT_UNFILTERED: o << "."; @@ -827,26 +1064,71 @@ void FilterChooser::emitBinaryParser(raw_ostream &o, unsigned &Indentation, if (OpInfo.numFields() == 1) { OperandInfo::const_iterator OI = OpInfo.begin(); - o.indent(Indentation) << " tmp = fieldFromInstruction" << BitWidth - << "(insn, " << OI->Base << ", " << OI->Width - << ");\n"; + o.indent(Indentation) << "tmp = fieldFromInstruction" + << "(insn, " << OI->Base << ", " << OI->Width + << ");\n"; } else { - o.indent(Indentation) << " tmp = 0;\n"; + o.indent(Indentation) << "tmp = 0;\n"; for (OperandInfo::const_iterator OI = OpInfo.begin(), OE = OpInfo.end(); OI != OE; ++OI) { - o.indent(Indentation) << " tmp |= (fieldFromInstruction" << BitWidth + o.indent(Indentation) << "tmp |= (fieldFromInstruction" << "(insn, " << OI->Base << ", " << OI->Width << ") << " << OI->Offset << ");\n"; } } if (Decoder != "") - o.indent(Indentation) << " " << Emitter->GuardPrefix << Decoder + o.indent(Indentation) << Emitter->GuardPrefix << Decoder << "(MI, tmp, Address, Decoder)" << Emitter->GuardPostfix << "\n"; else - o.indent(Indentation) << " MI.addOperand(MCOperand::CreateImm(tmp));\n"; + o.indent(Indentation) << "MI.addOperand(MCOperand::CreateImm(tmp));\n"; + +} + +void FilterChooser::emitDecoder(raw_ostream &OS, unsigned Indentation, + unsigned Opc) const { + std::map<unsigned, std::vector<OperandInfo> >::const_iterator OpIter = + Operands.find(Opc); + const std::vector<OperandInfo>& InsnOperands = OpIter->second; + for (std::vector<OperandInfo>::const_iterator + I = InsnOperands.begin(), E = InsnOperands.end(); I != E; ++I) { + // If a custom instruction decoder was specified, use that. + if (I->numFields() == 0 && I->Decoder.size()) { + OS.indent(Indentation) << Emitter->GuardPrefix << I->Decoder + << "(MI, insn, Address, Decoder)" + << Emitter->GuardPostfix << "\n"; + break; + } + emitBinaryParser(OS, Indentation, *I); + } +} + +unsigned FilterChooser::getDecoderIndex(DecoderSet &Decoders, + unsigned Opc) const { + // Build up the predicate string. + SmallString<256> Decoder; + // FIXME: emitDecoder() function can take a buffer directly rather than + // a stream. + raw_svector_ostream S(Decoder); + unsigned I = 4; + emitDecoder(S, I, Opc); + S.flush(); + + // Using the full decoder string as the key value here is a bit + // heavyweight, but is effective. If the string comparisons become a + // performance concern, we can implement a mangling of the predicate + // data easilly enough with a map back to the actual string. That's + // overkill for now, though. + + // Make sure the predicate is in the table. + Decoders.insert(Decoder.str()); + // Now figure out the index for when we write out the table. + DecoderSet::const_iterator P = std::find(Decoders.begin(), + Decoders.end(), + Decoder.str()); + return (unsigned)(P - Decoders.begin()); } static void emitSinglePredicateMatch(raw_ostream &o, StringRef str, @@ -887,8 +1169,74 @@ bool FilterChooser::emitPredicateMatch(raw_ostream &o, unsigned &Indentation, return Predicates->getSize() > 0; } -void FilterChooser::emitSoftFailCheck(raw_ostream &o, unsigned Indentation, - unsigned Opc) const { +bool FilterChooser::doesOpcodeNeedPredicate(unsigned Opc) const { + ListInit *Predicates = + AllInstructions[Opc]->TheDef->getValueAsListInit("Predicates"); + for (unsigned i = 0; i < Predicates->getSize(); ++i) { + Record *Pred = Predicates->getElementAsRecord(i); + if (!Pred->getValue("AssemblerMatcherPredicate")) + continue; + + std::string P = Pred->getValueAsString("AssemblerCondString"); + + if (!P.length()) + continue; + + return true; + } + return false; +} + +unsigned FilterChooser::getPredicateIndex(DecoderTableInfo &TableInfo, + StringRef Predicate) const { + // Using the full predicate string as the key value here is a bit + // heavyweight, but is effective. If the string comparisons become a + // performance concern, we can implement a mangling of the predicate + // data easilly enough with a map back to the actual string. That's + // overkill for now, though. + + // Make sure the predicate is in the table. + TableInfo.Predicates.insert(Predicate.str()); + // Now figure out the index for when we write out the table. + PredicateSet::const_iterator P = std::find(TableInfo.Predicates.begin(), + TableInfo.Predicates.end(), + Predicate.str()); + return (unsigned)(P - TableInfo.Predicates.begin()); +} + +void FilterChooser::emitPredicateTableEntry(DecoderTableInfo &TableInfo, + unsigned Opc) const { + if (!doesOpcodeNeedPredicate(Opc)) + return; + + // Build up the predicate string. + SmallString<256> Predicate; + // FIXME: emitPredicateMatch() functions can take a buffer directly rather + // than a stream. + raw_svector_ostream PS(Predicate); + unsigned I = 0; + emitPredicateMatch(PS, I, Opc); + + // Figure out the index into the predicate table for the predicate just + // computed. + unsigned PIdx = getPredicateIndex(TableInfo, PS.str()); + SmallString<16> PBytes; + raw_svector_ostream S(PBytes); + encodeULEB128(PIdx, S); + S.flush(); + + TableInfo.Table.push_back(MCD::OPC_CheckPredicate); + // Predicate index + for (unsigned i = 0, e = PBytes.size(); i != e; ++i) + TableInfo.Table.push_back(PBytes[i]); + // Push location for NumToSkip backpatching. + TableInfo.FixupStack.back().push_back(TableInfo.Table.size()); + TableInfo.Table.push_back(0); + TableInfo.Table.push_back(0); +} + +void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo, + unsigned Opc) const { BitsInit *SFBits = AllInstructions[Opc]->TheDef->getValueAsBitsInit("SoftFail"); if (!SFBits) return; @@ -914,13 +1262,11 @@ void FilterChooser::emitSoftFailCheck(raw_ostream &o, unsigned Indentation, default: // The bit is not set; this must be an error! StringRef Name = AllInstructions[Opc]->TheDef->getName(); - errs() << "SoftFail Conflict: bit SoftFail{" << i << "} in " - << Name - << " is set but Inst{" << i <<"} is unset!\n" + errs() << "SoftFail Conflict: bit SoftFail{" << i << "} in " << Name + << " is set but Inst{" << i << "} is unset!\n" << " - You can only mark a bit as SoftFail if it is fully defined" << " (1/0 - not '?') in Inst\n"; - o << "#error SoftFail Conflict, " << Name << "::SoftFail{" << i - << "} set but Inst{" << i << "} undefined!\n"; + return; } } @@ -930,27 +1276,31 @@ void FilterChooser::emitSoftFailCheck(raw_ostream &o, unsigned Indentation, if (!NeedPositiveMask && !NeedNegativeMask) return; - std::string PositiveMaskStr = PositiveMask.toString(16, /*signed=*/false); - std::string NegativeMaskStr = NegativeMask.toString(16, /*signed=*/false); - StringRef BitExt = ""; - if (BitWidth > 32) - BitExt = "ULL"; - - o.indent(Indentation) << "if ("; - if (NeedPositiveMask) - o << "insn & 0x" << PositiveMaskStr << BitExt; - if (NeedPositiveMask && NeedNegativeMask) - o << " || "; - if (NeedNegativeMask) - o << "~insn & 0x" << NegativeMaskStr << BitExt; - o << ")\n"; - o.indent(Indentation+2) << "S = MCDisassembler::SoftFail;\n"; + TableInfo.Table.push_back(MCD::OPC_SoftFail); + + SmallString<16> MaskBytes; + raw_svector_ostream S(MaskBytes); + if (NeedPositiveMask) { + encodeULEB128(PositiveMask.getZExtValue(), S); + S.flush(); + for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i) + TableInfo.Table.push_back(MaskBytes[i]); + } else + TableInfo.Table.push_back(0); + if (NeedNegativeMask) { + MaskBytes.clear(); + S.resync(); + encodeULEB128(NegativeMask.getZExtValue(), S); + S.flush(); + for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i) + TableInfo.Table.push_back(MaskBytes[i]); + } else + TableInfo.Table.push_back(0); } -// Emits code to decode the singleton. Return true if we have matched all the -// well-known bits. -bool FilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation, - unsigned Opc) const { +// Emits table entries to decode the singleton. +void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo, + unsigned Opc) const { std::vector<unsigned> StartBits; std::vector<unsigned> EndBits; std::vector<uint64_t> FieldVals; @@ -961,107 +1311,70 @@ bool FilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation, getIslands(StartBits, EndBits, FieldVals, Insn); unsigned Size = StartBits.size(); - unsigned I, NumBits; - // If we have matched all the well-known bits, just issue a return. - if (Size == 0) { - o.indent(Indentation) << "if ("; - if (!emitPredicateMatch(o, Indentation, Opc)) - o << "1"; - o << ") {\n"; - emitSoftFailCheck(o, Indentation+2, Opc); - o.indent(Indentation) << " MI.setOpcode(" << Opc << ");\n"; - std::map<unsigned, std::vector<OperandInfo> >::const_iterator OpIter = - Operands.find(Opc); - const std::vector<OperandInfo>& InsnOperands = OpIter->second; - for (std::vector<OperandInfo>::const_iterator - I = InsnOperands.begin(), E = InsnOperands.end(); I != E; ++I) { - // If a custom instruction decoder was specified, use that. - if (I->numFields() == 0 && I->Decoder.size()) { - o.indent(Indentation) << " " << Emitter->GuardPrefix << I->Decoder - << "(MI, insn, Address, Decoder)" - << Emitter->GuardPostfix << "\n"; - break; - } - - emitBinaryParser(o, Indentation, *I); - } - - o.indent(Indentation) << " return " << Emitter->ReturnOK << "; // " - << nameWithID(Opc) << '\n'; - o.indent(Indentation) << "}\n"; // Closing predicate block. - return true; - } - - // Otherwise, there are more decodings to be done! - - // Emit code to match the island(s) for the singleton. - o.indent(Indentation) << "// Check "; - - for (I = Size; I != 0; --I) { - o << "Inst{" << EndBits[I-1] << '-' << StartBits[I-1] << "} "; - if (I > 1) - o << " && "; - else - o << "for singleton decoding...\n"; - } - - o.indent(Indentation) << "if ("; - if (emitPredicateMatch(o, Indentation, Opc)) { - o << " &&\n"; - o.indent(Indentation+4); + // Emit the predicate table entry if one is needed. + emitPredicateTableEntry(TableInfo, Opc); + + // Check any additional encoding fields needed. + for (unsigned I = Size; I != 0; --I) { + unsigned NumBits = EndBits[I-1] - StartBits[I-1] + 1; + TableInfo.Table.push_back(MCD::OPC_CheckField); + TableInfo.Table.push_back(StartBits[I-1]); + TableInfo.Table.push_back(NumBits); + uint8_t Buffer[8], *p; + encodeULEB128(FieldVals[I-1], Buffer); + for (p = Buffer; *p >= 128 ; ++p) + TableInfo.Table.push_back(*p); + TableInfo.Table.push_back(*p); + // Push location for NumToSkip backpatching. + TableInfo.FixupStack.back().push_back(TableInfo.Table.size()); + // The fixup is always 16-bits, so go ahead and allocate the space + // in the table so all our relative position calculations work OK even + // before we fully resolve the real value here. + TableInfo.Table.push_back(0); + TableInfo.Table.push_back(0); } - for (I = Size; I != 0; --I) { - NumBits = EndBits[I-1] - StartBits[I-1] + 1; - o << "fieldFromInstruction" << BitWidth << "(insn, " - << StartBits[I-1] << ", " << NumBits - << ") == " << FieldVals[I-1]; - if (I > 1) - o << " && "; - else - o << ") {\n"; - } - emitSoftFailCheck(o, Indentation+2, Opc); - o.indent(Indentation) << " MI.setOpcode(" << Opc << ");\n"; - std::map<unsigned, std::vector<OperandInfo> >::const_iterator OpIter = - Operands.find(Opc); - const std::vector<OperandInfo>& InsnOperands = OpIter->second; - for (std::vector<OperandInfo>::const_iterator - I = InsnOperands.begin(), E = InsnOperands.end(); I != E; ++I) { - // If a custom instruction decoder was specified, use that. - if (I->numFields() == 0 && I->Decoder.size()) { - o.indent(Indentation) << " " << Emitter->GuardPrefix << I->Decoder - << "(MI, insn, Address, Decoder)" - << Emitter->GuardPostfix << "\n"; - break; - } - - emitBinaryParser(o, Indentation, *I); - } - o.indent(Indentation) << " return " << Emitter->ReturnOK << "; // " - << nameWithID(Opc) << '\n'; - o.indent(Indentation) << "}\n"; - - return false; + // Check for soft failure of the match. + emitSoftFailTableEntry(TableInfo, Opc); + + TableInfo.Table.push_back(MCD::OPC_Decode); + uint8_t Buffer[8], *p; + encodeULEB128(Opc, Buffer); + for (p = Buffer; *p >= 128 ; ++p) + TableInfo.Table.push_back(*p); + TableInfo.Table.push_back(*p); + + unsigned DIdx = getDecoderIndex(TableInfo.Decoders, Opc); + SmallString<16> Bytes; + raw_svector_ostream S(Bytes); + encodeULEB128(DIdx, S); + S.flush(); + + // Decoder index + for (unsigned i = 0, e = Bytes.size(); i != e; ++i) + TableInfo.Table.push_back(Bytes[i]); } -// Emits code to decode the singleton, and then to decode the rest. -void FilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation, - const Filter &Best) const { - +// Emits table entries to decode the singleton, and then to decode the rest. +void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo, + const Filter &Best) const { unsigned Opc = Best.getSingletonOpc(); - emitSingletonDecoder(o, Indentation, Opc); + // complex singletons need predicate checks from the first singleton + // to refer forward to the variable filterchooser that follows. + TableInfo.FixupStack.push_back(FixupList()); - // Emit code for the rest. - o.indent(Indentation) << "else\n"; + emitSingletonTableEntry(TableInfo, Opc); - Indentation += 2; - Best.getVariableFC().emit(o, Indentation); - Indentation -= 2; + resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(), + TableInfo.Table.size()); + TableInfo.FixupStack.pop_back(); + + Best.getVariableFC().emitTableEntries(TableInfo); } + // Assign a single filter and run with it. Top level API client can initialize // with a single filter to start the filtering process. void FilterChooser::runSingleFilter(unsigned startBit, unsigned numBit, @@ -1119,7 +1432,7 @@ bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) { } } - unsigned BitIndex, InsnIndex; + unsigned BitIndex; // We maintain BIT_WIDTH copies of the bitAttrs automaton. // The automaton consumes the corresponding bit from each @@ -1149,7 +1462,7 @@ bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) { else bitAttrs.push_back(ATTR_NONE); - for (InsnIndex = 0; InsnIndex < numInstructions; ++InsnIndex) { + for (unsigned InsnIndex = 0; InsnIndex < numInstructions; ++InsnIndex) { insn_t insn; insnWithID(insn, Opcodes[InsnIndex]); @@ -1200,7 +1513,7 @@ bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) { bitAttr_t RA = ATTR_NONE; unsigned StartBit = 0; - for (BitIndex = 0; BitIndex < BitWidth; BitIndex++) { + for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) { bitAttr_t bitAttr = bitAttrs[BitIndex]; assert(bitAttr != ATTR_NONE && "Bit without attributes"); @@ -1341,36 +1654,29 @@ void FilterChooser::doFilter() { BestIndex = -1; } -// Emits code to decode our share of instructions. Returns true if the -// emitted code causes a return, which occurs if we know how to decode -// the instruction at this level or the instruction is not decodeable. -bool FilterChooser::emit(raw_ostream &o, unsigned &Indentation) const { - if (Opcodes.size() == 1) +// emitTableEntries - Emit state machine entries to decode our share of +// instructions. +void FilterChooser::emitTableEntries(DecoderTableInfo &TableInfo) const { + if (Opcodes.size() == 1) { // There is only one instruction in the set, which is great! // Call emitSingletonDecoder() to see whether there are any remaining // encodings bits. - return emitSingletonDecoder(o, Indentation, Opcodes[0]); + emitSingletonTableEntry(TableInfo, Opcodes[0]); + return; + } // Choose the best filter to do the decodings! if (BestIndex != -1) { const Filter &Best = Filters[BestIndex]; if (Best.getNumFiltered() == 1) - emitSingletonDecoder(o, Indentation, Best); + emitSingletonTableEntry(TableInfo, Best); else - Best.emit(o, Indentation); - return false; + Best.emitTableEntry(TableInfo); + return; } - // We don't know how to decode these instructions! Return 0 and dump the - // conflict set! - o.indent(Indentation) << "return 0;" << " // Conflict set: "; - for (int i = 0, N = Opcodes.size(); i < N; ++i) { - o << nameWithID(Opcodes[i]); - if (i < (N - 1)) - o << ", "; - else - o << '\n'; - } + // We don't know how to decode these instructions! Dump the + // conflict set and bail. // Print out useful conflict information for postmortem analysis. errs() << "Decoding Conflict:\n"; @@ -1385,8 +1691,6 @@ bool FilterChooser::emit(raw_ostream &o, unsigned &Indentation) const { getBitsField(*AllInstructions[Opcodes[i]]->TheDef, "Inst")); errs() << '\n'; } - - return true; } static bool populateInstruction(const CodeGenInstruction &CGI, unsigned Opc, @@ -1549,62 +1853,168 @@ static bool populateInstruction(const CodeGenInstruction &CGI, unsigned Opc, return true; } -static void emitHelper(llvm::raw_ostream &o, unsigned BitWidth) { - unsigned Indentation = 0; - std::string WidthStr = "uint" + utostr(BitWidth) + "_t"; - - o << '\n'; - - o.indent(Indentation) << "static " << WidthStr << - " fieldFromInstruction" << BitWidth << - "(" << WidthStr <<" insn, unsigned startBit, unsigned numBits)\n"; - - o.indent(Indentation) << "{\n"; - - ++Indentation; ++Indentation; - o.indent(Indentation) << "assert(startBit + numBits <= " << BitWidth - << " && \"Instruction field out of bounds!\");\n"; - o << '\n'; - o.indent(Indentation) << WidthStr << " fieldMask;\n"; - o << '\n'; - o.indent(Indentation) << "if (numBits == " << BitWidth << ")\n"; - - ++Indentation; ++Indentation; - o.indent(Indentation) << "fieldMask = (" << WidthStr << ")-1;\n"; - --Indentation; --Indentation; - - o.indent(Indentation) << "else\n"; - - ++Indentation; ++Indentation; - o.indent(Indentation) << "fieldMask = ((1 << numBits) - 1) << startBit;\n"; - --Indentation; --Indentation; - - o << '\n'; - o.indent(Indentation) << "return (insn & fieldMask) >> startBit;\n"; - --Indentation; --Indentation; - - o.indent(Indentation) << "}\n"; +// emitFieldFromInstruction - Emit the templated helper function +// fieldFromInstruction(). +static void emitFieldFromInstruction(formatted_raw_ostream &OS) { + OS << "// Helper function for extracting fields from encoded instructions.\n" + << "template<typename InsnType>\n" + << "static InsnType fieldFromInstruction(InsnType insn, unsigned startBit,\n" + << " unsigned numBits) {\n" + << " assert(startBit + numBits <= (sizeof(InsnType)*8) &&\n" + << " \"Instruction field out of bounds!\");\n" + << " InsnType fieldMask;\n" + << " if (numBits == sizeof(InsnType)*8)\n" + << " fieldMask = (InsnType)(-1LL);\n" + << " else\n" + << " fieldMask = ((1 << numBits) - 1) << startBit;\n" + << " return (insn & fieldMask) >> startBit;\n" + << "}\n\n"; +} - o << '\n'; +// emitDecodeInstruction - Emit the templated helper function +// decodeInstruction(). +static void emitDecodeInstruction(formatted_raw_ostream &OS) { + OS << "template<typename InsnType>\n" + << "static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,\n" + << " InsnType insn, uint64_t Address,\n" + << " const void *DisAsm,\n" + << " const MCSubtargetInfo &STI) {\n" + << " uint64_t Bits = STI.getFeatureBits();\n" + << "\n" + << " const uint8_t *Ptr = DecodeTable;\n" + << " uint32_t CurFieldValue;\n" + << " DecodeStatus S = MCDisassembler::Success;\n" + << " for (;;) {\n" + << " ptrdiff_t Loc = Ptr - DecodeTable;\n" + << " switch (*Ptr) {\n" + << " default:\n" + << " errs() << Loc << \": Unexpected decode table opcode!\\n\";\n" + << " return MCDisassembler::Fail;\n" + << " case MCD::OPC_ExtractField: {\n" + << " unsigned Start = *++Ptr;\n" + << " unsigned Len = *++Ptr;\n" + << " ++Ptr;\n" + << " CurFieldValue = fieldFromInstruction(insn, Start, Len);\n" + << " DEBUG(dbgs() << Loc << \": OPC_ExtractField(\" << Start << \", \"\n" + << " << Len << \"): \" << CurFieldValue << \"\\n\");\n" + << " break;\n" + << " }\n" + << " case MCD::OPC_FilterValue: {\n" + << " // Decode the field value.\n" + << " unsigned Len;\n" + << " InsnType Val = decodeULEB128(++Ptr, &Len);\n" + << " Ptr += Len;\n" + << " // NumToSkip is a plain 16-bit integer.\n" + << " unsigned NumToSkip = *Ptr++;\n" + << " NumToSkip |= (*Ptr++) << 8;\n" + << "\n" + << " // Perform the filter operation.\n" + << " if (Val != CurFieldValue)\n" + << " Ptr += NumToSkip;\n" + << " DEBUG(dbgs() << Loc << \": OPC_FilterValue(\" << Val << \", \" << NumToSkip\n" + << " << \"): \" << ((Val != CurFieldValue) ? \"FAIL:\" : \"PASS:\")\n" + << " << \" continuing at \" << (Ptr - DecodeTable) << \"\\n\");\n" + << "\n" + << " break;\n" + << " }\n" + << " case MCD::OPC_CheckField: {\n" + << " unsigned Start = *++Ptr;\n" + << " unsigned Len = *++Ptr;\n" + << " InsnType FieldValue = fieldFromInstruction(insn, Start, Len);\n" + << " // Decode the field value.\n" + << " uint32_t ExpectedValue = decodeULEB128(++Ptr, &Len);\n" + << " Ptr += Len;\n" + << " // NumToSkip is a plain 16-bit integer.\n" + << " unsigned NumToSkip = *Ptr++;\n" + << " NumToSkip |= (*Ptr++) << 8;\n" + << "\n" + << " // If the actual and expected values don't match, skip.\n" + << " if (ExpectedValue != FieldValue)\n" + << " Ptr += NumToSkip;\n" + << " DEBUG(dbgs() << Loc << \": OPC_CheckField(\" << Start << \", \"\n" + << " << Len << \", \" << ExpectedValue << \", \" << NumToSkip\n" + << " << \"): FieldValue = \" << FieldValue << \", ExpectedValue = \"\n" + << " << ExpectedValue << \": \"\n" + << " << ((ExpectedValue == FieldValue) ? \"PASS\\n\" : \"FAIL\\n\"));\n" + << " break;\n" + << " }\n" + << " case MCD::OPC_CheckPredicate: {\n" + << " unsigned Len;\n" + << " // Decode the Predicate Index value.\n" + << " unsigned PIdx = decodeULEB128(++Ptr, &Len);\n" + << " Ptr += Len;\n" + << " // NumToSkip is a plain 16-bit integer.\n" + << " unsigned NumToSkip = *Ptr++;\n" + << " NumToSkip |= (*Ptr++) << 8;\n" + << " // Check the predicate.\n" + << " bool Pred;\n" + << " if (!(Pred = checkDecoderPredicate(PIdx, Bits)))\n" + << " Ptr += NumToSkip;\n" + << " (void)Pred;\n" + << " DEBUG(dbgs() << Loc << \": OPC_CheckPredicate(\" << PIdx << \"): \"\n" + << " << (Pred ? \"PASS\\n\" : \"FAIL\\n\"));\n" + << "\n" + << " break;\n" + << " }\n" + << " case MCD::OPC_Decode: {\n" + << " unsigned Len;\n" + << " // Decode the Opcode value.\n" + << " unsigned Opc = decodeULEB128(++Ptr, &Len);\n" + << " Ptr += Len;\n" + << " unsigned DecodeIdx = decodeULEB128(Ptr, &Len);\n" + << " Ptr += Len;\n" + << " DEBUG(dbgs() << Loc << \": OPC_Decode: opcode \" << Opc\n" + << " << \", using decoder \" << DecodeIdx << \"\\n\" );\n" + << " DEBUG(dbgs() << \"----- DECODE SUCCESSFUL -----\\n\");\n" + << "\n" + << " MI.setOpcode(Opc);\n" + << " return decodeToMCInst(S, DecodeIdx, insn, MI, Address, DisAsm);\n" + << " }\n" + << " case MCD::OPC_SoftFail: {\n" + << " // Decode the mask values.\n" + << " unsigned Len;\n" + << " InsnType PositiveMask = decodeULEB128(++Ptr, &Len);\n" + << " Ptr += Len;\n" + << " InsnType NegativeMask = decodeULEB128(Ptr, &Len);\n" + << " Ptr += Len;\n" + << " bool Fail = (insn & PositiveMask) || (~insn & NegativeMask);\n" + << " if (Fail)\n" + << " S = MCDisassembler::SoftFail;\n" + << " DEBUG(dbgs() << Loc << \": OPC_SoftFail: \" << (Fail ? \"FAIL\\n\":\"PASS\\n\"));\n" + << " break;\n" + << " }\n" + << " case MCD::OPC_Fail: {\n" + << " DEBUG(dbgs() << Loc << \": OPC_Fail\\n\");\n" + << " return MCDisassembler::Fail;\n" + << " }\n" + << " }\n" + << " }\n" + << " llvm_unreachable(\"bogosity detected in disassembler state machine!\");\n" + << "}\n\n"; } // Emits disassembler code for instruction decoding. void FixedLenDecoderEmitter::run(raw_ostream &o) { - o << "#include \"llvm/MC/MCInst.h\"\n"; - o << "#include \"llvm/Support/DataTypes.h\"\n"; - o << "#include <assert.h>\n"; - o << '\n'; - o << "namespace llvm {\n\n"; + formatted_raw_ostream OS(o); + OS << "#include \"llvm/MC/MCInst.h\"\n"; + OS << "#include \"llvm/Support/Debug.h\"\n"; + OS << "#include \"llvm/Support/DataTypes.h\"\n"; + OS << "#include \"llvm/Support/LEB128.h\"\n"; + OS << "#include \"llvm/Support/raw_ostream.h\"\n"; + OS << "#include <assert.h>\n"; + OS << '\n'; + OS << "namespace llvm {\n\n"; + + emitFieldFromInstruction(OS); // Parameterize the decoders based on namespace and instruction width. - const std::vector<const CodeGenInstruction*> &NumberedInstructions = - Target.getInstructionsByEnumValue(); + NumberedInstructions = &Target.getInstructionsByEnumValue(); std::map<std::pair<std::string, unsigned>, std::vector<unsigned> > OpcMap; std::map<unsigned, std::vector<OperandInfo> > Operands; - for (unsigned i = 0; i < NumberedInstructions.size(); ++i) { - const CodeGenInstruction *Inst = NumberedInstructions[i]; + for (unsigned i = 0; i < NumberedInstructions->size(); ++i) { + const CodeGenInstruction *Inst = NumberedInstructions->at(i); const Record *Def = Inst->TheDef; unsigned Size = Def->getValueAsInt("Size"); if (Def->getValueAsString("Namespace") == "TargetOpcode" || @@ -1622,24 +2032,48 @@ void FixedLenDecoderEmitter::run(raw_ostream &o) { } } + DecoderTableInfo TableInfo; std::set<unsigned> Sizes; for (std::map<std::pair<std::string, unsigned>, std::vector<unsigned> >::const_iterator I = OpcMap.begin(), E = OpcMap.end(); I != E; ++I) { - // If we haven't visited this instruction width before, emit the - // helper method to extract fields. - if (!Sizes.count(I->first.second)) { - emitHelper(o, 8*I->first.second); - Sizes.insert(I->first.second); - } - // Emit the decoder for this namespace+width combination. - FilterChooser FC(NumberedInstructions, I->second, Operands, + FilterChooser FC(*NumberedInstructions, I->second, Operands, 8*I->first.second, this); - FC.emitTop(o, 0, I->first.first); + + // The decode table is cleared for each top level decoder function. The + // predicates and decoders themselves, however, are shared across all + // decoders to give more opportunities for uniqueing. + TableInfo.Table.clear(); + TableInfo.FixupStack.clear(); + TableInfo.Table.reserve(16384); + TableInfo.FixupStack.push_back(FixupList()); + FC.emitTableEntries(TableInfo); + // Any NumToSkip fixups in the top level scope can resolve to the + // OPC_Fail at the end of the table. + assert(TableInfo.FixupStack.size() == 1 && "fixup stack phasing error!"); + // Resolve any NumToSkip fixups in the current scope. + resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(), + TableInfo.Table.size()); + TableInfo.FixupStack.clear(); + + TableInfo.Table.push_back(MCD::OPC_Fail); + + // Print the table to the output stream. + emitTable(OS, TableInfo.Table, 0, FC.getBitWidth(), I->first.first); + OS.flush(); } - o << "\n} // End llvm namespace \n"; + // Emit the predicate function. + emitPredicateFunction(OS, TableInfo.Predicates, 0); + + // Emit the decoder function. + emitDecoderFunction(OS, TableInfo.Decoders, 0); + + // Emit the main entry point for the decoder, decodeInstruction(). + emitDecodeInstruction(OS); + + OS << "\n} // End llvm namespace\n"; } namespace llvm { diff --git a/utils/TableGen/InstrInfoEmitter.cpp b/utils/TableGen/InstrInfoEmitter.cpp index 3adb869..b41ad94 100644 --- a/utils/TableGen/InstrInfoEmitter.cpp +++ b/utils/TableGen/InstrInfoEmitter.cpp @@ -319,6 +319,7 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num, if (Inst.isCompare) OS << "|(1<<MCID::Compare)"; if (Inst.isMoveImm) OS << "|(1<<MCID::MoveImm)"; if (Inst.isBitcast) OS << "|(1<<MCID::Bitcast)"; + if (Inst.isSelect) OS << "|(1<<MCID::Select)"; if (Inst.isBarrier) OS << "|(1<<MCID::Barrier)"; if (Inst.hasDelaySlot) OS << "|(1<<MCID::DelaySlot)"; if (Inst.isCall) OS << "|(1<<MCID::Call)"; diff --git a/utils/TableGen/RegisterInfoEmitter.cpp b/utils/TableGen/RegisterInfoEmitter.cpp index 3d8d515..02546df 100644 --- a/utils/TableGen/RegisterInfoEmitter.cpp +++ b/utils/TableGen/RegisterInfoEmitter.cpp @@ -145,9 +145,9 @@ void RegisterInfoEmitter::runEnums(raw_ostream &OS, if (!Namespace.empty()) OS << "namespace " << Namespace << " {\n"; OS << "enum {\n NoSubRegister,\n"; - for (unsigned i = 0, e = Bank.getNumNamedIndices(); i != e; ++i) + for (unsigned i = 0, e = SubRegIndices.size(); i != e; ++i) OS << " " << SubRegIndices[i]->getName() << ",\t// " << i+1 << "\n"; - OS << " NUM_TARGET_NAMED_SUBREGS\n};\n"; + OS << " NUM_TARGET_SUBREGS\n};\n"; if (!Namespace.empty()) OS << "}\n"; } @@ -479,16 +479,12 @@ public: } }; -static void printRegister(raw_ostream &OS, const CodeGenRegister *Reg) { - OS << getQualifiedName(Reg->TheDef); -} - static void printSimpleValueType(raw_ostream &OS, MVT::SimpleValueType VT) { OS << getEnumName(VT); } static void printSubRegIndex(raw_ostream &OS, const CodeGenSubRegIndex *Idx) { - OS << Idx->getQualifiedName(); + OS << Idx->EnumValue; } // Differentially encoded register and regunit lists allow for better @@ -517,6 +513,19 @@ DiffVec &diffEncode(DiffVec &V, unsigned InitVal, ArrayRef<unsigned> List) { return V; } +template<typename Iter> +static +DiffVec &diffEncode(DiffVec &V, unsigned InitVal, Iter Begin, Iter End) { + assert(V.empty() && "Clear DiffVec before diffEncode."); + uint16_t Val = uint16_t(InitVal); + for (Iter I = Begin; I != End; ++I) { + uint16_t Cur = (*I)->EnumValue; + V.push_back(Cur - Val); + Val = Cur; + } + return V; +} + static void printDiff16(raw_ostream &OS, uint16_t Val) { OS << Val; } @@ -537,15 +546,21 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target, // The lists of sub-registers, super-registers, and overlaps all go in the // same array. That allows us to share suffixes. typedef std::vector<const CodeGenRegister*> RegVec; - SmallVector<RegVec, 4> SubRegLists(Regs.size()); - SmallVector<RegVec, 4> OverlapLists(Regs.size()); - SequenceToOffsetTable<RegVec, CodeGenRegister::Less> RegSeqs; // Differentially encoded lists. SequenceToOffsetTable<DiffVec> DiffSeqs; + SmallVector<DiffVec, 4> SubRegLists(Regs.size()); + SmallVector<DiffVec, 4> SuperRegLists(Regs.size()); + SmallVector<DiffVec, 4> OverlapLists(Regs.size()); SmallVector<DiffVec, 4> RegUnitLists(Regs.size()); SmallVector<unsigned, 4> RegUnitInitScale(Regs.size()); + // Keep track of sub-register names as well. These are not differentially + // encoded. + typedef SmallVector<const CodeGenSubRegIndex*, 4> SubRegIdxVec; + SequenceToOffsetTable<SubRegIdxVec> SubRegIdxSeqs; + SmallVector<SubRegIdxVec, 4> SubRegIdxLists(Regs.size()); + SequenceToOffsetTable<std::string> RegStrings; // Precompute register lists for the SequenceToOffsetTable. @@ -557,37 +572,29 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target, // Compute the ordered sub-register list. SetVector<const CodeGenRegister*> SR; Reg->addSubRegsPreOrder(SR, RegBank); - RegVec &SubRegList = SubRegLists[i]; - SubRegList.assign(SR.begin(), SR.end()); - RegSeqs.add(SubRegList); + diffEncode(SubRegLists[i], Reg->EnumValue, SR.begin(), SR.end()); + DiffSeqs.add(SubRegLists[i]); + + // Compute the corresponding sub-register indexes. + SubRegIdxVec &SRIs = SubRegIdxLists[i]; + for (unsigned j = 0, je = SR.size(); j != je; ++j) + SRIs.push_back(Reg->getSubRegIndex(SR[j])); + SubRegIdxSeqs.add(SRIs); // Super-registers are already computed. const RegVec &SuperRegList = Reg->getSuperRegs(); - RegSeqs.add(SuperRegList); - - // The list of overlaps doesn't need to have any particular order, except - // Reg itself must be the first element. Pick an ordering that has one of - // the other lists as a suffix. - RegVec &OverlapList = OverlapLists[i]; - const RegVec &Suffix = SubRegList.size() > SuperRegList.size() ? - SubRegList : SuperRegList; - CodeGenRegister::Set Omit(Suffix.begin(), Suffix.end()); + diffEncode(SuperRegLists[i], Reg->EnumValue, + SuperRegList.begin(), SuperRegList.end()); + DiffSeqs.add(SuperRegLists[i]); - // First element is Reg itself. - OverlapList.push_back(Reg); - Omit.insert(Reg); - - // Any elements not in Suffix. + // The list of overlaps doesn't need to have any particular order, and Reg + // itself must be omitted. + DiffVec &OverlapList = OverlapLists[i]; CodeGenRegister::Set OSet; Reg->computeOverlaps(OSet, RegBank); - std::set_difference(OSet.begin(), OSet.end(), - Omit.begin(), Omit.end(), - std::back_inserter(OverlapList), - CodeGenRegister::Less()); - - // Finally, Suffix itself. - OverlapList.insert(OverlapList.end(), Suffix.begin(), Suffix.end()); - RegSeqs.add(OverlapList); + OSet.erase(Reg); + diffEncode(OverlapList, Reg->EnumValue, OSet.begin(), OSet.end()); + DiffSeqs.add(OverlapList); // Differentially encode the register unit list, seeded by register number. // First compute a scale factor that allows more diff-lists to be reused: @@ -616,23 +623,23 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target, } // Compute the final layout of the sequence table. - RegSeqs.layout(); DiffSeqs.layout(); + SubRegIdxSeqs.layout(); OS << "namespace llvm {\n\n"; const std::string &TargetName = Target.getName(); - // Emit the shared table of register lists. - OS << "extern const uint16_t " << TargetName << "RegLists[] = {\n"; - RegSeqs.emit(OS, printRegister); - OS << "};\n\n"; - // Emit the shared table of differential lists. OS << "extern const uint16_t " << TargetName << "RegDiffLists[] = {\n"; DiffSeqs.emit(OS, printDiff16); OS << "};\n\n"; + // Emit the table of sub-register indexes. + OS << "extern const uint16_t " << TargetName << "SubRegIdxLists[] = {\n"; + SubRegIdxSeqs.emit(OS, printSubRegIndex); + OS << "};\n\n"; + // Emit the string table. RegStrings.layout(); OS << "extern const char " << TargetName << "RegStrings[] = {\n"; @@ -641,15 +648,16 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target, OS << "extern const MCRegisterDesc " << TargetName << "RegDesc[] = { // Descriptors\n"; - OS << " { " << RegStrings.get("") << ", 0, 0, 0, 0 },\n"; + OS << " { " << RegStrings.get("") << ", 0, 0, 0, 0, 0 },\n"; // Emit the register descriptors now. for (unsigned i = 0, e = Regs.size(); i != e; ++i) { const CodeGenRegister *Reg = Regs[i]; OS << " { " << RegStrings.get(Reg->getName()) << ", " - << RegSeqs.get(OverlapLists[i]) << ", " - << RegSeqs.get(SubRegLists[i]) << ", " - << RegSeqs.get(Reg->getSuperRegs()) << ", " + << DiffSeqs.get(OverlapLists[i]) << ", " + << DiffSeqs.get(SubRegLists[i]) << ", " + << DiffSeqs.get(SuperRegLists[i]) << ", " + << SubRegIdxSeqs.get(SubRegIdxLists[i]) << ", " << (DiffSeqs.get(RegUnitLists[i])*16 + RegUnitInitScale[i]) << " },\n"; } OS << "};\n\n"; // End of register descriptors... @@ -729,37 +737,7 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target, OS << "};\n\n"; - // Emit the data table for getSubReg(). ArrayRef<CodeGenSubRegIndex*> SubRegIndices = RegBank.getSubRegIndices(); - if (SubRegIndices.size()) { - OS << "const uint16_t " << TargetName << "SubRegTable[][" - << SubRegIndices.size() << "] = {\n"; - for (unsigned i = 0, e = Regs.size(); i != e; ++i) { - const CodeGenRegister::SubRegMap &SRM = Regs[i]->getSubRegs(); - OS << " /* " << Regs[i]->TheDef->getName() << " */\n"; - if (SRM.empty()) { - OS << " {0},\n"; - continue; - } - OS << " {"; - for (unsigned j = 0, je = SubRegIndices.size(); j != je; ++j) { - // FIXME: We really should keep this to 80 columns... - CodeGenRegister::SubRegMap::const_iterator SubReg = - SRM.find(SubRegIndices[j]); - if (SubReg != SRM.end()) - OS << getQualifiedName(SubReg->second->TheDef); - else - OS << "0"; - if (j != je - 1) - OS << ", "; - } - OS << "}" << (i != e ? "," : "") << "\n"; - } - OS << "};\n\n"; - OS << "const uint16_t *get" << TargetName - << "SubRegTable() {\n return (const uint16_t *)" << TargetName - << "SubRegTable;\n}\n\n"; - } EmitRegMappingTables(OS, Regs, false); @@ -783,22 +761,17 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target, // MCRegisterInfo initialization routine. OS << "static inline void Init" << TargetName << "MCRegisterInfo(MCRegisterInfo *RI, unsigned RA, " - << "unsigned DwarfFlavour = 0, unsigned EHFlavour = 0) {\n"; - OS << " RI->InitMCRegisterInfo(" << TargetName << "RegDesc, " + << "unsigned DwarfFlavour = 0, unsigned EHFlavour = 0) {\n" + << " RI->InitMCRegisterInfo(" << TargetName << "RegDesc, " << Regs.size()+1 << ", RA, " << TargetName << "MCRegisterClasses, " << RegisterClasses.size() << ", " << TargetName << "RegUnitRoots, " << RegBank.getNumNativeRegUnits() << ", " - << TargetName << "RegLists, " << TargetName << "RegDiffLists, " - << TargetName << "RegStrings, "; - if (SubRegIndices.size() != 0) - OS << "(uint16_t*)" << TargetName << "SubRegTable, " - << SubRegIndices.size() << ",\n"; - else - OS << "NULL, 0,\n"; - - OS << " " << TargetName << "RegEncodingTable);\n\n"; + << TargetName << "RegStrings, " + << TargetName << "SubRegIdxLists, " + << SubRegIndices.size() << ",\n" + << " " << TargetName << "RegEncodingTable);\n\n"; EmitRegMapping(OS, Regs, false); @@ -912,17 +885,6 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target, } OS << "\" };\n\n"; - // Emit names of the anonymous subreg indices. - unsigned NamedIndices = RegBank.getNumNamedIndices(); - if (SubRegIndices.size() > NamedIndices) { - OS << " enum {"; - for (unsigned i = NamedIndices, e = SubRegIndices.size(); i != e; ++i) { - OS << "\n " << SubRegIndices[i]->getName() << " = " << i+1; - if (i+1 != e) - OS << ','; - } - OS << "\n };\n\n"; - } OS << "\n"; // Now that all of the structs have been emitted, emit the instances. @@ -1148,13 +1110,10 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target, // Emit the constructor of the class... OS << "extern const MCRegisterDesc " << TargetName << "RegDesc[];\n"; - OS << "extern const uint16_t " << TargetName << "RegLists[];\n"; OS << "extern const uint16_t " << TargetName << "RegDiffLists[];\n"; OS << "extern const char " << TargetName << "RegStrings[];\n"; OS << "extern const uint16_t " << TargetName << "RegUnitRoots[][2];\n"; - if (SubRegIndices.size() != 0) - OS << "extern const uint16_t *get" << TargetName - << "SubRegTable();\n"; + OS << "extern const uint16_t " << TargetName << "SubRegIdxLists[];\n"; OS << "extern const uint16_t " << TargetName << "RegEncodingTable[];\n"; EmitRegMappingTables(OS, Regs, true); @@ -1169,17 +1128,11 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target, << "MCRegisterClasses, " << RegisterClasses.size() << ",\n" << " " << TargetName << "RegUnitRoots,\n" << " " << RegBank.getNumNativeRegUnits() << ",\n" - << " " << TargetName << "RegLists,\n" << " " << TargetName << "RegDiffLists,\n" << " " << TargetName << "RegStrings,\n" - << " "; - if (SubRegIndices.size() != 0) - OS << "get" << TargetName << "SubRegTable(), " - << SubRegIndices.size() << ",\n"; - else - OS << "NULL, 0,\n"; - - OS << " " << TargetName << "RegEncodingTable);\n\n"; + << " " << TargetName << "SubRegIdxLists,\n" + << " " << SubRegIndices.size() << ",\n" + << " " << TargetName << "RegEncodingTable);\n\n"; EmitRegMapping(OS, Regs, true); diff --git a/utils/TableGen/StringToOffsetTable.h b/utils/TableGen/StringToOffsetTable.h index 803f5bd..a098d7d 100644 --- a/utils/TableGen/StringToOffsetTable.h +++ b/utils/TableGen/StringToOffsetTable.h @@ -14,6 +14,7 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/raw_ostream.h" +#include <cctype> namespace llvm { diff --git a/utils/TableGen/SubtargetEmitter.cpp b/utils/TableGen/SubtargetEmitter.cpp index b3bf4aa..3472343 100644 --- a/utils/TableGen/SubtargetEmitter.cpp +++ b/utils/TableGen/SubtargetEmitter.cpp @@ -590,6 +590,7 @@ void SubtargetEmitter::EmitProcessorModels(raw_ostream &OS) { EmitProcessorProp(OS, PI->ModelDef, "MinLatency", ','); EmitProcessorProp(OS, PI->ModelDef, "LoadLatency", ','); EmitProcessorProp(OS, PI->ModelDef, "HighLatency", ','); + EmitProcessorProp(OS, PI->ModelDef, "MispredictPenalty", ','); if (SchedModels.hasItineraryClasses()) OS << " " << PI->ItinsDef->getName(); else diff --git a/utils/TableGen/X86DisassemblerShared.h b/utils/TableGen/X86DisassemblerShared.h index 0417e9d..c13a0cc 100644 --- a/utils/TableGen/X86DisassemblerShared.h +++ b/utils/TableGen/X86DisassemblerShared.h @@ -14,6 +14,7 @@ #include <string.h> #define INSTRUCTION_SPECIFIER_FIELDS \ + struct OperandSpecifier operands[X86_MAX_OPERANDS]; \ bool filtered; \ InstructionContext insnContext; \ std::string name; \ diff --git a/utils/TableGen/X86DisassemblerTables.cpp b/utils/TableGen/X86DisassemblerTables.cpp index 2875168..f3bd373 100644 --- a/utils/TableGen/X86DisassemblerTables.cpp +++ b/utils/TableGen/X86DisassemblerTables.cpp @@ -21,10 +21,11 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" +#include <map> using namespace llvm; using namespace X86Disassembler; - + /// inheritsFrom - Indicates whether all instructions in one class also belong /// to another class. /// @@ -36,7 +37,7 @@ static inline bool inheritsFrom(InstructionContext child, bool VEX_LIG = false) { if (child == parent) return true; - + switch (parent) { case IC: return(inheritsFrom(child, IC_64BIT) || @@ -117,17 +118,17 @@ static inline bool inheritsFrom(InstructionContext child, /// @param upper - The class that may be preferable /// @param lower - The class that may be less preferable /// @return - True if upper is to be preferred, false otherwise. -static inline bool outranks(InstructionContext upper, +static inline bool outranks(InstructionContext upper, InstructionContext lower) { assert(upper < IC_max); assert(lower < IC_max); - + #define ENUM_ENTRY(n, r, d) r, static int ranks[IC_max] = { INSTRUCTION_CONTEXTS }; #undef ENUM_ENTRY - + return (ranks[upper] > ranks[lower]); } @@ -170,24 +171,22 @@ static inline const char* stringForOperandEncoding(OperandEncoding encoding) { } } -void DisassemblerTables::emitOneID(raw_ostream &o, - uint32_t &i, - InstrUID id, +void DisassemblerTables::emitOneID(raw_ostream &o, unsigned &i, InstrUID id, bool addComma) const { if (id) o.indent(i * 2) << format("0x%hx", id); else o.indent(i * 2) << 0; - + if (addComma) o << ", "; else o << " "; - + o << "/* "; o << InstructionSpecifiers[id].name; o << "*/"; - + o << "\n"; } @@ -197,8 +196,7 @@ void DisassemblerTables::emitOneID(raw_ostream &o, /// /// @param o - The output stream on which to emit the table. /// @param i - The indentation level for that output stream. -static void emitEmptyTable(raw_ostream &o, uint32_t &i) -{ +static void emitEmptyTable(raw_ostream &o, unsigned &i) { o.indent(i * 2) << "0x0, /* EmptyTable */\n"; } @@ -207,15 +205,12 @@ static void emitEmptyTable(raw_ostream &o, uint32_t &i) /// /// @param decision - The decision to be compacted. /// @return - The compactest available representation for the decision. -static ModRMDecisionType getDecisionType(ModRMDecision &decision) -{ +static ModRMDecisionType getDecisionType(ModRMDecision &decision) { bool satisfiesOneEntry = true; bool satisfiesSplitRM = true; bool satisfiesSplitReg = true; - uint16_t index; - - for (index = 0; index < 256; ++index) { + for (unsigned index = 0; index < 256; ++index) { if (decision.instructionIDs[index] != decision.instructionIDs[0]) satisfiesOneEntry = false; @@ -252,27 +247,25 @@ static ModRMDecisionType getDecisionType(ModRMDecision &decision) /// to a particular decision type. /// /// @param dt - The decision type. -/// @return - A pointer to the statically-allocated string (e.g., +/// @return - A pointer to the statically-allocated string (e.g., /// "MODRM_ONEENTRY" for MODRM_ONEENTRY). -static const char* stringForDecisionType(ModRMDecisionType dt) -{ +static const char* stringForDecisionType(ModRMDecisionType dt) { #define ENUM_ENTRY(n) case n: return #n; switch (dt) { default: - llvm_unreachable("Unknown decision type"); + llvm_unreachable("Unknown decision type"); MODRMTYPES - }; + }; #undef ENUM_ENTRY } - + /// stringForModifierType - Returns a statically-allocated string corresponding /// to an opcode modifier type. /// /// @param mt - The modifier type. /// @return - A pointer to the statically-allocated string (e.g., /// "MODIFIER_NONE" for MODIFIER_NONE). -static const char* stringForModifierType(ModifierType mt) -{ +static const char* stringForModifierType(ModifierType mt) { #define ENUM_ENTRY(n) case n: return #n; switch(mt) { default: @@ -281,35 +274,31 @@ static const char* stringForModifierType(ModifierType mt) }; #undef ENUM_ENTRY } - + DisassemblerTables::DisassemblerTables() { unsigned i; - + for (i = 0; i < array_lengthof(Tables); i++) { Tables[i] = new ContextDecision; memset(Tables[i], 0, sizeof(ContextDecision)); } - + HasConflicts = false; } - + DisassemblerTables::~DisassemblerTables() { unsigned i; - + for (i = 0; i < array_lengthof(Tables); i++) delete Tables[i]; } - -void DisassemblerTables::emitModRMDecision(raw_ostream &o1, - raw_ostream &o2, - uint32_t &i1, - uint32_t &i2, - ModRMDecision &decision) - const { - static uint64_t sTableNumber = 0; - static uint64_t sEntryNumber = 1; + +void DisassemblerTables::emitModRMDecision(raw_ostream &o1, raw_ostream &o2, + unsigned &i1, unsigned &i2, + ModRMDecision &decision) const { + static uint32_t sTableNumber = 0; + static uint32_t sEntryNumber = 1; ModRMDecisionType dt = getDecisionType(decision); - uint16_t index; if (dt == MODRM_ONEENTRY && decision.instructionIDs[0] == 0) { @@ -338,13 +327,13 @@ void DisassemblerTables::emitModRMDecision(raw_ostream &o1, emitOneID(o1, i1, decision.instructionIDs[0xc0], true); // mod = 0b11 break; case MODRM_SPLITREG: - for (index = 0; index < 64; index += 8) + for (unsigned index = 0; index < 64; index += 8) emitOneID(o1, i1, decision.instructionIDs[index], true); - for (index = 0xc0; index < 256; index += 8) + for (unsigned index = 0xc0; index < 256; index += 8) emitOneID(o1, i1, decision.instructionIDs[index], true); break; case MODRM_FULL: - for (index = 0; index < 256; ++index) + for (unsigned index = 0; index < 256; ++index) emitOneID(o1, i1, decision.instructionIDs[index], true); break; } @@ -380,20 +369,15 @@ void DisassemblerTables::emitModRMDecision(raw_ostream &o1, ++sTableNumber; } -void DisassemblerTables::emitOpcodeDecision( - raw_ostream &o1, - raw_ostream &o2, - uint32_t &i1, - uint32_t &i2, - OpcodeDecision &decision) const { - uint16_t index; - +void DisassemblerTables::emitOpcodeDecision(raw_ostream &o1, raw_ostream &o2, + unsigned &i1, unsigned &i2, + OpcodeDecision &decision) const { o2.indent(i2) << "{ /* struct OpcodeDecision */" << "\n"; i2++; o2.indent(i2) << "{" << "\n"; i2++; - for (index = 0; index < 256; ++index) { + for (unsigned index = 0; index < 256; ++index) { o2.indent(i2); o2 << "/* 0x" << format("%02hhx", index) << " */" << "\n"; @@ -412,21 +396,16 @@ void DisassemblerTables::emitOpcodeDecision( o2.indent(i2) << "}" << "\n"; } -void DisassemblerTables::emitContextDecision( - raw_ostream &o1, - raw_ostream &o2, - uint32_t &i1, - uint32_t &i2, - ContextDecision &decision, - const char* name) const { +void DisassemblerTables::emitContextDecision(raw_ostream &o1, raw_ostream &o2, + unsigned &i1, unsigned &i2, + ContextDecision &decision, + const char* name) const { o2.indent(i2) << "static const struct ContextDecision " << name << " = {\n"; i2++; o2.indent(i2) << "{ /* opcodeDecisions */" << "\n"; i2++; - unsigned index; - - for (index = 0; index < IC_max; ++index) { + for (unsigned index = 0; index < IC_max; ++index) { o2.indent(i2) << "/* "; o2 << stringForContext((InstructionContext)index); o2 << " */"; @@ -444,58 +423,81 @@ void DisassemblerTables::emitContextDecision( o2.indent(i2) << "};" << "\n"; } -void DisassemblerTables::emitInstructionInfo(raw_ostream &o, uint32_t &i) - const { +void DisassemblerTables::emitInstructionInfo(raw_ostream &o, + unsigned &i) const { + unsigned NumInstructions = InstructionSpecifiers.size(); + + o << "static const struct OperandSpecifier x86OperandSets[][" + << X86_MAX_OPERANDS << "] = {\n"; + + typedef std::vector<std::pair<const char *, const char *> > OperandListTy; + std::map<OperandListTy, unsigned> OperandSets; + + unsigned OperandSetNum = 0; + for (unsigned Index = 0; Index < NumInstructions; ++Index) { + OperandListTy OperandList; + + for (unsigned OperandIndex = 0; OperandIndex < X86_MAX_OPERANDS; + ++OperandIndex) { + const char *Encoding = + stringForOperandEncoding((OperandEncoding)InstructionSpecifiers[Index] + .operands[OperandIndex].encoding); + const char *Type = + stringForOperandType((OperandType)InstructionSpecifiers[Index] + .operands[OperandIndex].type); + OperandList.push_back(std::make_pair(Encoding, Type)); + } + unsigned &N = OperandSets[OperandList]; + if (N != 0) continue; + + N = ++OperandSetNum; + + o << " { /* " << (OperandSetNum - 1) << " */\n"; + for (unsigned i = 0, e = OperandList.size(); i != e; ++i) { + o << " { " << OperandList[i].first << ", " + << OperandList[i].second << " },\n"; + } + o << " },\n"; + } + o << "};" << "\n\n"; + o.indent(i * 2) << "static const struct InstructionSpecifier "; o << INSTRUCTIONS_STR "[" << InstructionSpecifiers.size() << "] = {\n"; - - i++; - uint16_t numInstructions = InstructionSpecifiers.size(); - uint16_t index, operandIndex; + i++; - for (index = 0; index < numInstructions; ++index) { + for (unsigned index = 0; index < NumInstructions; ++index) { o.indent(i * 2) << "{ /* " << index << " */" << "\n"; i++; o.indent(i * 2) << stringForModifierType( (ModifierType)InstructionSpecifiers[index].modifierType); - o << "," << "\n"; + o << ",\n"; o.indent(i * 2) << "0x"; o << format("%02hhx", (uint16_t)InstructionSpecifiers[index].modifierBase); - o << "," << "\n"; - - o.indent(i * 2) << "{" << "\n"; - i++; - - for (operandIndex = 0; operandIndex < X86_MAX_OPERANDS; ++operandIndex) { - o.indent(i * 2) << "{ "; - o <<stringForOperandEncoding((OperandEncoding)InstructionSpecifiers[index] - .operands[operandIndex] - .encoding); - o << ", "; - o << stringForOperandType((OperandType)InstructionSpecifiers[index] - .operands[operandIndex] - .type); - o << " }"; - - if (operandIndex < X86_MAX_OPERANDS - 1) - o << ","; - - o << "\n"; + o << ",\n"; + + OperandListTy OperandList; + for (unsigned OperandIndex = 0; OperandIndex < X86_MAX_OPERANDS; + ++OperandIndex) { + const char *Encoding = + stringForOperandEncoding((OperandEncoding)InstructionSpecifiers[index] + .operands[OperandIndex].encoding); + const char *Type = + stringForOperandType((OperandType)InstructionSpecifiers[index] + .operands[OperandIndex].type); + OperandList.push_back(std::make_pair(Encoding, Type)); } + o.indent(i * 2) << (OperandSets[OperandList] - 1) << ",\n"; - i--; - o.indent(i * 2) << "}," << "\n"; - o.indent(i * 2) << "/* " << InstructionSpecifiers[index].name << " */"; o << "\n"; i--; o.indent(i * 2) << "}"; - if (index + 1 < numInstructions) + if (index + 1 < NumInstructions) o << ","; o << "\n"; @@ -505,14 +507,12 @@ void DisassemblerTables::emitInstructionInfo(raw_ostream &o, uint32_t &i) o.indent(i * 2) << "};" << "\n"; } -void DisassemblerTables::emitContextTable(raw_ostream &o, uint32_t &i) const { - uint16_t index; - - o.indent(i * 2) << "static const InstructionContext " CONTEXTS_STR +void DisassemblerTables::emitContextTable(raw_ostream &o, unsigned &i) const { + o.indent(i * 2) << "static const uint8_t " CONTEXTS_STR "[256] = {\n"; i++; - for (index = 0; index < 256; ++index) { + for (unsigned index = 0; index < 256; ++index) { o.indent(i * 2); if ((index & ATTR_VEXL) && (index & ATTR_REXW) && (index & ATTR_OPSIZE)) @@ -545,7 +545,7 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, uint32_t &i) const { o << "IC_64BIT_REXW_XS"; else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XD)) o << "IC_64BIT_REXW_XD"; - else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && + else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_OPSIZE)) o << "IC_64BIT_REXW_OPSIZE"; else if ((index & ATTR_64BIT) && (index & ATTR_XD) && (index & ATTR_OPSIZE)) @@ -593,11 +593,8 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, uint32_t &i) const { o.indent(i * 2) << "};" << "\n"; } -void DisassemblerTables::emitContextDecisions(raw_ostream &o1, - raw_ostream &o2, - uint32_t &i1, - uint32_t &i2) - const { +void DisassemblerTables::emitContextDecisions(raw_ostream &o1, raw_ostream &o2, + unsigned &i1, unsigned &i2) const { emitContextDecision(o1, o2, i1, i2, *Tables[0], ONEBYTE_STR); emitContextDecision(o1, o2, i1, i2, *Tables[1], TWOBYTE_STR); emitContextDecision(o1, o2, i1, i2, *Tables[2], THREEBYTE38_STR); @@ -607,15 +604,15 @@ void DisassemblerTables::emitContextDecisions(raw_ostream &o1, } void DisassemblerTables::emit(raw_ostream &o) const { - uint32_t i1 = 0; - uint32_t i2 = 0; - + unsigned i1 = 0; + unsigned i2 = 0; + std::string s1; std::string s2; - + raw_string_ostream o1(s1); raw_string_ostream o2(s2); - + emitInstructionInfo(o, i2); o << "\n"; @@ -641,9 +638,7 @@ void DisassemblerTables::setTableFields(ModRMDecision &decision, const ModRMFilter &filter, InstrUID uid, uint8_t opcode) { - unsigned index; - - for (index = 0; index < 256; ++index) { + for (unsigned index = 0; index < 256; ++index) { if (filter.accepts(index)) { if (decision.instructionIDs[index] == uid) continue; @@ -653,10 +648,10 @@ void DisassemblerTables::setTableFields(ModRMDecision &decision, InstructionSpecifiers[uid]; InstructionSpecifier &previousInfo = InstructionSpecifiers[decision.instructionIDs[index]]; - + if(newInfo.filtered) continue; // filtered instructions get lowest priority - + if(previousInfo.name == "NOOP" && (newInfo.name == "XCHG16ar" || newInfo.name == "XCHG32ar" || newInfo.name == "XCHG32ar64" || @@ -665,7 +660,7 @@ void DisassemblerTables::setTableFields(ModRMDecision &decision, if (outranks(previousInfo.insnContext, newInfo.insnContext)) continue; - + if (previousInfo.insnContext == newInfo.insnContext && !previousInfo.filtered) { errs() << "Error: Primary decode conflict: "; @@ -690,17 +685,15 @@ void DisassemblerTables::setTableFields(OpcodeType type, InstrUID uid, bool is32bit, bool ignoresVEX_L) { - unsigned index; - ContextDecision &decision = *Tables[type]; - for (index = 0; index < IC_max; ++index) { + for (unsigned index = 0; index < IC_max; ++index) { if (is32bit && inheritsFrom((InstructionContext)index, IC_64BIT)) continue; - if (inheritsFrom((InstructionContext)index, + if (inheritsFrom((InstructionContext)index, InstructionSpecifiers[uid].insnContext, ignoresVEX_L)) - setTableFields(decision.opcodeDecisions[index].modRMDecisions[opcode], + setTableFields(decision.opcodeDecisions[index].modRMDecisions[opcode], filter, uid, opcode); diff --git a/utils/TableGen/X86DisassemblerTables.h b/utils/TableGen/X86DisassemblerTables.h index e148cd2..ea006c0 100644 --- a/utils/TableGen/X86DisassemblerTables.h +++ b/utils/TableGen/X86DisassemblerTables.h @@ -42,13 +42,13 @@ private: /// [4] three-byte opcodes of the form 0f a6 __ /// [5] three-byte opcodes of the form 0f a7 __ ContextDecision* Tables[6]; - + /// The instruction information table std::vector<InstructionSpecifier> InstructionSpecifiers; - + /// True if there are primary decode conflicts in the instruction set bool HasConflicts; - + /// emitOneID - Emits a table entry for a single instruction entry, at the /// innermost level of the structure hierarchy. The entry is printed out /// in the format "nnnn, /* MNEMONIC */" where nnnn is the ID in decimal, @@ -64,7 +64,7 @@ private: uint32_t &i, InstrUID id, bool addComma) const; - + /// emitModRMDecision - Emits a table of entries corresponding to a single /// ModR/M decision. Compacts the ModR/M decision if possible. ModR/M /// decisions are printed as: @@ -77,12 +77,12 @@ private: /// where nnnn is a unique ID for the corresponding table of IDs. /// TYPE indicates whether the table has one entry that is the same /// regardless of ModR/M byte, two entries - one for bytes 0x00-0xbf and one - /// for bytes 0xc0-0xff -, or 256 entries, one for each possible byte. + /// for bytes 0xc0-0xff -, or 256 entries, one for each possible byte. /// nnnn is the number of a table for looking up these values. The tables /// are written separately so that tables consisting entirely of zeros will /// not be duplicated. (These all have the name modRMEmptyTable.) A table /// is printed as: - /// + /// /// InstrUID modRMTablennnn[k] = { /// nnnn, /* MNEMONIC */ /// ... @@ -100,7 +100,7 @@ private: uint32_t &i1, uint32_t &i2, ModRMDecision &decision) const; - + /// emitOpcodeDecision - Emits an OpcodeDecision and all its subsidiary ModR/M /// decisions. An OpcodeDecision is printed as: /// @@ -129,8 +129,8 @@ private: uint32_t &i1, uint32_t &i2, OpcodeDecision &decision) const; - - /// emitContextDecision - Emits a ContextDecision and all its subsidiary + + /// emitContextDecision - Emits a ContextDecision and all its subsidiary /// Opcode and ModRMDecisions. A ContextDecision is printed as: /// /// struct ContextDecision NAME = { @@ -163,10 +163,10 @@ private: void emitContextDecision(raw_ostream &o1, raw_ostream &o2, uint32_t &i1, - uint32_t &i2, + uint32_t &i2, ContextDecision &decision, const char* name) const; - + /// emitInstructionInfo - Prints the instruction specifier table, which has /// one entry for each instruction, and contains name and operand /// information. This table is printed as: @@ -187,17 +187,17 @@ private: /// }; /// /// k is the total number of instructions. - /// nnnn is the ID of the current instruction (0-based). This table + /// nnnn is the ID of the current instruction (0-based). This table /// includes entries for non-instructions like PHINODE. /// 0xnn is the lowest possible opcode for the current instruction, used for /// AddRegFrm instructions to compute the operand's value. /// ENCODING and TYPE describe the encoding and type for a single operand. /// - /// @param o - The output stream to which the instruction table should be + /// @param o - The output stream to which the instruction table should be /// written. /// @param i - The indent level for use with the stream. void emitInstructionInfo(raw_ostream &o, uint32_t &i) const; - + /// emitContextTable - Prints the table that is used to translate from an /// instruction attribute mask to an instruction context. This table is /// printed as: @@ -213,7 +213,7 @@ private: /// @param o - The output stream to which the context table should be written. /// @param i - The indent level for use with the stream. void emitContextTable(raw_ostream &o, uint32_t &i) const; - + /// emitContextDecisions - Prints all four ContextDecision structures using /// emitContextDecision(). /// @@ -225,7 +225,7 @@ private: void emitContextDecisions(raw_ostream &o1, raw_ostream &o2, uint32_t &i1, - uint32_t &i2) const; + uint32_t &i2) const; /// setTableFields - Uses a ModRMFilter to set the appropriate entries in a /// ModRMDecision to refer to a particular instruction ID. @@ -241,14 +241,14 @@ private: public: /// Constructor - Allocates space for the class decisions and clears them. DisassemblerTables(); - + ~DisassemblerTables(); - + /// emit - Emits the instruction table, context table, and class decisions. /// /// @param o - The output stream to print the tables to. void emit(raw_ostream &o) const; - + /// setTableFields - Uses the opcode type, instruction context, opcode, and a /// ModRMFilter as criteria to set a particular set of entries in the /// decode tables to point to a specific uid. @@ -268,24 +268,24 @@ public: const ModRMFilter &filter, InstrUID uid, bool is32bit, - bool ignoresVEX_L); - + bool ignoresVEX_L); + /// specForUID - Returns the instruction specifier for a given unique /// instruction ID. Used when resolving collisions. /// /// @param uid - The unique ID of the instruction. - /// @return - A reference to the instruction specifier. + /// @return - A reference to the instruction specifier. InstructionSpecifier& specForUID(InstrUID uid) { if (uid >= InstructionSpecifiers.size()) InstructionSpecifiers.resize(uid + 1); - + return InstructionSpecifiers[uid]; } - + // hasConflicts - Reports whether there were primary decode conflicts // from any instructions added to the tables. // @return - true if there were; false otherwise. - + bool hasConflicts() { return HasConflicts; } diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp index 6a685ff..7ac2336 100644 --- a/utils/TableGen/X86RecognizableInstr.cpp +++ b/utils/TableGen/X86RecognizableInstr.cpp @@ -57,7 +57,7 @@ namespace X86Local { MRMDestMem = 4, MRMSrcReg = 5, MRMSrcMem = 6, - MRM0r = 16, MRM1r = 17, MRM2r = 18, MRM3r = 19, + MRM0r = 16, MRM1r = 17, MRM2r = 18, MRM3r = 19, MRM4r = 20, MRM5r = 21, MRM6r = 22, MRM7r = 23, MRM0m = 24, MRM1m = 25, MRM2m = 26, MRM3m = 27, MRM4m = 28, MRM5m = 29, MRM6m = 30, MRM7m = 31, @@ -69,7 +69,7 @@ namespace X86Local { #undef MAP lastMRM }; - + enum { TB = 1, REP = 2, @@ -82,17 +82,17 @@ namespace X86Local { } // If rows are added to the opcode extension tables, then corresponding entries -// must be added here. +// must be added here. // // If the row corresponds to a single byte (i.e., 8f), then add an entry for // that byte to ONE_BYTE_EXTENSION_TABLES. // -// If the row corresponds to two bytes where the first is 0f, add an entry for +// If the row corresponds to two bytes where the first is 0f, add an entry for // the second byte to TWO_BYTE_EXTENSION_TABLES. // // If the row corresponds to some other set of bytes, you will need to modify // the code in RecognizableInstr::emitDecodePath() as well, and add new prefixes -// to the X86 TD files, except in two cases: if the first two bytes of such a +// to the X86 TD files, except in two cases: if the first two bytes of such a // new combination are 0f 38 or 0f 3a, you just have to add maps called // THREE_BYTE_38_EXTENSION_TABLES and THREE_BYTE_3A_EXTENSION_TABLES and add a // switch(Opcode) just below the case X86Local::T8: or case X86Local::TA: line @@ -116,7 +116,7 @@ namespace X86Local { EXTENSION_TABLE(f7) \ EXTENSION_TABLE(fe) \ EXTENSION_TABLE(ff) - + #define TWO_BYTE_EXTENSION_TABLES \ EXTENSION_TABLE(00) \ EXTENSION_TABLE(01) \ @@ -134,7 +134,7 @@ namespace X86Local { using namespace X86Disassembler; /// needsModRMForDecode - Indicates whether a particular instruction requires a -/// ModR/M byte for the instruction to be properly decoded. For example, a +/// ModR/M byte for the instruction to be properly decoded. For example, a /// MRMDestReg instruction needs the Mod field in the ModR/M byte to be set to /// 0b11. /// @@ -213,17 +213,17 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables, Rec = insn.TheDef; Name = Rec->getName(); Spec = &tables.specForUID(UID); - + if (!Rec->isSubClassOf("X86Inst")) { ShouldBeEmitted = false; return; } - + Prefix = byteFromRec(Rec, "Prefix"); Opcode = byteFromRec(Rec, "Opcode"); Form = byteFromRec(Rec, "FormBits"); SegOvr = byteFromRec(Rec, "SegOvrBits"); - + HasOpSizePrefix = Rec->getValueAsBit("hasOpSizePrefix"); HasAdSizePrefix = Rec->getValueAsBit("hasAdSizePrefix"); HasREX_WPrefix = Rec->getValueAsBit("hasREX_WPrefix"); @@ -235,12 +235,12 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables, IgnoresVEX_L = Rec->getValueAsBit("ignoresVEX_L"); HasLockPrefix = Rec->getValueAsBit("hasLockPrefix"); IsCodeGenOnly = Rec->getValueAsBit("isCodeGenOnly"); - + Name = Rec->getName(); AsmString = Rec->getValueAsString("AsmString"); - + Operands = &insn.Operands.OperandList; - + IsSSE = (HasOpSizePrefix && (Name.find("16") == Name.npos)) || (Name.find("CRC32") != Name.npos); HasFROperands = hasFROperands(); @@ -262,20 +262,20 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables, } } // FIXME: These instructions aren't marked as 64-bit in any way - Is64Bit |= Rec->getName() == "JMP64pcrel32" || - Rec->getName() == "MASKMOVDQU64" || - Rec->getName() == "POPFS64" || - Rec->getName() == "POPGS64" || - Rec->getName() == "PUSHFS64" || + Is64Bit |= Rec->getName() == "JMP64pcrel32" || + Rec->getName() == "MASKMOVDQU64" || + Rec->getName() == "POPFS64" || + Rec->getName() == "POPGS64" || + Rec->getName() == "PUSHFS64" || Rec->getName() == "PUSHGS64" || Rec->getName() == "REX64_PREFIX" || - Rec->getName().find("MOV64") != Name.npos || + Rec->getName().find("MOV64") != Name.npos || Rec->getName().find("PUSH64") != Name.npos || Rec->getName().find("POP64") != Name.npos; ShouldBeEmitted = true; } - + void RecognizableInstr::processInstr(DisassemblerTables &tables, const CodeGenInstruction &insn, InstrUID uid) @@ -283,11 +283,11 @@ void RecognizableInstr::processInstr(DisassemblerTables &tables, // Ignore "asm parser only" instructions. if (insn.TheDef->getValueAsBit("isAsmParserOnly")) return; - + RecognizableInstr recogInstr(tables, insn, uid); - + recogInstr.emitInstructionSpecifier(tables); - + if (recogInstr.shouldBeEmitted()) recogInstr.emitDecodePath(tables); } @@ -386,55 +386,40 @@ InstructionContext RecognizableInstr::insnContext() const { return insnContext; } - + RecognizableInstr::filter_ret RecognizableInstr::filter() const { /////////////////// // FILTER_STRONG // - + // Filter out intrinsics - - if (!Rec->isSubClassOf("X86Inst")) - return FILTER_STRONG; - + + assert(Rec->isSubClassOf("X86Inst") && "Can only filter X86 instructions"); + if (Form == X86Local::Pseudo || (IsCodeGenOnly && Name.find("_REV") == Name.npos)) return FILTER_STRONG; - - if (Form == X86Local::MRMInitReg) - return FILTER_STRONG; - - + + // Filter out artificial instructions but leave in the LOCK_PREFIX so it is // printed as a separate "instruction". - + if (Name.find("_Int") != Name.npos || - Name.find("Int_") != Name.npos || - Name.find("_NOREX") != Name.npos || - Name.find("2SDL") != Name.npos) + Name.find("Int_") != Name.npos) return FILTER_STRONG; // Filter out instructions with segment override prefixes. // They're too messy to handle now and we'll special case them if needed. - + if (SegOvr) return FILTER_STRONG; - - // Filter out instructions that can't be printed. - - if (AsmString.size() == 0) - return FILTER_STRONG; - - // Filter out instructions with subreg operands. - - if (AsmString.find("subreg") != AsmString.npos) - return FILTER_STRONG; + ///////////////// // FILTER_WEAK // - + // Filter out instructions with a LOCK prefix; // prefer forms that do not have the prefix if (HasLockPrefix) @@ -474,9 +459,9 @@ RecognizableInstr::filter_ret RecognizableInstr::filter() const { return FILTER_WEAK; if (HasFROperands && Name.find("MOV") != Name.npos && - ((Name.find("2") != Name.npos && Name.find("32") == Name.npos) || + ((Name.find("2") != Name.npos && Name.find("32") == Name.npos) || (Name.find("to") != Name.npos))) - return FILTER_WEAK; + return FILTER_STRONG; return FILTER_NORMAL; } @@ -487,7 +472,7 @@ bool RecognizableInstr::hasFROperands() const { for (unsigned operandIndex = 0; operandIndex < numOperands; ++operandIndex) { const std::string &recName = OperandList[operandIndex].Rec->getName(); - + if (recName.find("FR") != recName.npos) return true; } @@ -497,17 +482,17 @@ bool RecognizableInstr::hasFROperands() const { bool RecognizableInstr::has256BitOperands() const { const std::vector<CGIOperandList::OperandInfo> &OperandList = *Operands; unsigned numOperands = OperandList.size(); - + for (unsigned operandIndex = 0; operandIndex < numOperands; ++operandIndex) { const std::string &recName = OperandList[operandIndex].Rec->getName(); - - if (!recName.compare("VR256") || !recName.compare("f256mem")) { + + if (!recName.compare("VR256")) { return true; } } return false; } - + void RecognizableInstr::handleOperand(bool optional, unsigned &operandIndex, unsigned &physicalOperandIndex, unsigned &numPhysicalOperands, @@ -521,33 +506,33 @@ void RecognizableInstr::handleOperand(bool optional, unsigned &operandIndex, } else { assert(physicalOperandIndex < numPhysicalOperands); } - + while (operandMapping[operandIndex] != operandIndex) { Spec->operands[operandIndex].encoding = ENCODING_DUP; Spec->operands[operandIndex].type = (OperandType)(TYPE_DUP0 + operandMapping[operandIndex]); ++operandIndex; } - + const std::string &typeName = (*Operands)[operandIndex].Rec->getName(); Spec->operands[operandIndex].encoding = encodingFromString(typeName, HasOpSizePrefix); - Spec->operands[operandIndex].type = typeFromString(typeName, + Spec->operands[operandIndex].type = typeFromString(typeName, IsSSE, HasREX_WPrefix, HasOpSizePrefix); - + ++operandIndex; ++physicalOperandIndex; } void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) { Spec->name = Name; - - if (!Rec->isSubClassOf("X86Inst")) + + if (!ShouldBeEmitted) return; - + switch (filter()) { case FILTER_WEAK: Spec->filtered = true; @@ -558,22 +543,19 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) { case FILTER_NORMAL: break; } - + Spec->insnContext = insnContext(); - + const std::vector<CGIOperandList::OperandInfo> &OperandList = *Operands; - + unsigned numOperands = OperandList.size(); unsigned numPhysicalOperands = 0; - + // operandMapping maps from operands in OperandList to their originals. // If operandMapping[i] != i, then the entry is a duplicate. unsigned operandMapping[X86_MAX_OPERANDS]; - - bool hasFROperands = false; - assert(numOperands <= X86_MAX_OPERANDS && "X86_MAX_OPERANDS is not large enough"); - + for (unsigned operandIndex = 0; operandIndex < numOperands; ++operandIndex) { if (OperandList[operandIndex].Constraints.size()) { const CGIOperandList::ConstraintInfo &Constraint = @@ -589,20 +571,7 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) { ++numPhysicalOperands; operandMapping[operandIndex] = operandIndex; } - - const std::string &recName = OperandList[operandIndex].Rec->getName(); - - if (recName.find("FR") != recName.npos) - hasFROperands = true; } - - if (hasFROperands && Name.find("MOV") != Name.npos && - ((Name.find("2") != Name.npos && Name.find("32") == Name.npos) || - (Name.find("to") != Name.npos))) - ShouldBeEmitted = false; - - if (!ShouldBeEmitted) - return; #define HANDLE_OPERAND(class) \ handleOperand(false, \ @@ -611,7 +580,7 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) { numPhysicalOperands, \ operandMapping, \ class##EncodingFromString); - + #define HANDLE_OPTIONAL(class) \ handleOperand(true, \ operandIndex, \ @@ -619,17 +588,17 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) { numPhysicalOperands, \ operandMapping, \ class##EncodingFromString); - + // operandIndex should always be < numOperands unsigned operandIndex = 0; // physicalOperandIndex should always be < numPhysicalOperands unsigned physicalOperandIndex = 0; - + switch (Form) { case X86Local::RawFrm: // Operand 1 (optional) is an address or immediate. // Operand 2 (optional) is an immediate. - assert(numPhysicalOperands <= 2 && + assert(numPhysicalOperands <= 2 && "Unexpected number of operands for RawFrm"); HANDLE_OPTIONAL(relocation) HANDLE_OPTIONAL(immediate) @@ -653,14 +622,14 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) { else assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 3 && "Unexpected number of operands for MRMDestRegFrm"); - + HANDLE_OPERAND(rmRegister) if (HasVEX_4VPrefix) // FIXME: In AVX, the register below becomes the one encoded // in ModRMVEX and the one above the one in the VEX.VVVV field HANDLE_OPERAND(vvvvRegister) - + HANDLE_OPERAND(roRegister) HANDLE_OPTIONAL(immediate) break; @@ -681,7 +650,7 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) { // FIXME: In AVX, the register below becomes the one encoded // in ModRMVEX and the one above the one in the VEX.VVVV field HANDLE_OPERAND(vvvvRegister) - + HANDLE_OPERAND(roRegister) HANDLE_OPTIONAL(immediate) break; @@ -694,11 +663,11 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) { if (HasVEX_4VPrefix || HasVEX_4VOp3Prefix) assert(numPhysicalOperands >= 3 && numPhysicalOperands <= 5 && - "Unexpected number of operands for MRMSrcRegFrm with VEX_4V"); + "Unexpected number of operands for MRMSrcRegFrm with VEX_4V"); else assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 4 && "Unexpected number of operands for MRMSrcRegFrm"); - + HANDLE_OPERAND(roRegister) if (HasVEX_4VPrefix) @@ -727,11 +696,11 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) { if (HasVEX_4VPrefix || HasVEX_4VOp3Prefix) assert(numPhysicalOperands >= 3 && numPhysicalOperands <= 5 && - "Unexpected number of operands for MRMSrcMemFrm with VEX_4V"); + "Unexpected number of operands for MRMSrcMemFrm with VEX_4V"); else assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 3 && "Unexpected number of operands for MRMSrcMemFrm"); - + HANDLE_OPERAND(roRegister) if (HasVEX_4VPrefix) @@ -813,7 +782,7 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) { // Ignored. break; } - + #undef HANDLE_OPERAND #undef HANDLE_OPTIONAL } @@ -827,8 +796,8 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const { break; OpcodeType opcodeType = (OpcodeType)-1; - - ModRMFilter* filter = NULL; + + ModRMFilter* filter = NULL; uint8_t opcodeToSet = 0; switch (Prefix) { @@ -1026,26 +995,26 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const { if(Spec->modifierType != MODIFIER_MODRM) { assert(opcodeToSet < 0xf9 && "Not enough room for all ADDREG_FRM operands"); - + uint8_t currentOpcode; for (currentOpcode = opcodeToSet; currentOpcode < opcodeToSet + 8; ++currentOpcode) - tables.setTableFields(opcodeType, - insnContext(), - currentOpcode, - *filter, + tables.setTableFields(opcodeType, + insnContext(), + currentOpcode, + *filter, UID, Is32Bit, IgnoresVEX_L); - + Spec->modifierType = MODIFIER_OPCODE; Spec->modifierBase = opcodeToSet; } else { // modifierBase was set where MODIFIER_MODRM was set - tables.setTableFields(opcodeType, - insnContext(), - opcodeToSet, - *filter, + tables.setTableFields(opcodeType, + insnContext(), + opcodeToSet, + *filter, UID, Is32Bit, IgnoresVEX_L); } } else { @@ -1054,13 +1023,13 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const { opcodeToSet, *filter, UID, Is32Bit, IgnoresVEX_L); - + Spec->modifierType = MODIFIER_NONE; Spec->modifierBase = opcodeToSet; } - + delete filter; - + #undef MAP } @@ -1070,7 +1039,7 @@ OperandType RecognizableInstr::typeFromString(const std::string &s, bool hasREX_WPrefix, bool hasOpSizePrefix) { if (isSSE) { - // For SSE instructions, we ignore the OpSize prefix and force operand + // For SSE instructions, we ignore the OpSize prefix and force operand // sizes. TYPE("GR16", TYPE_R16) TYPE("GR32", TYPE_R32) diff --git a/utils/UpdateCMakeLists.pl b/utils/UpdateCMakeLists.pl index 8f53514..d92a767 100755 --- a/utils/UpdateCMakeLists.pl +++ b/utils/UpdateCMakeLists.pl @@ -68,8 +68,7 @@ sub UpdateCMake { while(<IN>) { if (!$foundLibrary) { print OUT $_; - if (/^add_clang_library\(/ || /^add_llvm_library\(/ || /^add_llvm_target\(/ - || /^add_executable\(/) { + if (/^add_[^_]+_library\(/ || /^add_llvm_target\(/ || /^add_executable\(/) { $foundLibrary = 1; EmitCMakeList($dir); } diff --git a/utils/buildit/build_llvm b/utils/buildit/build_llvm index 994fb06..6aee831 100755 --- a/utils/buildit/build_llvm +++ b/utils/buildit/build_llvm @@ -133,7 +133,7 @@ if [ \! -f Makefile.config ]; then || exit 1 fi -SUBVERSION=`echo $RC_ProjectSourceVersion | sed -e 's/[^.]*\.\([0-9]*\).*/\1/'` +SUBVERSION=`echo $RC_ProjectSourceVersion | sed -e 's/.*\.\([0-9]*\).*/\1/'` if [ "x$SUBVERSION" != "x$RC_ProjectSourceVersion" ]; then LLVM_SUBMIT_SUBVERSION=`printf "%02d" $SUBVERSION` diff --git a/utils/lit/lit/main.py b/utils/lit/lit/main.py index 039868d..25bbcbd 100755 --- a/utils/lit/lit/main.py +++ b/utils/lit/lit/main.py @@ -566,6 +566,9 @@ def main(builtinParameters = {}): # Bump the GIL check interval, its more imp if opts.maxTests is not None: tests = tests[:opts.maxTests] + # Don't create more threads than tests. + opts.numThreads = min(len(tests), opts.numThreads) + extra = '' if len(tests) != numTotalTests: extra = ' of %d' % numTotalTests @@ -589,9 +592,6 @@ def main(builtinParameters = {}): # Bump the GIL check interval, its more imp else: print header - # Don't create more threads than tests. - opts.numThreads = min(len(tests), opts.numThreads) - startTime = time.time() display = TestingProgressDisplay(opts, len(tests), progressBar) provider = TestProvider(tests, opts.maxTime) diff --git a/utils/llvm.grm b/utils/llvm.grm index 322036b..ad2799f 100644 --- a/utils/llvm.grm +++ b/utils/llvm.grm @@ -175,6 +175,7 @@ FuncAttr ::= noreturn | returns_twice | nonlazybind | address_safety + | ia_nsdialect ; OptFuncAttrs ::= + _ | OptFuncAttrs FuncAttr ; diff --git a/utils/obj2yaml/coff2yaml.cpp b/utils/obj2yaml/coff2yaml.cpp index 2dbd531..c9a7159 100644 --- a/utils/obj2yaml/coff2yaml.cpp +++ b/utils/obj2yaml/coff2yaml.cpp @@ -276,7 +276,8 @@ static llvm::raw_ostream &yamlCOFFSections(llvm::object::COFFObjectFile &Obj, Obj.getSectionContents(sect, sectionData); Out << " SectionData: "; yaml::writeHexStream(Out, sectionData) << endl; - + if (iter->begin_relocations() != iter->end_relocations()) + Out << " Relocations:\n"; for (llvm::object::relocation_iterator rIter = iter->begin_relocations(); rIter != iter->end_relocations(); rIter.increment(ec)) { const llvm::object::coff_relocation *reloc = Obj.getCOFFRelocation(rIter); diff --git a/utils/yaml2obj/CMakeLists.txt b/utils/yaml2obj/CMakeLists.txt new file mode 100644 index 0000000..f8b1197 --- /dev/null +++ b/utils/yaml2obj/CMakeLists.txt @@ -0,0 +1,5 @@ +add_llvm_utility(yaml2obj + yaml2obj.cpp + ) + +target_link_libraries(yaml2obj LLVMSupport) diff --git a/utils/yaml2obj/Makefile b/utils/yaml2obj/Makefile new file mode 100644 index 0000000..e746d85 --- /dev/null +++ b/utils/yaml2obj/Makefile @@ -0,0 +1,20 @@ +##===- utils/yaml2obj/Makefile ----------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../.. +TOOLNAME = yaml2obj +USEDLIBS = LLVMSupport.a + +# This tool has no plugins, optimize startup time. +TOOL_NO_EXPORTS = 1 + +# Don't install this utility +NO_INSTALL = 1 + +include $(LEVEL)/Makefile.common diff --git a/utils/yaml2obj/yaml2obj.cpp b/utils/yaml2obj/yaml2obj.cpp new file mode 100644 index 0000000..c3b3e54 --- /dev/null +++ b/utils/yaml2obj/yaml2obj.cpp @@ -0,0 +1,879 @@ +//===- yaml2obj - Convert YAML to a binary object file --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This program takes a YAML description of an object file and outputs the +// binary equivalent. +// +// This is used for writing tests that require binary files. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/COFF.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/PrettyStackTrace.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/system_error.h" +#include "llvm/Support/YAMLParser.h" + +#include <vector> + +using namespace llvm; + +static cl::opt<std::string> + Input(cl::Positional, cl::desc("<input>"), cl::init("-")); + +template<class T> +typename llvm::enable_if_c<std::numeric_limits<T>::is_integer, bool>::type +getAs(const llvm::yaml::ScalarNode *SN, T &Result) { + SmallString<4> Storage; + StringRef Value = SN->getValue(Storage); + if (Value.getAsInteger(0, Result)) + return false; + return true; +} + +// Given a container with begin and end with ::value_type of a character type. +// Iterate through pairs of characters in the the set of [a-fA-F0-9] ignoring +// all other characters. +struct hex_pair_iterator { + StringRef::const_iterator Current, End; + typedef SmallVector<char, 2> value_type; + value_type Pair; + bool IsDone; + + hex_pair_iterator(StringRef C) + : Current(C.begin()), End(C.end()), IsDone(false) { + // Initalize Pair. + ++*this; + } + + // End iterator. + hex_pair_iterator() : Current(), End(), IsDone(true) {} + + value_type operator *() const { + return Pair; + } + + hex_pair_iterator operator ++() { + // We're at the end of the input. + if (Current == End) { + IsDone = true; + return *this; + } + Pair = value_type(); + for (; Current != End && Pair.size() != 2; ++Current) { + // Is a valid hex digit. + if ((*Current >= '0' && *Current <= '9') || + (*Current >= 'a' && *Current <= 'f') || + (*Current >= 'A' && *Current <= 'F')) + Pair.push_back(*Current); + } + // Hit the end without getting 2 hex digits. Pair is invalid. + if (Pair.size() != 2) + IsDone = true; + return *this; + } + + bool operator ==(const hex_pair_iterator Other) { + return (IsDone == Other.IsDone) || + (Current == Other.Current && End == Other.End); + } + + bool operator !=(const hex_pair_iterator Other) { + return !(*this == Other); + } +}; + +template <class ContainerOut> +static bool hexStringToByteArray(StringRef Str, ContainerOut &Out) { + for (hex_pair_iterator I(Str), E; I != E; ++I) { + typename hex_pair_iterator::value_type Pair = *I; + typename ContainerOut::value_type Byte; + if (StringRef(Pair.data(), 2).getAsInteger(16, Byte)) + return false; + Out.push_back(Byte); + } + return true; +} + +/// This parses a yaml stream that represents a COFF object file. +/// See docs/yaml2obj for the yaml scheema. +struct COFFParser { + COFFParser(yaml::Stream &Input) : YS(Input) { + std::memset(&Header, 0, sizeof(Header)); + // A COFF string table always starts with a 4 byte size field. Offsets into + // it include this size, so allocate it now. + StringTable.append(4, 0); + } + + bool parseHeader(yaml::Node *HeaderN) { + yaml::MappingNode *MN = dyn_cast<yaml::MappingNode>(HeaderN); + if (!MN) { + YS.printError(HeaderN, "header's value must be a mapping node"); + return false; + } + for (yaml::MappingNode::iterator i = MN->begin(), e = MN->end(); + i != e; ++i) { + yaml::ScalarNode *Key = dyn_cast<yaml::ScalarNode>(i->getKey()); + if (!Key) { + YS.printError(i->getKey(), "Keys must be scalar values"); + return false; + } + SmallString<32> Storage; + StringRef KeyValue = Key->getValue(Storage); + if (KeyValue == "Characteristics") { + if (!parseHeaderCharacteristics(i->getValue())) + return false; + } else { + yaml::ScalarNode *Value = dyn_cast<yaml::ScalarNode>(i->getValue()); + if (!Value) { + YS.printError(Value, + Twine(KeyValue) + " must be a scalar value"); + return false; + } + if (KeyValue == "Machine") { + uint16_t Machine; + if (!getAs(Value, Machine)) { + // It's not a raw number, try matching the string. + StringRef ValueValue = Value->getValue(Storage); + Machine = StringSwitch<COFF::MachineTypes>(ValueValue) + .Case( "IMAGE_FILE_MACHINE_UNKNOWN" + , COFF::IMAGE_FILE_MACHINE_UNKNOWN) + .Case( "IMAGE_FILE_MACHINE_AM33" + , COFF::IMAGE_FILE_MACHINE_AM33) + .Case( "IMAGE_FILE_MACHINE_AMD64" + , COFF::IMAGE_FILE_MACHINE_AMD64) + .Case( "IMAGE_FILE_MACHINE_ARM" + , COFF::IMAGE_FILE_MACHINE_ARM) + .Case( "IMAGE_FILE_MACHINE_ARMV7" + , COFF::IMAGE_FILE_MACHINE_ARMV7) + .Case( "IMAGE_FILE_MACHINE_EBC" + , COFF::IMAGE_FILE_MACHINE_EBC) + .Case( "IMAGE_FILE_MACHINE_I386" + , COFF::IMAGE_FILE_MACHINE_I386) + .Case( "IMAGE_FILE_MACHINE_IA64" + , COFF::IMAGE_FILE_MACHINE_IA64) + .Case( "IMAGE_FILE_MACHINE_M32R" + , COFF::IMAGE_FILE_MACHINE_M32R) + .Case( "IMAGE_FILE_MACHINE_MIPS16" + , COFF::IMAGE_FILE_MACHINE_MIPS16) + .Case( "IMAGE_FILE_MACHINE_MIPSFPU" + , COFF::IMAGE_FILE_MACHINE_MIPSFPU) + .Case( "IMAGE_FILE_MACHINE_MIPSFPU16" + , COFF::IMAGE_FILE_MACHINE_MIPSFPU16) + .Case( "IMAGE_FILE_MACHINE_POWERPC" + , COFF::IMAGE_FILE_MACHINE_POWERPC) + .Case( "IMAGE_FILE_MACHINE_POWERPCFP" + , COFF::IMAGE_FILE_MACHINE_POWERPCFP) + .Case( "IMAGE_FILE_MACHINE_R4000" + , COFF::IMAGE_FILE_MACHINE_R4000) + .Case( "IMAGE_FILE_MACHINE_SH3" + , COFF::IMAGE_FILE_MACHINE_SH3) + .Case( "IMAGE_FILE_MACHINE_SH3DSP" + , COFF::IMAGE_FILE_MACHINE_SH3DSP) + .Case( "IMAGE_FILE_MACHINE_SH4" + , COFF::IMAGE_FILE_MACHINE_SH4) + .Case( "IMAGE_FILE_MACHINE_SH5" + , COFF::IMAGE_FILE_MACHINE_SH5) + .Case( "IMAGE_FILE_MACHINE_THUMB" + , COFF::IMAGE_FILE_MACHINE_THUMB) + .Case( "IMAGE_FILE_MACHINE_WCEMIPSV2" + , COFF::IMAGE_FILE_MACHINE_WCEMIPSV2) + .Default(COFF::MT_Invalid); + if (Machine == COFF::MT_Invalid) { + YS.printError(Value, "Invalid value for Machine"); + return false; + } + } + Header.Machine = Machine; + } else if (KeyValue == "NumberOfSections") { + if (!getAs(Value, Header.NumberOfSections)) { + YS.printError(Value, "Invalid value for NumberOfSections"); + return false; + } + } else if (KeyValue == "TimeDateStamp") { + if (!getAs(Value, Header.TimeDateStamp)) { + YS.printError(Value, "Invalid value for TimeDateStamp"); + return false; + } + } else if (KeyValue == "PointerToSymbolTable") { + if (!getAs(Value, Header.PointerToSymbolTable)) { + YS.printError(Value, "Invalid value for PointerToSymbolTable"); + return false; + } + } else if (KeyValue == "NumberOfSymbols") { + if (!getAs(Value, Header.NumberOfSymbols)) { + YS.printError(Value, "Invalid value for NumberOfSymbols"); + return false; + } + } else if (KeyValue == "SizeOfOptionalHeader") { + if (!getAs(Value, Header.SizeOfOptionalHeader)) { + YS.printError(Value, "Invalid value for SizeOfOptionalHeader"); + return false; + } + } else { + YS.printError(Key, "Unrecognized key in header"); + return false; + } + } + } + return true; + } + + bool parseHeaderCharacteristics(yaml::Node *Characteristics) { + yaml::ScalarNode *Value = dyn_cast<yaml::ScalarNode>(Characteristics); + yaml::SequenceNode *SeqValue + = dyn_cast<yaml::SequenceNode>(Characteristics); + if (!Value && !SeqValue) { + YS.printError(Characteristics, + "Characteristics must either be a number or sequence"); + return false; + } + if (Value) { + if (!getAs(Value, Header.Characteristics)) { + YS.printError(Value, "Invalid value for Characteristics"); + return false; + } + } else { + for (yaml::SequenceNode::iterator ci = SeqValue->begin(), + ce = SeqValue->end(); + ci != ce; ++ci) { + yaml::ScalarNode *CharValue = dyn_cast<yaml::ScalarNode>(&*ci); + if (!CharValue) { + YS.printError(CharValue, + "Characteristics must be scalar values"); + return false; + } + SmallString<32> Storage; + StringRef Char = CharValue->getValue(Storage); + uint16_t Characteristic = StringSwitch<COFF::Characteristics>(Char) + .Case( "IMAGE_FILE_RELOCS_STRIPPED" + , COFF::IMAGE_FILE_RELOCS_STRIPPED) + .Case( "IMAGE_FILE_EXECUTABLE_IMAGE" + , COFF::IMAGE_FILE_EXECUTABLE_IMAGE) + .Case( "IMAGE_FILE_LINE_NUMS_STRIPPED" + , COFF::IMAGE_FILE_LINE_NUMS_STRIPPED) + .Case( "IMAGE_FILE_LOCAL_SYMS_STRIPPED" + , COFF::IMAGE_FILE_LOCAL_SYMS_STRIPPED) + .Case( "IMAGE_FILE_AGGRESSIVE_WS_TRIM" + , COFF::IMAGE_FILE_AGGRESSIVE_WS_TRIM) + .Case( "IMAGE_FILE_LARGE_ADDRESS_AWARE" + , COFF::IMAGE_FILE_LARGE_ADDRESS_AWARE) + .Case( "IMAGE_FILE_BYTES_REVERSED_LO" + , COFF::IMAGE_FILE_BYTES_REVERSED_LO) + .Case( "IMAGE_FILE_32BIT_MACHINE" + , COFF::IMAGE_FILE_32BIT_MACHINE) + .Case( "IMAGE_FILE_DEBUG_STRIPPED" + , COFF::IMAGE_FILE_DEBUG_STRIPPED) + .Case( "IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP" + , COFF::IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP) + .Case( "IMAGE_FILE_SYSTEM" + , COFF::IMAGE_FILE_SYSTEM) + .Case( "IMAGE_FILE_DLL" + , COFF::IMAGE_FILE_DLL) + .Case( "IMAGE_FILE_UP_SYSTEM_ONLY" + , COFF::IMAGE_FILE_UP_SYSTEM_ONLY) + .Default(COFF::C_Invalid); + if (Characteristic == COFF::C_Invalid) { + // TODO: Typo-correct. + YS.printError(CharValue, + "Invalid value for Characteristic"); + return false; + } + Header.Characteristics |= Characteristic; + } + } + return true; + } + + bool parseSections(yaml::Node *SectionsN) { + yaml::SequenceNode *SN = dyn_cast<yaml::SequenceNode>(SectionsN); + if (!SN) { + YS.printError(SectionsN, "Sections must be a sequence"); + return false; + } + for (yaml::SequenceNode::iterator i = SN->begin(), e = SN->end(); + i != e; ++i) { + Section Sec; + std::memset(&Sec.Header, 0, sizeof(Sec.Header)); + yaml::MappingNode *SecMap = dyn_cast<yaml::MappingNode>(&*i); + if (!SecMap) { + YS.printError(&*i, "Section entry must be a map"); + return false; + } + for (yaml::MappingNode::iterator si = SecMap->begin(), se = SecMap->end(); + si != se; ++si) { + yaml::ScalarNode *Key = dyn_cast<yaml::ScalarNode>(si->getKey()); + if (!Key) { + YS.printError(si->getKey(), "Keys must be scalar values"); + return false; + } + SmallString<32> Storage; + StringRef KeyValue = Key->getValue(Storage); + + yaml::ScalarNode *Value = dyn_cast<yaml::ScalarNode>(si->getValue()); + if (KeyValue == "Name") { + // If the name is less than 8 bytes, store it in place, otherwise + // store it in the string table. + StringRef Name = Value->getValue(Storage); + std::fill_n(Sec.Header.Name, unsigned(COFF::NameSize), 0); + if (Name.size() <= COFF::NameSize) { + std::copy(Name.begin(), Name.end(), Sec.Header.Name); + } else { + // Add string to the string table and format the index for output. + unsigned Index = getStringIndex(Name); + std::string str = utostr(Index); + if (str.size() > 7) { + YS.printError(Value, "String table got too large"); + return false; + } + Sec.Header.Name[0] = '/'; + std::copy(str.begin(), str.end(), Sec.Header.Name + 1); + } + } else if (KeyValue == "VirtualSize") { + if (!getAs(Value, Sec.Header.VirtualSize)) { + YS.printError(Value, "Invalid value for VirtualSize"); + return false; + } + } else if (KeyValue == "VirtualAddress") { + if (!getAs(Value, Sec.Header.VirtualAddress)) { + YS.printError(Value, "Invalid value for VirtualAddress"); + return false; + } + } else if (KeyValue == "SizeOfRawData") { + if (!getAs(Value, Sec.Header.SizeOfRawData)) { + YS.printError(Value, "Invalid value for SizeOfRawData"); + return false; + } + } else if (KeyValue == "PointerToRawData") { + if (!getAs(Value, Sec.Header.PointerToRawData)) { + YS.printError(Value, "Invalid value for PointerToRawData"); + return false; + } + } else if (KeyValue == "PointerToRelocations") { + if (!getAs(Value, Sec.Header.PointerToRelocations)) { + YS.printError(Value, "Invalid value for PointerToRelocations"); + return false; + } + } else if (KeyValue == "PointerToLineNumbers") { + if (!getAs(Value, Sec.Header.PointerToLineNumbers)) { + YS.printError(Value, "Invalid value for PointerToLineNumbers"); + return false; + } + } else if (KeyValue == "NumberOfRelocations") { + if (!getAs(Value, Sec.Header.NumberOfRelocations)) { + YS.printError(Value, "Invalid value for NumberOfRelocations"); + return false; + } + } else if (KeyValue == "NumberOfLineNumbers") { + if (!getAs(Value, Sec.Header.NumberOfLineNumbers)) { + YS.printError(Value, "Invalid value for NumberOfLineNumbers"); + return false; + } + } else if (KeyValue == "Characteristics") { + yaml::SequenceNode *SeqValue + = dyn_cast<yaml::SequenceNode>(si->getValue()); + if (!Value && !SeqValue) { + YS.printError(si->getValue(), + "Characteristics must either be a number or sequence"); + return false; + } + if (Value) { + if (!getAs(Value, Sec.Header.Characteristics)) { + YS.printError(Value, "Invalid value for Characteristics"); + return false; + } + } else { + for (yaml::SequenceNode::iterator ci = SeqValue->begin(), + ce = SeqValue->end(); + ci != ce; ++ci) { + yaml::ScalarNode *CharValue = dyn_cast<yaml::ScalarNode>(&*ci); + if (!CharValue) { + YS.printError(CharValue, "Invalid value for Characteristics"); + return false; + } + StringRef Char = CharValue->getValue(Storage); + uint32_t Characteristic = + StringSwitch<COFF::SectionCharacteristics>(Char) + .Case( "IMAGE_SCN_TYPE_NO_PAD" + , COFF::IMAGE_SCN_TYPE_NO_PAD) + .Case( "IMAGE_SCN_CNT_CODE" + , COFF::IMAGE_SCN_CNT_CODE) + .Case( "IMAGE_SCN_CNT_INITIALIZED_DATA" + , COFF::IMAGE_SCN_CNT_INITIALIZED_DATA) + .Case( "IMAGE_SCN_CNT_UNINITIALIZED_DATA" + , COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) + .Case( "IMAGE_SCN_LNK_OTHER" + , COFF::IMAGE_SCN_LNK_OTHER) + .Case( "IMAGE_SCN_LNK_INFO" + , COFF::IMAGE_SCN_LNK_INFO) + .Case( "IMAGE_SCN_LNK_REMOVE" + , COFF::IMAGE_SCN_LNK_REMOVE) + .Case( "IMAGE_SCN_LNK_COMDAT" + , COFF::IMAGE_SCN_LNK_COMDAT) + .Case( "IMAGE_SCN_GPREL" + , COFF::IMAGE_SCN_GPREL) + .Case( "IMAGE_SCN_MEM_PURGEABLE" + , COFF::IMAGE_SCN_MEM_PURGEABLE) + .Case( "IMAGE_SCN_MEM_16BIT" + , COFF::IMAGE_SCN_MEM_16BIT) + .Case( "IMAGE_SCN_MEM_LOCKED" + , COFF::IMAGE_SCN_MEM_LOCKED) + .Case( "IMAGE_SCN_MEM_PRELOAD" + , COFF::IMAGE_SCN_MEM_PRELOAD) + .Case( "IMAGE_SCN_ALIGN_1BYTES" + , COFF::IMAGE_SCN_ALIGN_1BYTES) + .Case( "IMAGE_SCN_ALIGN_2BYTES" + , COFF::IMAGE_SCN_ALIGN_2BYTES) + .Case( "IMAGE_SCN_ALIGN_4BYTES" + , COFF::IMAGE_SCN_ALIGN_4BYTES) + .Case( "IMAGE_SCN_ALIGN_8BYTES" + , COFF::IMAGE_SCN_ALIGN_8BYTES) + .Case( "IMAGE_SCN_ALIGN_16BYTES" + , COFF::IMAGE_SCN_ALIGN_16BYTES) + .Case( "IMAGE_SCN_ALIGN_32BYTES" + , COFF::IMAGE_SCN_ALIGN_32BYTES) + .Case( "IMAGE_SCN_ALIGN_64BYTES" + , COFF::IMAGE_SCN_ALIGN_64BYTES) + .Case( "IMAGE_SCN_ALIGN_128BYTES" + , COFF::IMAGE_SCN_ALIGN_128BYTES) + .Case( "IMAGE_SCN_ALIGN_256BYTES" + , COFF::IMAGE_SCN_ALIGN_256BYTES) + .Case( "IMAGE_SCN_ALIGN_512BYTES" + , COFF::IMAGE_SCN_ALIGN_512BYTES) + .Case( "IMAGE_SCN_ALIGN_1024BYTES" + , COFF::IMAGE_SCN_ALIGN_1024BYTES) + .Case( "IMAGE_SCN_ALIGN_2048BYTES" + , COFF::IMAGE_SCN_ALIGN_2048BYTES) + .Case( "IMAGE_SCN_ALIGN_4096BYTES" + , COFF::IMAGE_SCN_ALIGN_4096BYTES) + .Case( "IMAGE_SCN_ALIGN_8192BYTES" + , COFF::IMAGE_SCN_ALIGN_8192BYTES) + .Case( "IMAGE_SCN_LNK_NRELOC_OVFL" + , COFF::IMAGE_SCN_LNK_NRELOC_OVFL) + .Case( "IMAGE_SCN_MEM_DISCARDABLE" + , COFF::IMAGE_SCN_MEM_DISCARDABLE) + .Case( "IMAGE_SCN_MEM_NOT_CACHED" + , COFF::IMAGE_SCN_MEM_NOT_CACHED) + .Case( "IMAGE_SCN_MEM_NOT_PAGED" + , COFF::IMAGE_SCN_MEM_NOT_PAGED) + .Case( "IMAGE_SCN_MEM_SHARED" + , COFF::IMAGE_SCN_MEM_SHARED) + .Case( "IMAGE_SCN_MEM_EXECUTE" + , COFF::IMAGE_SCN_MEM_EXECUTE) + .Case( "IMAGE_SCN_MEM_READ" + , COFF::IMAGE_SCN_MEM_READ) + .Case( "IMAGE_SCN_MEM_WRITE" + , COFF::IMAGE_SCN_MEM_WRITE) + .Default(COFF::SC_Invalid); + if (Characteristic == COFF::SC_Invalid) { + YS.printError(CharValue, "Invalid value for Characteristic"); + return false; + } + Sec.Header.Characteristics |= Characteristic; + } + } + } else if (KeyValue == "SectionData") { + yaml::ScalarNode *Value = dyn_cast<yaml::ScalarNode>(si->getValue()); + SmallString<32> Storage; + StringRef Data = Value->getValue(Storage); + if (!hexStringToByteArray(Data, Sec.Data)) { + YS.printError(Value, "SectionData must be a collection of pairs of" + "hex bytes"); + return false; + } + } else + si->skip(); + } + Sections.push_back(Sec); + } + return true; + } + + bool parseSymbols(yaml::Node *SymbolsN) { + yaml::SequenceNode *SN = dyn_cast<yaml::SequenceNode>(SymbolsN); + if (!SN) { + YS.printError(SymbolsN, "Symbols must be a sequence"); + return false; + } + for (yaml::SequenceNode::iterator i = SN->begin(), e = SN->end(); + i != e; ++i) { + Symbol Sym; + std::memset(&Sym.Header, 0, sizeof(Sym.Header)); + yaml::MappingNode *SymMap = dyn_cast<yaml::MappingNode>(&*i); + if (!SymMap) { + YS.printError(&*i, "Symbol must be a map"); + return false; + } + for (yaml::MappingNode::iterator si = SymMap->begin(), se = SymMap->end(); + si != se; ++si) { + yaml::ScalarNode *Key = dyn_cast<yaml::ScalarNode>(si->getKey()); + if (!Key) { + YS.printError(si->getKey(), "Keys must be scalar values"); + return false; + } + SmallString<32> Storage; + StringRef KeyValue = Key->getValue(Storage); + + yaml::ScalarNode *Value = dyn_cast<yaml::ScalarNode>(si->getValue()); + if (!Value) { + YS.printError(si->getValue(), "Must be a scalar value"); + return false; + } + if (KeyValue == "Name") { + // If the name is less than 8 bytes, store it in place, otherwise + // store it in the string table. + StringRef Name = Value->getValue(Storage); + std::fill_n(Sym.Header.Name, unsigned(COFF::NameSize), 0); + if (Name.size() <= COFF::NameSize) { + std::copy(Name.begin(), Name.end(), Sym.Header.Name); + } else { + // Add string to the string table and format the index for output. + unsigned Index = getStringIndex(Name); + *reinterpret_cast<support::aligned_ulittle32_t*>( + Sym.Header.Name + 4) = Index; + } + } else if (KeyValue == "Value") { + if (!getAs(Value, Sym.Header.Value)) { + YS.printError(Value, "Invalid value for Value"); + return false; + } + } else if (KeyValue == "SimpleType") { + Sym.Header.Type |= StringSwitch<COFF::SymbolBaseType>( + Value->getValue(Storage)) + .Case("IMAGE_SYM_TYPE_NULL", COFF::IMAGE_SYM_TYPE_NULL) + .Case("IMAGE_SYM_TYPE_VOID", COFF::IMAGE_SYM_TYPE_VOID) + .Case("IMAGE_SYM_TYPE_CHAR", COFF::IMAGE_SYM_TYPE_CHAR) + .Case("IMAGE_SYM_TYPE_SHORT", COFF::IMAGE_SYM_TYPE_SHORT) + .Case("IMAGE_SYM_TYPE_INT", COFF::IMAGE_SYM_TYPE_INT) + .Case("IMAGE_SYM_TYPE_LONG", COFF::IMAGE_SYM_TYPE_LONG) + .Case("IMAGE_SYM_TYPE_FLOAT", COFF::IMAGE_SYM_TYPE_FLOAT) + .Case("IMAGE_SYM_TYPE_DOUBLE", COFF::IMAGE_SYM_TYPE_DOUBLE) + .Case("IMAGE_SYM_TYPE_STRUCT", COFF::IMAGE_SYM_TYPE_STRUCT) + .Case("IMAGE_SYM_TYPE_UNION", COFF::IMAGE_SYM_TYPE_UNION) + .Case("IMAGE_SYM_TYPE_ENUM", COFF::IMAGE_SYM_TYPE_ENUM) + .Case("IMAGE_SYM_TYPE_MOE", COFF::IMAGE_SYM_TYPE_MOE) + .Case("IMAGE_SYM_TYPE_BYTE", COFF::IMAGE_SYM_TYPE_BYTE) + .Case("IMAGE_SYM_TYPE_WORD", COFF::IMAGE_SYM_TYPE_WORD) + .Case("IMAGE_SYM_TYPE_UINT", COFF::IMAGE_SYM_TYPE_UINT) + .Case("IMAGE_SYM_TYPE_DWORD", COFF::IMAGE_SYM_TYPE_DWORD) + .Default(COFF::IMAGE_SYM_TYPE_NULL); + } else if (KeyValue == "ComplexType") { + Sym.Header.Type |= StringSwitch<COFF::SymbolComplexType>( + Value->getValue(Storage)) + .Case("IMAGE_SYM_DTYPE_NULL", COFF::IMAGE_SYM_DTYPE_NULL) + .Case("IMAGE_SYM_DTYPE_POINTER", COFF::IMAGE_SYM_DTYPE_POINTER) + .Case("IMAGE_SYM_DTYPE_FUNCTION", COFF::IMAGE_SYM_DTYPE_FUNCTION) + .Case("IMAGE_SYM_DTYPE_ARRAY", COFF::IMAGE_SYM_DTYPE_ARRAY) + .Default(COFF::IMAGE_SYM_DTYPE_NULL) + << COFF::SCT_COMPLEX_TYPE_SHIFT; + } else if (KeyValue == "StorageClass") { + Sym.Header.StorageClass = StringSwitch<COFF::SymbolStorageClass>( + Value->getValue(Storage)) + .Case( "IMAGE_SYM_CLASS_END_OF_FUNCTION" + , COFF::IMAGE_SYM_CLASS_END_OF_FUNCTION) + .Case( "IMAGE_SYM_CLASS_NULL" + , COFF::IMAGE_SYM_CLASS_NULL) + .Case( "IMAGE_SYM_CLASS_AUTOMATIC" + , COFF::IMAGE_SYM_CLASS_AUTOMATIC) + .Case( "IMAGE_SYM_CLASS_EXTERNAL" + , COFF::IMAGE_SYM_CLASS_EXTERNAL) + .Case( "IMAGE_SYM_CLASS_STATIC" + , COFF::IMAGE_SYM_CLASS_STATIC) + .Case( "IMAGE_SYM_CLASS_REGISTER" + , COFF::IMAGE_SYM_CLASS_REGISTER) + .Case( "IMAGE_SYM_CLASS_EXTERNAL_DEF" + , COFF::IMAGE_SYM_CLASS_EXTERNAL_DEF) + .Case( "IMAGE_SYM_CLASS_LABEL" + , COFF::IMAGE_SYM_CLASS_LABEL) + .Case( "IMAGE_SYM_CLASS_UNDEFINED_LABEL" + , COFF::IMAGE_SYM_CLASS_UNDEFINED_LABEL) + .Case( "IMAGE_SYM_CLASS_MEMBER_OF_STRUCT" + , COFF::IMAGE_SYM_CLASS_MEMBER_OF_STRUCT) + .Case( "IMAGE_SYM_CLASS_ARGUMENT" + , COFF::IMAGE_SYM_CLASS_ARGUMENT) + .Case( "IMAGE_SYM_CLASS_STRUCT_TAG" + , COFF::IMAGE_SYM_CLASS_STRUCT_TAG) + .Case( "IMAGE_SYM_CLASS_MEMBER_OF_UNION" + , COFF::IMAGE_SYM_CLASS_MEMBER_OF_UNION) + .Case( "IMAGE_SYM_CLASS_UNION_TAG" + , COFF::IMAGE_SYM_CLASS_UNION_TAG) + .Case( "IMAGE_SYM_CLASS_TYPE_DEFINITION" + , COFF::IMAGE_SYM_CLASS_TYPE_DEFINITION) + .Case( "IMAGE_SYM_CLASS_UNDEFINED_STATIC" + , COFF::IMAGE_SYM_CLASS_UNDEFINED_STATIC) + .Case( "IMAGE_SYM_CLASS_ENUM_TAG" + , COFF::IMAGE_SYM_CLASS_ENUM_TAG) + .Case( "IMAGE_SYM_CLASS_MEMBER_OF_ENUM" + , COFF::IMAGE_SYM_CLASS_MEMBER_OF_ENUM) + .Case( "IMAGE_SYM_CLASS_REGISTER_PARAM" + , COFF::IMAGE_SYM_CLASS_REGISTER_PARAM) + .Case( "IMAGE_SYM_CLASS_BIT_FIELD" + , COFF::IMAGE_SYM_CLASS_BIT_FIELD) + .Case( "IMAGE_SYM_CLASS_BLOCK" + , COFF::IMAGE_SYM_CLASS_BLOCK) + .Case( "IMAGE_SYM_CLASS_FUNCTION" + , COFF::IMAGE_SYM_CLASS_FUNCTION) + .Case( "IMAGE_SYM_CLASS_END_OF_STRUCT" + , COFF::IMAGE_SYM_CLASS_END_OF_STRUCT) + .Case( "IMAGE_SYM_CLASS_FILE" + , COFF::IMAGE_SYM_CLASS_FILE) + .Case( "IMAGE_SYM_CLASS_SECTION" + , COFF::IMAGE_SYM_CLASS_SECTION) + .Case( "IMAGE_SYM_CLASS_WEAK_EXTERNAL" + , COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL) + .Case( "IMAGE_SYM_CLASS_CLR_TOKEN" + , COFF::IMAGE_SYM_CLASS_CLR_TOKEN) + .Default(COFF::SSC_Invalid); + if (Sym.Header.StorageClass == COFF::SSC_Invalid) { + YS.printError(Value, "Invalid value for StorageClass"); + return false; + } + } else if (KeyValue == "SectionNumber") { + if (!getAs(Value, Sym.Header.SectionNumber)) { + YS.printError(Value, "Invalid value for SectionNumber"); + return false; + } + } else if (KeyValue == "AuxillaryData") { + StringRef Data = Value->getValue(Storage); + if (!hexStringToByteArray(Data, Sym.AuxSymbols)) { + YS.printError(Value, "AuxillaryData must be a collection of pairs" + "of hex bytes"); + return false; + } + } else + si->skip(); + } + Symbols.push_back(Sym); + } + return true; + } + + bool parse() { + yaml::Document &D = *YS.begin(); + yaml::MappingNode *Root = dyn_cast<yaml::MappingNode>(D.getRoot()); + if (!Root) { + YS.printError(D.getRoot(), "Root node must be a map"); + return false; + } + for (yaml::MappingNode::iterator i = Root->begin(), e = Root->end(); + i != e; ++i) { + yaml::ScalarNode *Key = dyn_cast<yaml::ScalarNode>(i->getKey()); + if (!Key) { + YS.printError(i->getKey(), "Keys must be scalar values"); + return false; + } + SmallString<32> Storage; + StringRef KeyValue = Key->getValue(Storage); + if (KeyValue == "header") { + if (!parseHeader(i->getValue())) + return false; + } else if (KeyValue == "sections") { + if (!parseSections(i->getValue())) + return false; + } else if (KeyValue == "symbols") { + if (!parseSymbols(i->getValue())) + return false; + } + } + return !YS.failed(); + } + + unsigned getStringIndex(StringRef Str) { + StringMap<unsigned>::iterator i = StringTableMap.find(Str); + if (i == StringTableMap.end()) { + unsigned Index = StringTable.size(); + StringTable.append(Str.begin(), Str.end()); + StringTable.push_back(0); + StringTableMap[Str] = Index; + return Index; + } + return i->second; + } + + yaml::Stream &YS; + COFF::header Header; + + struct Section { + COFF::section Header; + std::vector<uint8_t> Data; + std::vector<COFF::relocation> Relocations; + }; + + struct Symbol { + COFF::symbol Header; + std::vector<uint8_t> AuxSymbols; + }; + + std::vector<Section> Sections; + std::vector<Symbol> Symbols; + StringMap<unsigned> StringTableMap; + std::string StringTable; +}; + +// Take a CP and assign addresses and sizes to everything. Returns false if the +// layout is not valid to do. +static bool layoutCOFF(COFFParser &CP) { + uint32_t SectionTableStart = 0; + uint32_t SectionTableSize = 0; + + // The section table starts immediately after the header, including the + // optional header. + SectionTableStart = sizeof(COFF::header) + CP.Header.SizeOfOptionalHeader; + SectionTableSize = sizeof(COFF::section) * CP.Sections.size(); + + uint32_t CurrentSectionDataOffset = SectionTableStart + SectionTableSize; + + // Assign each section data address consecutively. + for (std::vector<COFFParser::Section>::iterator i = CP.Sections.begin(), + e = CP.Sections.end(); + i != e; ++i) { + if (!i->Data.empty()) { + i->Header.SizeOfRawData = i->Data.size(); + i->Header.PointerToRawData = CurrentSectionDataOffset; + CurrentSectionDataOffset += i->Header.SizeOfRawData; + // TODO: Handle alignment. + } else { + i->Header.SizeOfRawData = 0; + i->Header.PointerToRawData = 0; + } + } + + uint32_t SymbolTableStart = CurrentSectionDataOffset; + + // Calculate number of symbols. + uint32_t NumberOfSymbols = 0; + for (std::vector<COFFParser::Symbol>::iterator i = CP.Symbols.begin(), + e = CP.Symbols.end(); + i != e; ++i) { + if (i->AuxSymbols.size() % COFF::SymbolSize != 0) { + errs() << "AuxillaryData size not a multiple of symbol size!\n"; + return false; + } + i->Header.NumberOfAuxSymbols = i->AuxSymbols.size() / COFF::SymbolSize; + NumberOfSymbols += 1 + i->Header.NumberOfAuxSymbols; + } + + // Store all the allocated start addresses in the header. + CP.Header.NumberOfSections = CP.Sections.size(); + CP.Header.NumberOfSymbols = NumberOfSymbols; + CP.Header.PointerToSymbolTable = SymbolTableStart; + + *reinterpret_cast<support::ulittle32_t *>(&CP.StringTable[0]) + = CP.StringTable.size(); + + return true; +} + +template <typename value_type> +struct binary_le_impl { + value_type Value; + binary_le_impl(value_type V) : Value(V) {} +}; + +template <typename value_type> +raw_ostream &operator <<( raw_ostream &OS + , const binary_le_impl<value_type> &BLE) { + char Buffer[sizeof(BLE.Value)]; + support::endian::write_le<value_type, support::unaligned>(Buffer, BLE.Value); + OS.write(Buffer, sizeof(BLE.Value)); + return OS; +} + +template <typename value_type> +binary_le_impl<value_type> binary_le(value_type V) { + return binary_le_impl<value_type>(V); +} + +void writeCOFF(COFFParser &CP, raw_ostream &OS) { + OS << binary_le(CP.Header.Machine) + << binary_le(CP.Header.NumberOfSections) + << binary_le(CP.Header.TimeDateStamp) + << binary_le(CP.Header.PointerToSymbolTable) + << binary_le(CP.Header.NumberOfSymbols) + << binary_le(CP.Header.SizeOfOptionalHeader) + << binary_le(CP.Header.Characteristics); + + // Output section table. + for (std::vector<COFFParser::Section>::const_iterator i = CP.Sections.begin(), + e = CP.Sections.end(); + i != e; ++i) { + OS.write(i->Header.Name, COFF::NameSize); + OS << binary_le(i->Header.VirtualSize) + << binary_le(i->Header.VirtualAddress) + << binary_le(i->Header.SizeOfRawData) + << binary_le(i->Header.PointerToRawData) + << binary_le(i->Header.PointerToRelocations) + << binary_le(i->Header.PointerToLineNumbers) + << binary_le(i->Header.NumberOfRelocations) + << binary_le(i->Header.NumberOfLineNumbers) + << binary_le(i->Header.Characteristics); + } + + // Output section data. + for (std::vector<COFFParser::Section>::const_iterator i = CP.Sections.begin(), + e = CP.Sections.end(); + i != e; ++i) { + if (!i->Data.empty()) + OS.write(reinterpret_cast<const char*>(&i->Data[0]), i->Data.size()); + } + + // Output symbol table. + + for (std::vector<COFFParser::Symbol>::const_iterator i = CP.Symbols.begin(), + e = CP.Symbols.end(); + i != e; ++i) { + OS.write(i->Header.Name, COFF::NameSize); + OS << binary_le(i->Header.Value) + << binary_le(i->Header.SectionNumber) + << binary_le(i->Header.Type) + << binary_le(i->Header.StorageClass) + << binary_le(i->Header.NumberOfAuxSymbols); + if (!i->AuxSymbols.empty()) + OS.write( reinterpret_cast<const char*>(&i->AuxSymbols[0]) + , i->AuxSymbols.size()); + } + + // Output string table. + OS.write(&CP.StringTable[0], CP.StringTable.size()); +} + +int main(int argc, char **argv) { + cl::ParseCommandLineOptions(argc, argv); + sys::PrintStackTraceOnErrorSignal(); + PrettyStackTraceProgram X(argc, argv); + llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. + + OwningPtr<MemoryBuffer> Buf; + if (MemoryBuffer::getFileOrSTDIN(Input, Buf)) + return 1; + + SourceMgr SM; + yaml::Stream S(Buf->getBuffer(), SM); + COFFParser CP(S); + if (!CP.parse()) { + errs() << "yaml2obj: Failed to parse YAML file!\n"; + return 1; + } + if (!layoutCOFF(CP)) { + errs() << "yaml2obj: Failed to layout COFF file!\n"; + return 1; + } + writeCOFF(CP, outs()); +} |
