diff options
author | Dan Gohman <djg@cray.com> | 2007-07-18 16:29:46 +0000 |
---|---|---|
committer | Dan Gohman <djg@cray.com> | 2007-07-18 16:29:46 +0000 |
commit | f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cc (patch) | |
tree | ebb79ea1ee5e3bc1fdf38541a811a8b804f0679a /include/llvm/Bitcode | |
download | external_llvm-f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cc.zip external_llvm-f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cc.tar.gz external_llvm-f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cc.tar.bz2 |
It's not necessary to do rounding for alloca operations when the requested
alignment is equal to the stack alignment.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@40004 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'include/llvm/Bitcode')
-rw-r--r-- | include/llvm/Bitcode/Archive.h | 555 | ||||
-rw-r--r-- | include/llvm/Bitcode/BitCodes.h | 179 | ||||
-rw-r--r-- | include/llvm/Bitcode/BitstreamReader.h | 465 | ||||
-rw-r--r-- | include/llvm/Bitcode/BitstreamWriter.h | 398 | ||||
-rw-r--r-- | include/llvm/Bitcode/LLVMBitCodes.h | 195 | ||||
-rw-r--r-- | include/llvm/Bitcode/ReaderWriter.h | 48 |
6 files changed, 1840 insertions, 0 deletions
diff --git a/include/llvm/Bitcode/Archive.h b/include/llvm/Bitcode/Archive.h new file mode 100644 index 0000000..980bd07 --- /dev/null +++ b/include/llvm/Bitcode/Archive.h @@ -0,0 +1,555 @@ +//===-- llvm/Bitcode/Archive.h - LLVM Bitcode Archive -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Reid Spencer and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header file declares the Archive and ArchiveMember classes that provide +// manipulation of LLVM Archive files. The implementation is provided by the +// lib/Bitcode/Archive library. This library is used to read and write +// archive (*.a) files that contain LLVM bitcode files (or others). +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BITCODE_ARCHIVE_H +#define LLVM_BITCODE_ARCHIVE_H + +#include "llvm/ADT/ilist" +#include "llvm/System/Path.h" +#include "llvm/System/MappedFile.h" +#include <map> +#include <set> +#include <fstream> + +namespace llvm { + +// Forward declare classes +class ModuleProvider; // From VMCore +class Module; // From VMCore +class Archive; // Declared below +class ArchiveMemberHeader; // Internal implementation class + +/// This class is the main class manipulated by users of the Archive class. It +/// holds information about one member of the Archive. It is also the element +/// stored by the Archive's ilist, the Archive's main abstraction. Because of +/// the special requirements of archive files, users are not permitted to +/// construct ArchiveMember instances. You should obtain them from the methods +/// of the Archive class instead. +/// @brief This class represents a single archive member. +class ArchiveMember { + /// @name Types + /// @{ + public: + /// These flags are used internally by the archive member to specify various + /// characteristics of the member. The various "is" methods below provide + /// access to the flags. The flags are not user settable. + enum Flags { + CompressedFlag = 1, ///< Member is a normal compressed file + SVR4SymbolTableFlag = 2, ///< Member is a SVR4 symbol table + BSD4SymbolTableFlag = 4, ///< Member is a BSD4 symbol table + LLVMSymbolTableFlag = 8, ///< Member is an LLVM symbol table + BitcodeFlag = 16, ///< Member is bitcode + HasPathFlag = 64, ///< Member has a full or partial path + HasLongFilenameFlag = 128, ///< Member uses the long filename syntax + StringTableFlag = 256 ///< Member is an ar(1) format string table + }; + + /// @} + /// @name Accessors + /// @{ + public: + /// @returns the parent Archive instance + /// @brief Get the archive associated with this member + Archive* getArchive() const { return parent; } + + /// @returns the path to the Archive's file + /// @brief Get the path to the archive member + const sys::Path& getPath() const { return path; } + + /// The "user" is the owner of the file per Unix security. This may not + /// have any applicability on non-Unix systems but is a required component + /// of the "ar" file format. + /// @brief Get the user associated with this archive member. + unsigned getUser() const { return info.getUser(); } + + /// The "group" is the owning group of the file per Unix security. This + /// may not have any applicability on non-Unix systems but is a required + /// component of the "ar" file format. + /// @brief Get the group associated with this archive member. + unsigned getGroup() const { return info.getGroup(); } + + /// The "mode" specifies the access permissions for the file per Unix + /// security. This may not have any applicabiity on non-Unix systems but is + /// a required component of the "ar" file format. + /// @brief Get the permission mode associated with this archive member. + unsigned getMode() const { return info.getMode(); } + + /// This method returns the time at which the archive member was last + /// modified when it was not in the archive. + /// @brief Get the time of last modification of the archive member. + sys::TimeValue getModTime() const { return info.getTimestamp(); } + + /// @returns the size of the archive member in bytes. + /// @brief Get the size of the archive member. + uint64_t getSize() const { return info.getSize(); } + + /// This method returns the total size of the archive member as it + /// appears on disk. This includes the file content, the header, the + /// long file name if any, and the padding. + /// @brief Get total on-disk member size. + unsigned getMemberSize() const; + + /// This method will return a pointer to the in-memory content of the + /// archive member, if it is available. If the data has not been loaded + /// into memory, the return value will be null. + /// @returns a pointer to the member's data. + /// @brief Get the data content of the archive member + const void* getData() const { return data; } + + /// This method determines if the member is a regular compressed file. + /// @returns true iff the archive member is a compressed regular file. + /// @brief Determine if the member is a compressed regular file. + bool isCompressed() const { return flags&CompressedFlag; } + + /// @returns true iff the member is a SVR4 (non-LLVM) symbol table + /// @brief Determine if this member is a SVR4 symbol table. + bool isSVR4SymbolTable() const { return flags&SVR4SymbolTableFlag; } + + /// @returns true iff the member is a BSD4.4 (non-LLVM) symbol table + /// @brief Determine if this member is a BSD4.4 symbol table. + bool isBSD4SymbolTable() const { return flags&BSD4SymbolTableFlag; } + + /// @returns true iff the archive member is the LLVM symbol table + /// @brief Determine if this member is the LLVM symbol table. + bool isLLVMSymbolTable() const { return flags&LLVMSymbolTableFlag; } + + /// @returns true iff the archive member is the ar(1) string table + /// @brief Determine if this member is the ar(1) string table. + bool isStringTable() const { return flags&StringTableFlag; } + + /// @returns true iff the archive member is a bitcode file. + /// @brief Determine if this member is a bitcode file. + bool isBitcode() const { return flags&BitcodeFlag; } + + /// @returns true iff the file name contains a path (directory) component. + /// @brief Determine if the member has a path + bool hasPath() const { return flags&HasPathFlag; } + + /// Long filenames are an artifact of the ar(1) file format which allows + /// up to sixteen characters in its header and doesn't allow a path + /// separator character (/). To avoid this, a "long format" member name is + /// allowed that doesn't have this restriction. This method determines if + /// that "long format" is used for this member. + /// @returns true iff the file name uses the long form + /// @brief Determin if the member has a long file name + bool hasLongFilename() const { return flags&HasLongFilenameFlag; } + + /// This method returns the status info (like Unix stat(2)) for the archive + /// member. The status info provides the file's size, permissions, and + /// modification time. The contents of the Path::StatusInfo structure, other + /// than the size and modification time, may not have utility on non-Unix + /// systems. + /// @returns the status info for the archive member + /// @brief Obtain the status info for the archive member + const sys::FileStatus &getFileStatus() const { return info; } + + /// This method causes the archive member to be replaced with the contents + /// of the file specified by \p File. The contents of \p this will be + /// updated to reflect the new data from \p File. The \p File must exist and + /// be readable on entry to this method. + /// @returns true if an error occurred, false otherwise + /// @brief Replace contents of archive member with a new file. + bool replaceWith(const sys::Path &aFile, std::string* ErrMsg); + + /// @} + /// @name ilist methods - do not use + /// @{ + public: + const ArchiveMember *getNext() const { return next; } + const ArchiveMember *getPrev() const { return prev; } + ArchiveMember *getNext() { return next; } + ArchiveMember *getPrev() { return prev; } + void setPrev(ArchiveMember* p) { prev = p; } + void setNext(ArchiveMember* n) { next = n; } + + /// @} + /// @name Data + /// @{ + private: + ArchiveMember* next; ///< Pointer to next archive member + ArchiveMember* prev; ///< Pointer to previous archive member + Archive* parent; ///< Pointer to parent archive + sys::PathWithStatus path; ///< Path of file containing the member + sys::FileStatus info; ///< Status info (size,mode,date) + unsigned flags; ///< Flags about the archive member + const void* data; ///< Data for the member + + /// @} + /// @name Constructors + /// @{ + public: + /// The default constructor is only used by the Archive's iplist when it + /// constructs the list's sentry node. + ArchiveMember(); + + private: + /// Used internally by the Archive class to construct an ArchiveMember. + /// The contents of the ArchiveMember are filled out by the Archive class. + ArchiveMember(Archive *PAR); + + // So Archive can construct an ArchiveMember + friend class llvm::Archive; + /// @} +}; + +/// This class defines the interface to LLVM Archive files. The Archive class +/// presents the archive file as an ilist of ArchiveMember objects. The members +/// can be rearranged in any fashion either by directly editing the ilist or by +/// using editing methods on the Archive class (recommended). The Archive +/// class also provides several ways of accessing the archive file for various +/// purposes such as editing and linking. Full symbol table support is provided +/// for loading only those files that resolve symbols. Note that read +/// performance of this library is _crucial_ for performance of JIT type +/// applications and the linkers. Consequently, the implementation of the class +/// is optimized for reading. +class Archive { + + /// @name Types + /// @{ + public: + /// This is the ilist type over which users may iterate to examine + /// the contents of the archive + /// @brief The ilist type of ArchiveMembers that Archive contains. + typedef iplist<ArchiveMember> MembersList; + + /// @brief Forward mutable iterator over ArchiveMember + typedef MembersList::iterator iterator; + + /// @brief Forward immutable iterator over ArchiveMember + typedef MembersList::const_iterator const_iterator; + + /// @brief Reverse mutable iterator over ArchiveMember + typedef std::reverse_iterator<iterator> reverse_iterator; + + /// @brief Reverse immutable iterator over ArchiveMember + typedef std::reverse_iterator<const_iterator> const_reverse_iterator; + + /// @brief The in-memory version of the symbol table + typedef std::map<std::string,unsigned> SymTabType; + + /// @} + /// @name ilist accessor methods + /// @{ + public: + inline iterator begin() { return members.begin(); } + inline const_iterator begin() const { return members.begin(); } + inline iterator end () { return members.end(); } + inline const_iterator end () const { return members.end(); } + + inline reverse_iterator rbegin() { return members.rbegin(); } + inline const_reverse_iterator rbegin() const { return members.rbegin(); } + inline reverse_iterator rend () { return members.rend(); } + inline const_reverse_iterator rend () const { return members.rend(); } + + inline unsigned size() const { return members.size(); } + inline bool empty() const { return members.empty(); } + inline const ArchiveMember& front() const { return members.front(); } + inline ArchiveMember& front() { return members.front(); } + inline const ArchiveMember& back() const { return members.back(); } + inline ArchiveMember& back() { return members.back(); } + + /// @} + /// @name ilist mutator methods + /// @{ + public: + /// This method splices a \p src member from an archive (possibly \p this), + /// to a position just before the member given by \p dest in \p this. When + /// the archive is written, \p src will be written in its new location. + /// @brief Move a member to a new location + inline void splice(iterator dest, Archive& arch, iterator src) + { return members.splice(dest,arch.members,src); } + + /// This method erases a \p target member from the archive. When the + /// archive is written, it will no longer contain \p target. The associated + /// ArchiveMember is deleted. + /// @brief Erase a member. + inline iterator erase(iterator target) { return members.erase(target); } + + /// @} + /// @name Constructors + /// @{ + public: + /// Create an empty archive file and associate it with the \p Filename. This + /// method does not actually create the archive disk file. It creates an + /// empty Archive object. If the writeToDisk method is called, the archive + /// file \p Filename will be created at that point, with whatever content + /// the returned Archive object has at that time. + /// @returns An Archive* that represents the new archive file. + /// @brief Create an empty Archive. + static Archive* CreateEmpty( + const sys::Path& Filename ///< Name of the archive to (eventually) create. + ); + + /// Open an existing archive and load its contents in preparation for + /// editing. After this call, the member ilist is completely populated based + /// on the contents of the archive file. You should use this form of open if + /// you intend to modify the archive or traverse its contents (e.g. for + /// printing). + /// @brief Open and load an archive file + static Archive* OpenAndLoad( + const sys::Path& filePath, ///< The file path to open and load + std::string* ErrorMessage ///< An optional error string + ); + + /// This method opens an existing archive file from \p Filename and reads in + /// its symbol table without reading in any of the archive's members. This + /// reduces both I/O and cpu time in opening the archive if it is to be used + /// solely for symbol lookup (e.g. during linking). The \p Filename must + /// exist and be an archive file or an exception will be thrown. This form + /// of opening the archive is intended for read-only operations that need to + /// locate members via the symbol table for link editing. Since the archve + /// members are not read by this method, the archive will appear empty upon + /// return. If editing operations are performed on the archive, they will + /// completely replace the contents of the archive! It is recommended that + /// if this form of opening the archive is used that only the symbol table + /// lookup methods (getSymbolTable, findModuleDefiningSymbol, and + /// findModulesDefiningSymbols) be used. + /// @throws std::string if an error occurs opening the file + /// @returns an Archive* that represents the archive file. + /// @brief Open an existing archive and load its symbols. + static Archive* OpenAndLoadSymbols( + const sys::Path& Filename, ///< Name of the archive file to open + std::string* ErrorMessage=0 ///< An optional error string + ); + + /// This destructor cleans up the Archive object, releases all memory, and + /// closes files. It does nothing with the archive file on disk. If you + /// haven't used the writeToDisk method by the time the destructor is + /// called, all changes to the archive will be lost. + /// @throws std::string if an error occurs + /// @brief Destruct in-memory archive + ~Archive(); + + /// @} + /// @name Accessors + /// @{ + public: + /// @returns the path to the archive file. + /// @brief Get the archive path. + const sys::Path& getPath() { return archPath; } + + /// This method is provided so that editing methods can be invoked directly + /// on the Archive's iplist of ArchiveMember. However, it is recommended + /// that the usual STL style iterator interface be used instead. + /// @returns the iplist of ArchiveMember + /// @brief Get the iplist of the members + MembersList& getMembers() { return members; } + + /// This method allows direct query of the Archive's symbol table. The + /// symbol table is a std::map of std::string (the symbol) to unsigned (the + /// file offset). Note that for efficiency reasons, the offset stored in + /// the symbol table is not the actual offset. It is the offset from the + /// beginning of the first "real" file member (after the symbol table). Use + /// the getFirstFileOffset() to obtain that offset and add this value to the + /// offset in the symbol table to obtain the real file offset. Note that + /// there is purposefully no interface provided by Archive to look up + /// members by their offset. Use the findModulesDefiningSymbols and + /// findModuleDefiningSymbol methods instead. + /// @returns the Archive's symbol table. + /// @brief Get the archive's symbol table + const SymTabType& getSymbolTable() { return symTab; } + + /// This method returns the offset in the archive file to the first "real" + /// file member. Archive files, on disk, have a signature and might have a + /// symbol table that precedes the first actual file member. This method + /// allows you to determine what the size of those fields are. + /// @returns the offset to the first "real" file member in the archive. + /// @brief Get the offset to the first "real" file member in the archive. + unsigned getFirstFileOffset() { return firstFileOffset; } + + /// This method will scan the archive for bitcode modules, interpret them + /// and return a vector of the instantiated modules in \p Modules. If an + /// error occurs, this method will return true. If \p ErrMessage is not null + /// and an error occurs, \p *ErrMessage will be set to a string explaining + /// the error that occurred. + /// @returns true if an error occurred + /// @brief Instantiate all the bitcode modules located in the archive + bool getAllModules(std::vector<Module*>& Modules, std::string* ErrMessage); + + /// This accessor looks up the \p symbol in the archive's symbol table and + /// returns the associated module that defines that symbol. This method can + /// be called as many times as necessary. This is handy for linking the + /// archive into another module based on unresolved symbols. Note that the + /// ModuleProvider returned by this accessor should not be deleted by the + /// caller. It is managed internally by the Archive class. It is possible + /// that multiple calls to this accessor will return the same ModuleProvider + /// instance because the associated module defines multiple symbols. + /// @returns The ModuleProvider* found or null if the archive does not + /// contain a module that defines the \p symbol. + /// @brief Look up a module by symbol name. + ModuleProvider* findModuleDefiningSymbol( + const std::string& symbol, ///< Symbol to be sought + std::string* ErrMessage ///< Error message storage, if non-zero + ); + + /// This method is similar to findModuleDefiningSymbol but allows lookup of + /// more than one symbol at a time. If \p symbols contains a list of + /// undefined symbols in some module, then calling this method is like + /// making one complete pass through the archive to resolve symbols but is + /// more efficient than looking at the individual members. Note that on + /// exit, the symbols resolved by this method will be removed from \p + /// symbols to ensure they are not re-searched on a subsequent call. If + /// you need to retain the list of symbols, make a copy. + /// @brief Look up multiple symbols in the archive. + bool findModulesDefiningSymbols( + std::set<std::string>& symbols, ///< Symbols to be sought + std::set<ModuleProvider*>& modules, ///< The modules matching \p symbols + std::string* ErrMessage ///< Error msg storage, if non-zero + ); + + /// This method determines whether the archive is a properly formed llvm + /// bitcode archive. It first makes sure the symbol table has been loaded + /// and has a non-zero size. If it does, then it is an archive. If not, + /// then it tries to load all the bitcode modules of the archive. Finally, + /// it returns whether it was successfull. + /// @returns true if the archive is a proper llvm bitcode archive + /// @brief Determine whether the archive is a proper llvm bitcode archive. + bool isBitcodeArchive(); + + /// @} + /// @name Mutators + /// @{ + public: + /// This method is the only way to get the archive written to disk. It + /// creates or overwrites the file specified when \p this was created + /// or opened. The arguments provide options for writing the archive. If + /// \p CreateSymbolTable is true, the archive is scanned for bitcode files + /// and a symbol table of the externally visible function and global + /// variable names is created. If \p TruncateNames is true, the names of the + /// archive members will have their path component stripped and the file + /// name will be truncated at 15 characters. If \p Compress is specified, + /// all archive members will be compressed before being written. If + /// \p PrintSymTab is true, the symbol table will be printed to std::cout. + /// @returns true if an error occurred, \p error set to error message + /// @returns false if the writing succeeded. + /// @brief Write (possibly modified) archive contents to disk + bool writeToDisk( + bool CreateSymbolTable=false, ///< Create Symbol table + bool TruncateNames=false, ///< Truncate the filename to 15 chars + bool Compress=false, ///< Compress files + std::string* ErrMessage=0 ///< If non-null, where error msg is set + ); + + /// This method adds a new file to the archive. The \p filename is examined + /// to determine just enough information to create an ArchiveMember object + /// which is then inserted into the Archive object's ilist at the location + /// given by \p where. + /// @returns true if an error occured, false otherwise + /// @brief Add a file to the archive. + bool addFileBefore( + const sys::Path& filename, ///< The file to be added + iterator where, ///< Insertion point + std::string* ErrMsg ///< Optional error message location + ); + + /// @} + /// @name Implementation + /// @{ + protected: + /// @brief Construct an Archive for \p filename and optionally map it + /// into memory. + Archive(const sys::Path& filename); + + /// @param error Set to address of a std::string to get error messages + /// @returns false on error + /// @brief Parse the symbol table at \p data. + bool parseSymbolTable(const void* data,unsigned len,std::string* error); + + /// @returns A fully populated ArchiveMember or 0 if an error occurred. + /// @brief Parse the header of a member starting at \p At + ArchiveMember* parseMemberHeader( + const char*&At, ///< The pointer to the location we're parsing + const char*End, ///< The pointer to the end of the archive + std::string* error ///< Optional error message catcher + ); + + /// @param error Set to address of a std::string to get error messages + /// @returns false on error + /// @brief Check that the archive signature is correct + bool checkSignature(std::string* ErrMessage); + + /// @param error Set to address of a std::string to get error messages + /// @returns false on error + /// @brief Load the entire archive. + bool loadArchive(std::string* ErrMessage); + + /// @param error Set to address of a std::string to get error messages + /// @returns false on error + /// @brief Load just the symbol table. + bool loadSymbolTable(std::string* ErrMessage); + + /// @brief Write the symbol table to an ofstream. + void writeSymbolTable(std::ofstream& ARFile); + + /// Writes one ArchiveMember to an ofstream. If an error occurs, returns + /// false, otherwise true. If an error occurs and error is non-null then + /// it will be set to an error message. + /// @returns false Writing member succeeded + /// @returns true Writing member failed, \p error set to error message + bool writeMember( + const ArchiveMember& member, ///< The member to be written + std::ofstream& ARFile, ///< The file to write member onto + bool CreateSymbolTable, ///< Should symbol table be created? + bool TruncateNames, ///< Should names be truncated to 11 chars? + bool ShouldCompress, ///< Should the member be compressed? + std::string* ErrMessage ///< If non-null, place were error msg is set + ); + + /// @brief Fill in an ArchiveMemberHeader from ArchiveMember. + bool fillHeader(const ArchiveMember&mbr, + ArchiveMemberHeader& hdr,int sz, bool TruncateNames) const; + + /// @brief Maps archive into memory + bool mapToMemory(std::string* ErrMsg); + + /// @brief Frees all the members and unmaps the archive file. + void cleanUpMemory(); + + /// This type is used to keep track of bitcode modules loaded from the + /// symbol table. It maps the file offset to a pair that consists of the + /// associated ArchiveMember and the ModuleProvider. + /// @brief Module mapping type + typedef std::map<unsigned,std::pair<ModuleProvider*,ArchiveMember*> > + ModuleMap; + + + /// @} + /// @name Data + /// @{ + protected: + sys::Path archPath; ///< Path to the archive file we read/write + MembersList members; ///< The ilist of ArchiveMember + sys::MappedFile* mapfile; ///< Raw Archive contents mapped into memory + const char* base; ///< Base of the memory mapped file data + SymTabType symTab; ///< The symbol table + std::string strtab; ///< The string table for long file names + unsigned symTabSize; ///< Size in bytes of symbol table + unsigned firstFileOffset; ///< Offset to first normal file. + ModuleMap modules; ///< The modules loaded via symbol lookup. + ArchiveMember* foreignST; ///< This holds the foreign symbol table. + /// @} + /// @name Hidden + /// @{ + private: + Archive(); ///< Do not implement + Archive(const Archive&); ///< Do not implement + Archive& operator=(const Archive&); ///< Do not implement + /// @} +}; + +} // End llvm namespace + +#endif diff --git a/include/llvm/Bitcode/BitCodes.h b/include/llvm/Bitcode/BitCodes.h new file mode 100644 index 0000000..59d57e7 --- /dev/null +++ b/include/llvm/Bitcode/BitCodes.h @@ -0,0 +1,179 @@ +//===- BitCodes.h - Enum values for the bitcode format ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header Bitcode enum values. +// +// The enum values defined in this file should be considered permanent. If +// new features are added, they should have values added at the end of the +// respective lists. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BITCODE_BITCODES_H +#define LLVM_BITCODE_BITCODES_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/DataTypes.h" +#include <cassert> + +namespace llvm { +namespace bitc { + enum StandardWidths { + BlockIDWidth = 8, // We use VBR-8 for block IDs. + CodeLenWidth = 4, // Codelen are VBR-4. + BlockSizeWidth = 32 // BlockSize up to 2^32 32-bit words = 32GB per block. + }; + + // The standard abbrev namespace always has a way to exit a block, enter a + // nested block, define abbrevs, and define an unabbreviated record. + enum FixedAbbrevIDs { + END_BLOCK = 0, // Must be zero to guarantee termination for broken bitcode. + ENTER_SUBBLOCK = 1, + + /// DEFINE_ABBREV - Defines an abbrev for the current block. It consists + /// of a vbr5 for # operand infos. Each operand info is emitted with a + /// single bit to indicate if it is a literal encoding. If so, the value is + /// emitted with a vbr8. If not, the encoding is emitted as 3 bits followed + /// by the info value as a vbr5 if needed. + DEFINE_ABBREV = 2, + + // UNABBREV_RECORDs are emitted with a vbr6 for the record code, followed by + // a vbr6 for the # operands, followed by vbr6's for each operand. + UNABBREV_RECORD = 3, + + // This is not a code, this is a marker for the first abbrev assignment. + FIRST_APPLICATION_ABBREV = 4 + }; + + /// StandardBlockIDs - All bitcode files can optionally include a BLOCKINFO + /// block, which contains metadata about other blocks in the file. + enum StandardBlockIDs { + /// BLOCKINFO_BLOCK is used to define metadata about blocks, for example, + /// standard abbrevs that should be available to all blocks of a specified + /// ID. + BLOCKINFO_BLOCK_ID = 0, + + // Block IDs 1-7 are reserved for future expansion. + FIRST_APPLICATION_BLOCKID = 8 + }; + + /// BlockInfoCodes - The blockinfo block contains metadata about user-defined + /// blocks. + enum BlockInfoCodes { + BLOCKINFO_CODE_SETBID = 1 // SETBID: [blockid#] + // DEFINE_ABBREV has magic semantics here, applying to the current SETBID'd + // block, instead of the BlockInfo block. + // BLOCKNAME: give string name to block, if desired. + }; + +} // End bitc namespace + +/// BitCodeAbbrevOp - This describes one or more operands in an abbreviation. +/// This is actually a union of two different things: +/// 1. It could be a literal integer value ("the operand is always 17"). +/// 2. It could be an encoding specification ("this operand encoded like so"). +/// +class BitCodeAbbrevOp { + uint64_t Val; // A literal value or data for an encoding. + bool IsLiteral : 1; // Indicate whether this is a literal value or not. + unsigned Enc : 3; // The encoding to use. +public: + enum Encoding { + Fixed = 1, // A fixed width field, Val specifies number of bits. + VBR = 2, // A VBR field where Val specifies the width of each chunk. + Array = 3, // A sequence of fields, next field species elt encoding. + Char6 = 4 // A 6-bit fixed field which maps to [a-zA-Z0-9._]. + }; + + BitCodeAbbrevOp(uint64_t V) : Val(V), IsLiteral(true) {} + BitCodeAbbrevOp(Encoding E, uint64_t Data = 0) + : Val(Data), IsLiteral(false), Enc(E) {} + + bool isLiteral() const { return IsLiteral; } + bool isEncoding() const { return !IsLiteral; } + + // Accessors for literals. + uint64_t getLiteralValue() const { assert(isLiteral()); return Val; } + + // Accessors for encoding info. + Encoding getEncoding() const { assert(isEncoding()); return (Encoding)Enc; } + uint64_t getEncodingData() const { + assert(isEncoding() && hasEncodingData()); + return Val; + } + + bool hasEncodingData() const { return hasEncodingData(getEncoding()); } + static bool hasEncodingData(Encoding E) { + switch (E) { + default: assert(0 && "Unknown encoding"); + case Fixed: + case VBR: + return true; + case Array: + case Char6: + return false; + } + } + + /// isChar6 - Return true if this character is legal in the Char6 encoding. + static bool isChar6(char C) { + if (C >= 'a' && C <= 'z') return true; + if (C >= 'A' && C <= 'Z') return true; + if (C >= '0' && C <= '9') return true; + if (C == '.' || C == '_') return true; + return false; + } + static unsigned EncodeChar6(char C) { + if (C >= 'a' && C <= 'z') return C-'a'; + if (C >= 'A' && C <= 'Z') return C-'A'+26; + if (C >= '0' && C <= '9') return C-'0'+26+26; + if (C == '.') return 62; + if (C == '_') return 63; + assert(0 && "Not a value Char6 character!"); + return 0; + } + + static char DecodeChar6(unsigned V) { + assert((V & ~63) == 0 && "Not a Char6 encoded character!"); + if (V < 26) return V+'a'; + if (V < 26+26) return V-26+'A'; + if (V < 26+26+10) return V-26-26+'0'; + if (V == 62) return '.'; + if (V == 63) return '_'; + assert(0 && "Not a value Char6 character!"); + return ' '; + } + +}; + +/// BitCodeAbbrev - This class represents an abbreviation record. An +/// abbreviation allows a complex record that has redundancy to be stored in a +/// specialized format instead of the fully-general, fully-vbr, format. +class BitCodeAbbrev { + SmallVector<BitCodeAbbrevOp, 8> OperandList; + unsigned char RefCount; // Number of things using this. + ~BitCodeAbbrev() {} +public: + BitCodeAbbrev() : RefCount(1) {} + + void addRef() { ++RefCount; } + void dropRef() { if (--RefCount == 0) delete this; } + + unsigned getNumOperandInfos() const { return OperandList.size(); } + const BitCodeAbbrevOp &getOperandInfo(unsigned N) const { + return OperandList[N]; + } + + void Add(const BitCodeAbbrevOp &OpInfo) { + OperandList.push_back(OpInfo); + } +}; +} // End llvm namespace + +#endif diff --git a/include/llvm/Bitcode/BitstreamReader.h b/include/llvm/Bitcode/BitstreamReader.h new file mode 100644 index 0000000..86a26c2 --- /dev/null +++ b/include/llvm/Bitcode/BitstreamReader.h @@ -0,0 +1,465 @@ +//===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header defines the BitstreamReader class. This class can be used to +// read an arbitrary bitstream, regardless of its contents. +// +//===----------------------------------------------------------------------===// + +#ifndef BITSTREAM_READER_H +#define BITSTREAM_READER_H + +#include "llvm/Bitcode/BitCodes.h" +#include <vector> + +namespace llvm { + +class BitstreamReader { + const unsigned char *NextChar; + const unsigned char *LastChar; + + /// CurWord - This is the current data we have pulled from the stream but have + /// not returned to the client. + uint32_t CurWord; + + /// BitsInCurWord - This is the number of bits in CurWord that are valid. This + /// is always from [0...31] inclusive. + unsigned BitsInCurWord; + + // CurCodeSize - This is the declared size of code values used for the current + // block, in bits. + unsigned CurCodeSize; + + /// CurAbbrevs - Abbrevs installed at in this block. + std::vector<BitCodeAbbrev*> CurAbbrevs; + + struct Block { + unsigned PrevCodeSize; + std::vector<BitCodeAbbrev*> PrevAbbrevs; + explicit Block(unsigned PCS) : PrevCodeSize(PCS) {} + }; + + /// BlockScope - This tracks the codesize of parent blocks. + SmallVector<Block, 8> BlockScope; + + /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks. + /// These describe abbreviations that all blocks of the specified ID inherit. + struct BlockInfo { + unsigned BlockID; + std::vector<BitCodeAbbrev*> Abbrevs; + }; + std::vector<BlockInfo> BlockInfoRecords; + + /// FirstChar - This remembers the first byte of the stream. + const unsigned char *FirstChar; +public: + BitstreamReader() { + NextChar = FirstChar = LastChar = 0; + CurWord = 0; + BitsInCurWord = 0; + CurCodeSize = 0; + } + + BitstreamReader(const unsigned char *Start, const unsigned char *End) { + init(Start, End); + } + + void init(const unsigned char *Start, const unsigned char *End) { + NextChar = FirstChar = Start; + LastChar = End; + assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes"); + CurWord = 0; + BitsInCurWord = 0; + CurCodeSize = 2; + } + + ~BitstreamReader() { + // Abbrevs could still exist if the stream was broken. If so, don't leak + // them. + for (unsigned i = 0, e = CurAbbrevs.size(); i != e; ++i) + CurAbbrevs[i]->dropRef(); + + for (unsigned S = 0, e = BlockScope.size(); S != e; ++S) { + std::vector<BitCodeAbbrev*> &Abbrevs = BlockScope[S].PrevAbbrevs; + for (unsigned i = 0, e = Abbrevs.size(); i != e; ++i) + Abbrevs[i]->dropRef(); + } + + // Free the BlockInfoRecords. + while (!BlockInfoRecords.empty()) { + BlockInfo &Info = BlockInfoRecords.back(); + // Free blockinfo abbrev info. + for (unsigned i = 0, e = Info.Abbrevs.size(); i != e; ++i) + Info.Abbrevs[i]->dropRef(); + BlockInfoRecords.pop_back(); + } + } + + bool AtEndOfStream() const { + return NextChar == LastChar && BitsInCurWord == 0; + } + + /// GetCurrentBitNo - Return the bit # of the bit we are reading. + uint64_t GetCurrentBitNo() const { + return (NextChar-FirstChar)*8 + ((32-BitsInCurWord) & 31); + } + + /// JumpToBit - Reset the stream to the specified bit number. + void JumpToBit(uint64_t BitNo) { + uintptr_t ByteNo = uintptr_t(BitNo/8) & ~3; + uintptr_t WordBitNo = uintptr_t(BitNo) & 31; + assert(ByteNo < (uintptr_t)(LastChar-FirstChar) && "Invalid location"); + + // Move the cursor to the right word. + NextChar = FirstChar+ByteNo; + BitsInCurWord = 0; + + // Skip over any bits that are already consumed. + if (WordBitNo) { + NextChar -= 4; + Read(WordBitNo); + } + } + + /// GetAbbrevIDWidth - Return the number of bits used to encode an abbrev #. + unsigned GetAbbrevIDWidth() const { return CurCodeSize; } + + uint32_t Read(unsigned NumBits) { + // If the field is fully contained by CurWord, return it quickly. + if (BitsInCurWord >= NumBits) { + uint32_t R = CurWord & ((1U << NumBits)-1); + CurWord >>= NumBits; + BitsInCurWord -= NumBits; + return R; + } + + // If we run out of data, stop at the end of the stream. + if (LastChar == NextChar) { + CurWord = 0; + BitsInCurWord = 0; + return 0; + } + + unsigned R = CurWord; + + // Read the next word from the stream. + CurWord = (NextChar[0] << 0) | (NextChar[1] << 8) | + (NextChar[2] << 16) | (NextChar[3] << 24); + NextChar += 4; + + // Extract NumBits-BitsInCurWord from what we just read. + unsigned BitsLeft = NumBits-BitsInCurWord; + + // Be careful here, BitsLeft is in the range [1..32] inclusive. + R |= (CurWord & (~0U >> (32-BitsLeft))) << BitsInCurWord; + + // BitsLeft bits have just been used up from CurWord. + if (BitsLeft != 32) + CurWord >>= BitsLeft; + else + CurWord = 0; + BitsInCurWord = 32-BitsLeft; + return R; + } + + uint64_t Read64(unsigned NumBits) { + if (NumBits <= 32) return Read(NumBits); + + uint64_t V = Read(32); + return V | (uint64_t)Read(NumBits-32) << 32; + } + + uint32_t ReadVBR(unsigned NumBits) { + uint32_t Piece = Read(NumBits); + if ((Piece & (1U << (NumBits-1))) == 0) + return Piece; + + uint32_t Result = 0; + unsigned NextBit = 0; + while (1) { + Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit; + + if ((Piece & (1U << (NumBits-1))) == 0) + return Result; + + NextBit += NumBits-1; + Piece = Read(NumBits); + } + } + + uint64_t ReadVBR64(unsigned NumBits) { + uint64_t Piece = Read(NumBits); + if ((Piece & (1U << (NumBits-1))) == 0) + return Piece; + + uint64_t Result = 0; + unsigned NextBit = 0; + while (1) { + Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit; + + if ((Piece & (1U << (NumBits-1))) == 0) + return Result; + + NextBit += NumBits-1; + Piece = Read(NumBits); + } + } + + void SkipToWord() { + BitsInCurWord = 0; + CurWord = 0; + } + + + unsigned ReadCode() { + return Read(CurCodeSize); + } + + //===--------------------------------------------------------------------===// + // Block Manipulation + //===--------------------------------------------------------------------===// + +private: + /// getBlockInfo - If there is block info for the specified ID, return it, + /// otherwise return null. + BlockInfo *getBlockInfo(unsigned BlockID) { + // Common case, the most recent entry matches BlockID. + if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID) + return &BlockInfoRecords.back(); + + for (unsigned i = 0, e = BlockInfoRecords.size(); i != e; ++i) + if (BlockInfoRecords[i].BlockID == BlockID) + return &BlockInfoRecords[i]; + return 0; + } +public: + + + // Block header: + // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen] + + /// ReadSubBlockID - Having read the ENTER_SUBBLOCK code, read the BlockID for + /// the block. + unsigned ReadSubBlockID() { + return ReadVBR(bitc::BlockIDWidth); + } + + /// SkipBlock - Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip + /// over the body of this block. If the block record is malformed, return + /// true. + bool SkipBlock() { + // Read and ignore the codelen value. Since we are skipping this block, we + // don't care what code widths are used inside of it. + ReadVBR(bitc::CodeLenWidth); + SkipToWord(); + unsigned NumWords = Read(bitc::BlockSizeWidth); + + // Check that the block wasn't partially defined, and that the offset isn't + // bogus. + if (AtEndOfStream() || NextChar+NumWords*4 > LastChar) + return true; + + NextChar += NumWords*4; + return false; + } + + /// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid, read and enter + /// the block, returning the BlockID of the block we just entered. + bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = 0) { + // Save the current block's state on BlockScope. + BlockScope.push_back(Block(CurCodeSize)); + BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); + + // Add the abbrevs specific to this block to the CurAbbrevs list. + if (BlockInfo *Info = getBlockInfo(BlockID)) { + for (unsigned i = 0, e = Info->Abbrevs.size(); i != e; ++i) { + CurAbbrevs.push_back(Info->Abbrevs[i]); + CurAbbrevs.back()->addRef(); + } + } + + // Get the codesize of this block. + CurCodeSize = ReadVBR(bitc::CodeLenWidth); + SkipToWord(); + unsigned NumWords = Read(bitc::BlockSizeWidth); + if (NumWordsP) *NumWordsP = NumWords; + + // Validate that this block is sane. + if (CurCodeSize == 0 || AtEndOfStream() || NextChar+NumWords*4 > LastChar) + return true; + + return false; + } + + bool ReadBlockEnd() { + if (BlockScope.empty()) return true; + + // Block tail: + // [END_BLOCK, <align4bytes>] + SkipToWord(); + CurCodeSize = BlockScope.back().PrevCodeSize; + + // Delete abbrevs from popped scope. + for (unsigned i = 0, e = CurAbbrevs.size(); i != e; ++i) + CurAbbrevs[i]->dropRef(); + + BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); + BlockScope.pop_back(); + return false; + } + + //===--------------------------------------------------------------------===// + // Record Processing + //===--------------------------------------------------------------------===// + +private: + void ReadAbbreviatedField(const BitCodeAbbrevOp &Op, + SmallVectorImpl<uint64_t> &Vals) { + if (Op.isLiteral()) { + // If the abbrev specifies the literal value to use, use it. + Vals.push_back(Op.getLiteralValue()); + } else { + // Decode the value as we are commanded. + switch (Op.getEncoding()) { + default: assert(0 && "Unknown encoding!"); + case BitCodeAbbrevOp::Fixed: + Vals.push_back(Read((unsigned)Op.getEncodingData())); + break; + case BitCodeAbbrevOp::VBR: + Vals.push_back(ReadVBR64((unsigned)Op.getEncodingData())); + break; + case BitCodeAbbrevOp::Char6: + Vals.push_back(BitCodeAbbrevOp::DecodeChar6(Read(6))); + break; + } + } + } +public: + unsigned ReadRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals) { + if (AbbrevID == bitc::UNABBREV_RECORD) { + unsigned Code = ReadVBR(6); + unsigned NumElts = ReadVBR(6); + for (unsigned i = 0; i != NumElts; ++i) + Vals.push_back(ReadVBR64(6)); + return Code; + } + + unsigned AbbrevNo = AbbrevID-bitc::FIRST_APPLICATION_ABBREV; + assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!"); + BitCodeAbbrev *Abbv = CurAbbrevs[AbbrevNo]; + + for (unsigned i = 0, e = Abbv->getNumOperandInfos(); i != e; ++i) { + const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); + if (Op.isLiteral() || Op.getEncoding() != BitCodeAbbrevOp::Array) { + ReadAbbreviatedField(Op, Vals); + } else { + // Array case. Read the number of elements as a vbr6. + unsigned NumElts = ReadVBR(6); + + // Get the element encoding. + assert(i+2 == e && "array op not second to last?"); + const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); + + // Read all the elements. + for (; NumElts; --NumElts) + ReadAbbreviatedField(EltEnc, Vals); + } + } + + unsigned Code = (unsigned)Vals[0]; + Vals.erase(Vals.begin()); + return Code; + } + + //===--------------------------------------------------------------------===// + // Abbrev Processing + //===--------------------------------------------------------------------===// + + void ReadAbbrevRecord() { + BitCodeAbbrev *Abbv = new BitCodeAbbrev(); + unsigned NumOpInfo = ReadVBR(5); + for (unsigned i = 0; i != NumOpInfo; ++i) { + bool IsLiteral = Read(1); + if (IsLiteral) { + Abbv->Add(BitCodeAbbrevOp(ReadVBR64(8))); + continue; + } + + BitCodeAbbrevOp::Encoding E = (BitCodeAbbrevOp::Encoding)Read(3); + if (BitCodeAbbrevOp::hasEncodingData(E)) + Abbv->Add(BitCodeAbbrevOp(E, ReadVBR64(5))); + else + Abbv->Add(BitCodeAbbrevOp(E)); + } + CurAbbrevs.push_back(Abbv); + } + + //===--------------------------------------------------------------------===// + // BlockInfo Block Reading + //===--------------------------------------------------------------------===// + +private: + BlockInfo &getOrCreateBlockInfo(unsigned BlockID) { + if (BlockInfo *BI = getBlockInfo(BlockID)) + return *BI; + + // Otherwise, add a new record. + BlockInfoRecords.push_back(BlockInfo()); + BlockInfoRecords.back().BlockID = BlockID; + return BlockInfoRecords.back(); + } + +public: + + bool ReadBlockInfoBlock() { + if (EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID)) return true; + + SmallVector<uint64_t, 64> Record; + BlockInfo *CurBlockInfo = 0; + + // Read all the records for this module. + while (1) { + unsigned Code = ReadCode(); + if (Code == bitc::END_BLOCK) + return ReadBlockEnd(); + if (Code == bitc::ENTER_SUBBLOCK) { + ReadSubBlockID(); + if (SkipBlock()) return true; + continue; + } + + // Read abbrev records, associate them with CurBID. + if (Code == bitc::DEFINE_ABBREV) { + if (!CurBlockInfo) return true; + ReadAbbrevRecord(); + + // ReadAbbrevRecord installs the abbrev in CurAbbrevs. Move it to the + // appropriate BlockInfo. + BitCodeAbbrev *Abbv = CurAbbrevs.back(); + CurAbbrevs.pop_back(); + CurBlockInfo->Abbrevs.push_back(Abbv); + continue; + } + + // Read a record. + Record.clear(); + switch (ReadRecord(Code, Record)) { + default: break; // Default behavior, ignore unknown content. + case bitc::BLOCKINFO_CODE_SETBID: + if (Record.size() < 1) return true; + CurBlockInfo = &getOrCreateBlockInfo((unsigned)Record[0]); + break; + } + } + } +}; + +} // End llvm namespace + +#endif diff --git a/include/llvm/Bitcode/BitstreamWriter.h b/include/llvm/Bitcode/BitstreamWriter.h new file mode 100644 index 0000000..64598ed --- /dev/null +++ b/include/llvm/Bitcode/BitstreamWriter.h @@ -0,0 +1,398 @@ +//===- BitstreamWriter.h - Low-level bitstream writer interface -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header defines the BitstreamWriter class. This class can be used to +// write an arbitrary bitstream, regardless of its contents. +// +//===----------------------------------------------------------------------===// + +#ifndef BITSTREAM_WRITER_H +#define BITSTREAM_WRITER_H + +#include "llvm/Bitcode/BitCodes.h" +#include <vector> + +namespace llvm { + +class BitstreamWriter { + std::vector<unsigned char> &Out; + + /// CurBit - Always between 0 and 31 inclusive, specifies the next bit to use. + unsigned CurBit; + + /// CurValue - The current value. Only bits < CurBit are valid. + uint32_t CurValue; + + /// CurCodeSize - This is the declared size of code values used for the + /// current block, in bits. + unsigned CurCodeSize; + + /// BlockInfoCurBID - When emitting a BLOCKINFO_BLOCK, this is the currently + /// selected BLOCK ID. + unsigned BlockInfoCurBID; + + /// CurAbbrevs - Abbrevs installed at in this block. + std::vector<BitCodeAbbrev*> CurAbbrevs; + + struct Block { + unsigned PrevCodeSize; + unsigned StartSizeWord; + std::vector<BitCodeAbbrev*> PrevAbbrevs; + Block(unsigned PCS, unsigned SSW) : PrevCodeSize(PCS), StartSizeWord(SSW) {} + }; + + /// BlockScope - This tracks the current blocks that we have entered. + std::vector<Block> BlockScope; + + /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks. + /// These describe abbreviations that all blocks of the specified ID inherit. + struct BlockInfo { + unsigned BlockID; + std::vector<BitCodeAbbrev*> Abbrevs; + }; + std::vector<BlockInfo> BlockInfoRecords; + +public: + BitstreamWriter(std::vector<unsigned char> &O) + : Out(O), CurBit(0), CurValue(0), CurCodeSize(2) {} + + ~BitstreamWriter() { + assert(CurBit == 0 && "Unflused data remaining"); + assert(BlockScope.empty() && CurAbbrevs.empty() && "Block imbalance"); + + // Free the BlockInfoRecords. + while (!BlockInfoRecords.empty()) { + BlockInfo &Info = BlockInfoRecords.back(); + // Free blockinfo abbrev info. + for (unsigned i = 0, e = Info.Abbrevs.size(); i != e; ++i) + Info.Abbrevs[i]->dropRef(); + BlockInfoRecords.pop_back(); + } + } + //===--------------------------------------------------------------------===// + // Basic Primitives for emitting bits to the stream. + //===--------------------------------------------------------------------===// + + void Emit(uint32_t Val, unsigned NumBits) { + assert(NumBits <= 32 && "Invalid value size!"); + assert((Val & ~(~0U >> (32-NumBits))) == 0 && "High bits set!"); + CurValue |= Val << CurBit; + if (CurBit + NumBits < 32) { + CurBit += NumBits; + return; + } + + // Add the current word. + unsigned V = CurValue; + Out.push_back((unsigned char)(V >> 0)); + Out.push_back((unsigned char)(V >> 8)); + Out.push_back((unsigned char)(V >> 16)); + Out.push_back((unsigned char)(V >> 24)); + + if (CurBit) + CurValue = Val >> (32-CurBit); + else + CurValue = 0; + CurBit = (CurBit+NumBits) & 31; + } + + void Emit64(uint64_t Val, unsigned NumBits) { + if (NumBits <= 32) + Emit((uint32_t)Val, NumBits); + else { + Emit((uint32_t)Val, 32); + Emit((uint32_t)(Val >> 32), NumBits-32); + } + } + + void FlushToWord() { + if (CurBit) { + unsigned V = CurValue; + Out.push_back((unsigned char)(V >> 0)); + Out.push_back((unsigned char)(V >> 8)); + Out.push_back((unsigned char)(V >> 16)); + Out.push_back((unsigned char)(V >> 24)); + CurBit = 0; + CurValue = 0; + } + } + + void EmitVBR(uint32_t Val, unsigned NumBits) { + uint32_t Threshold = 1U << (NumBits-1); + + // Emit the bits with VBR encoding, NumBits-1 bits at a time. + while (Val >= Threshold) { + Emit((Val & ((1 << (NumBits-1))-1)) | (1 << (NumBits-1)), NumBits); + Val >>= NumBits-1; + } + + Emit(Val, NumBits); + } + + void EmitVBR64(uint64_t Val, unsigned NumBits) { + if ((uint32_t)Val == Val) + return EmitVBR((uint32_t)Val, NumBits); + + uint64_t Threshold = 1U << (NumBits-1); + + // Emit the bits with VBR encoding, NumBits-1 bits at a time. + while (Val >= Threshold) { + Emit(((uint32_t)Val & ((1 << (NumBits-1))-1)) | + (1 << (NumBits-1)), NumBits); + Val >>= NumBits-1; + } + + Emit((uint32_t)Val, NumBits); + } + + /// EmitCode - Emit the specified code. + void EmitCode(unsigned Val) { + Emit(Val, CurCodeSize); + } + + //===--------------------------------------------------------------------===// + // Block Manipulation + //===--------------------------------------------------------------------===// + + /// getBlockInfo - If there is block info for the specified ID, return it, + /// otherwise return null. + BlockInfo *getBlockInfo(unsigned BlockID) { + // Common case, the most recent entry matches BlockID. + if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID) + return &BlockInfoRecords.back(); + + for (unsigned i = 0, e = BlockInfoRecords.size(); i != e; ++i) + if (BlockInfoRecords[i].BlockID == BlockID) + return &BlockInfoRecords[i]; + return 0; + } + + void EnterSubblock(unsigned BlockID, unsigned CodeLen) { + // Block header: + // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen] + EmitCode(bitc::ENTER_SUBBLOCK); + EmitVBR(BlockID, bitc::BlockIDWidth); + EmitVBR(CodeLen, bitc::CodeLenWidth); + FlushToWord(); + + unsigned BlockSizeWordLoc = Out.size(); + unsigned OldCodeSize = CurCodeSize; + + // Emit a placeholder, which will be replaced when the block is popped. + Emit(0, bitc::BlockSizeWidth); + + CurCodeSize = CodeLen; + + // Push the outer block's abbrev set onto the stack, start out with an + // empty abbrev set. + BlockScope.push_back(Block(OldCodeSize, BlockSizeWordLoc/4)); + BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); + + // If there is a blockinfo for this BlockID, add all the predefined abbrevs + // to the abbrev list. + if (BlockInfo *Info = getBlockInfo(BlockID)) { + for (unsigned i = 0, e = Info->Abbrevs.size(); i != e; ++i) { + CurAbbrevs.push_back(Info->Abbrevs[i]); + Info->Abbrevs[i]->addRef(); + } + } + } + + void ExitBlock() { + assert(!BlockScope.empty() && "Block scope imbalance!"); + + // Delete all abbrevs. + for (unsigned i = 0, e = CurAbbrevs.size(); i != e; ++i) + CurAbbrevs[i]->dropRef(); + + const Block &B = BlockScope.back(); + + // Block tail: + // [END_BLOCK, <align4bytes>] + EmitCode(bitc::END_BLOCK); + FlushToWord(); + + // Compute the size of the block, in words, not counting the size field. + unsigned SizeInWords = Out.size()/4-B.StartSizeWord - 1; + unsigned ByteNo = B.StartSizeWord*4; + + // Update the block size field in the header of this sub-block. + Out[ByteNo++] = (unsigned char)(SizeInWords >> 0); + Out[ByteNo++] = (unsigned char)(SizeInWords >> 8); + Out[ByteNo++] = (unsigned char)(SizeInWords >> 16); + Out[ByteNo++] = (unsigned char)(SizeInWords >> 24); + + // Restore the inner block's code size and abbrev table. + CurCodeSize = B.PrevCodeSize; + BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); + BlockScope.pop_back(); + } + + //===--------------------------------------------------------------------===// + // Record Emission + //===--------------------------------------------------------------------===// + +private: + /// EmitAbbreviatedField - Emit a single scalar field value with the specified + /// encoding. + template<typename uintty> + void EmitAbbreviatedField(const BitCodeAbbrevOp &Op, uintty V) { + if (Op.isLiteral()) { + // If the abbrev specifies the literal value to use, don't emit + // anything. + assert(V == Op.getLiteralValue() && + "Invalid abbrev for record!"); + return; + } + + // Encode the value as we are commanded. + switch (Op.getEncoding()) { + default: assert(0 && "Unknown encoding!"); + case BitCodeAbbrevOp::Fixed: + Emit((unsigned)V, (unsigned)Op.getEncodingData()); + break; + case BitCodeAbbrevOp::VBR: + EmitVBR64(V, (unsigned)Op.getEncodingData()); + break; + case BitCodeAbbrevOp::Char6: + Emit(BitCodeAbbrevOp::EncodeChar6((char)V), 6); + break; + } + } +public: + + /// EmitRecord - Emit the specified record to the stream, using an abbrev if + /// we have one to compress the output. + template<typename uintty> + void EmitRecord(unsigned Code, SmallVectorImpl<uintty> &Vals, + unsigned Abbrev = 0) { + if (Abbrev) { + unsigned AbbrevNo = Abbrev-bitc::FIRST_APPLICATION_ABBREV; + assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!"); + BitCodeAbbrev *Abbv = CurAbbrevs[AbbrevNo]; + + EmitCode(Abbrev); + + // Insert the code into Vals to treat it uniformly. + Vals.insert(Vals.begin(), Code); + + unsigned RecordIdx = 0; + for (unsigned i = 0, e = Abbv->getNumOperandInfos(); i != e; ++i) { + const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); + if (Op.isLiteral() || Op.getEncoding() != BitCodeAbbrevOp::Array) { + assert(RecordIdx < Vals.size() && "Invalid abbrev/record"); + EmitAbbreviatedField(Op, Vals[RecordIdx]); + ++RecordIdx; + } else { + // Array case. + assert(i+2 == e && "array op not second to last?"); + const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); + + // Emit a vbr6 to indicate the number of elements present. + EmitVBR(Vals.size()-RecordIdx, 6); + + // Emit each field. + for (; RecordIdx != Vals.size(); ++RecordIdx) + EmitAbbreviatedField(EltEnc, Vals[RecordIdx]); + } + } + assert(RecordIdx == Vals.size() && "Not all record operands emitted!"); + } else { + // If we don't have an abbrev to use, emit this in its fully unabbreviated + // form. + EmitCode(bitc::UNABBREV_RECORD); + EmitVBR(Code, 6); + EmitVBR(Vals.size(), 6); + for (unsigned i = 0, e = Vals.size(); i != e; ++i) + EmitVBR64(Vals[i], 6); + } + } + + //===--------------------------------------------------------------------===// + // Abbrev Emission + //===--------------------------------------------------------------------===// + +private: + // Emit the abbreviation as a DEFINE_ABBREV record. + void EncodeAbbrev(BitCodeAbbrev *Abbv) { + EmitCode(bitc::DEFINE_ABBREV); + EmitVBR(Abbv->getNumOperandInfos(), 5); + for (unsigned i = 0, e = Abbv->getNumOperandInfos(); i != e; ++i) { + const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); + Emit(Op.isLiteral(), 1); + if (Op.isLiteral()) { + EmitVBR64(Op.getLiteralValue(), 8); + } else { + Emit(Op.getEncoding(), 3); + if (Op.hasEncodingData()) + EmitVBR64(Op.getEncodingData(), 5); + } + } + } +public: + + /// EmitAbbrev - This emits an abbreviation to the stream. Note that this + /// method takes ownership of the specified abbrev. + unsigned EmitAbbrev(BitCodeAbbrev *Abbv) { + // Emit the abbreviation as a record. + EncodeAbbrev(Abbv); + CurAbbrevs.push_back(Abbv); + return CurAbbrevs.size()-1+bitc::FIRST_APPLICATION_ABBREV; + } + + //===--------------------------------------------------------------------===// + // BlockInfo Block Emission + //===--------------------------------------------------------------------===// + + /// EnterBlockInfoBlock - Start emitting the BLOCKINFO_BLOCK. + void EnterBlockInfoBlock(unsigned CodeWidth) { + EnterSubblock(bitc::BLOCKINFO_BLOCK_ID, CodeWidth); + BlockInfoCurBID = -1U; + } +private: + /// SwitchToBlockID - If we aren't already talking about the specified block + /// ID, emit a BLOCKINFO_CODE_SETBID record. + void SwitchToBlockID(unsigned BlockID) { + if (BlockInfoCurBID == BlockID) return; + SmallVector<unsigned, 2> V; + V.push_back(BlockID); + EmitRecord(bitc::BLOCKINFO_CODE_SETBID, V); + BlockInfoCurBID = BlockID; + } + + BlockInfo &getOrCreateBlockInfo(unsigned BlockID) { + if (BlockInfo *BI = getBlockInfo(BlockID)) + return *BI; + + // Otherwise, add a new record. + BlockInfoRecords.push_back(BlockInfo()); + BlockInfoRecords.back().BlockID = BlockID; + return BlockInfoRecords.back(); + } + +public: + + /// EmitBlockInfoAbbrev - Emit a DEFINE_ABBREV record for the specified + /// BlockID. + unsigned EmitBlockInfoAbbrev(unsigned BlockID, BitCodeAbbrev *Abbv) { + SwitchToBlockID(BlockID); + EncodeAbbrev(Abbv); + + // Add the abbrev to the specified block record. + BlockInfo &Info = getOrCreateBlockInfo(BlockID); + Info.Abbrevs.push_back(Abbv); + + return Info.Abbrevs.size()-1+bitc::FIRST_APPLICATION_ABBREV; + } +}; + + +} // End llvm namespace + +#endif diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h new file mode 100644 index 0000000..24e6729 --- /dev/null +++ b/include/llvm/Bitcode/LLVMBitCodes.h @@ -0,0 +1,195 @@ +//===- LLVMBitCodes.h - Enum values for the LLVM bitcode format -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header defines Bitcode enum values for LLVM IR bitcode files. +// +// The enum values defined in this file should be considered permanent. If +// new features are added, they should have values added at the end of the +// respective lists. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BITCODE_LLVMBITCODES_H +#define LLVM_BITCODE_LLVMBITCODES_H + +#include "llvm/Bitcode/BitCodes.h" + +namespace llvm { +namespace bitc { + // The only top-level block type defined is for a module. + enum BlockIDs { + // Blocks + MODULE_BLOCK_ID = FIRST_APPLICATION_BLOCKID, + + // Module sub-block id's. + PARAMATTR_BLOCK_ID, + TYPE_BLOCK_ID, + CONSTANTS_BLOCK_ID, + FUNCTION_BLOCK_ID, + TYPE_SYMTAB_BLOCK_ID, + VALUE_SYMTAB_BLOCK_ID + }; + + + /// MODULE blocks have a number of optional fields and subblocks. + enum ModuleCodes { + MODULE_CODE_VERSION = 1, // VERSION: [version#] + MODULE_CODE_TRIPLE = 2, // TRIPLE: [strchr x N] + MODULE_CODE_DATALAYOUT = 3, // DATALAYOUT: [strchr x N] + MODULE_CODE_ASM = 4, // ASM: [strchr x N] + MODULE_CODE_SECTIONNAME = 5, // SECTIONNAME: [strchr x N] + MODULE_CODE_DEPLIB = 6, // DEPLIB: [strchr x N] + + // GLOBALVAR: [type, isconst, initid, + // linkage, alignment, section, visibility, threadlocal] + MODULE_CODE_GLOBALVAR = 7, + + // FUNCTION: [type, callingconv, isproto, linkage, paramattrs, alignment, + // section, visibility] + MODULE_CODE_FUNCTION = 8, + + // ALIAS: [alias type, aliasee val#, linkage] + MODULE_CODE_ALIAS = 9, + + /// MODULE_CODE_PURGEVALS: [numvals] + MODULE_CODE_PURGEVALS = 10 + }; + + /// PARAMATTR blocks have code for defining a parameter attribute set. + enum ParamAttrCodes { + PARAMATTR_CODE_ENTRY = 1 // ENTRY: [paramidx0, attr0, paramidx1, attr1...] + }; + + /// TYPE blocks have codes for each type primitive they use. + enum TypeCodes { + TYPE_CODE_NUMENTRY = 1, // NUMENTRY: [numentries] + + // Type Codes + TYPE_CODE_VOID = 2, // VOID + TYPE_CODE_FLOAT = 3, // FLOAT + TYPE_CODE_DOUBLE = 4, // DOUBLE + TYPE_CODE_LABEL = 5, // LABEL + TYPE_CODE_OPAQUE = 6, // OPAQUE + TYPE_CODE_INTEGER = 7, // INTEGER: [width] + TYPE_CODE_POINTER = 8, // POINTER: [pointee type] + TYPE_CODE_FUNCTION = 9, // FUNCTION: [vararg, retty, paramty x N] + TYPE_CODE_STRUCT = 10, // STRUCT: [ispacked, eltty x N] + TYPE_CODE_ARRAY = 11, // ARRAY: [numelts, eltty] + TYPE_CODE_VECTOR = 12 // VECTOR: [numelts, eltty] + // Any other type code is assumed to be an unknown type. + }; + + // The type symbol table only has one code (TST_ENTRY_CODE). + enum TypeSymtabCodes { + TST_CODE_ENTRY = 1 // TST_ENTRY: [typeid, namechar x N] + }; + + // The value symbol table only has one code (VST_ENTRY_CODE). + enum ValueSymtabCodes { + VST_CODE_ENTRY = 1, // VST_ENTRY: [valid, namechar x N] + VST_CODE_BBENTRY = 2 // VST_BBENTRY: [bbid, namechar x N] + }; + + // The constants block (CONSTANTS_BLOCK_ID) describes emission for each + // constant and maintains an implicit current type value. + enum ConstantsCodes { + CST_CODE_SETTYPE = 1, // SETTYPE: [typeid] + CST_CODE_NULL = 2, // NULL + CST_CODE_UNDEF = 3, // UNDEF + CST_CODE_INTEGER = 4, // INTEGER: [intval] + CST_CODE_WIDE_INTEGER = 5, // WIDE_INTEGER: [n x intval] + CST_CODE_FLOAT = 6, // FLOAT: [fpval] + CST_CODE_AGGREGATE = 7, // AGGREGATE: [n x value number] + CST_CODE_STRING = 8, // STRING: [values] + CST_CODE_CSTRING = 9, // CSTRING: [values] + CST_CODE_CE_BINOP = 10, // CE_BINOP: [opcode, opval, opval] + CST_CODE_CE_CAST = 11, // CE_CAST: [opcode, opty, opval] + CST_CODE_CE_GEP = 12, // CE_GEP: [n x operands] + CST_CODE_CE_SELECT = 13, // CE_SELECT: [opval, opval, opval] + CST_CODE_CE_EXTRACTELT = 14, // CE_EXTRACTELT: [opty, opval, opval] + CST_CODE_CE_INSERTELT = 15, // CE_INSERTELT: [opval, opval, opval] + CST_CODE_CE_SHUFFLEVEC = 16, // CE_SHUFFLEVEC: [opval, opval, opval] + CST_CODE_CE_CMP = 17, // CE_CMP: [opty, opval, opval, pred] + CST_CODE_INLINEASM = 18 // INLINEASM: [sideeffect,asmstr,conststr] + }; + + /// CastOpcodes - These are values used in the bitcode files to encode which + /// cast a CST_CODE_CE_CAST or a XXX refers to. The values of these enums + /// have no fixed relation to the LLVM IR enum values. Changing these will + /// break compatibility with old files. + enum CastOpcodes { + CAST_TRUNC = 0, + CAST_ZEXT = 1, + CAST_SEXT = 2, + CAST_FPTOUI = 3, + CAST_FPTOSI = 4, + CAST_UITOFP = 5, + CAST_SITOFP = 6, + CAST_FPTRUNC = 7, + CAST_FPEXT = 8, + CAST_PTRTOINT = 9, + CAST_INTTOPTR = 10, + CAST_BITCAST = 11 + }; + + /// BinaryOpcodes - These are values used in the bitcode files to encode which + /// binop a CST_CODE_CE_BINOP or a XXX refers to. The values of these enums + /// have no fixed relation to the LLVM IR enum values. Changing these will + /// break compatibility with old files. + enum BinaryOpcodes { + BINOP_ADD = 0, + BINOP_SUB = 1, + BINOP_MUL = 2, + BINOP_UDIV = 3, + BINOP_SDIV = 4, // overloaded for FP + BINOP_UREM = 5, + BINOP_SREM = 6, // overloaded for FP + BINOP_SHL = 7, + BINOP_LSHR = 8, + BINOP_ASHR = 9, + BINOP_AND = 10, + BINOP_OR = 11, + BINOP_XOR = 12 + }; + + + // The function body block (FUNCTION_BLOCK_ID) describes function bodies. It + // can contain a constant block (CONSTANTS_BLOCK_ID). + enum FunctionCodes { + FUNC_CODE_DECLAREBLOCKS = 1, // DECLAREBLOCKS: [n] + + FUNC_CODE_INST_BINOP = 2, // BINOP: [opcode, ty, opval, opval] + FUNC_CODE_INST_CAST = 3, // CAST: [opcode, ty, opty, opval] + FUNC_CODE_INST_GEP = 4, // GEP: [n x operands] + FUNC_CODE_INST_SELECT = 5, // SELECT: [ty, opval, opval, opval] + FUNC_CODE_INST_EXTRACTELT = 6, // EXTRACTELT: [opty, opval, opval] + FUNC_CODE_INST_INSERTELT = 7, // INSERTELT: [ty, opval, opval, opval] + FUNC_CODE_INST_SHUFFLEVEC = 8, // SHUFFLEVEC: [ty, opval, opval, opval] + FUNC_CODE_INST_CMP = 9, // CMP: [opty, opval, opval, pred] + + FUNC_CODE_INST_RET = 10, // RET: [opty,opval<both optional>] + FUNC_CODE_INST_BR = 11, // BR: [bb#, bb#, cond] or [bb#] + FUNC_CODE_INST_SWITCH = 12, // SWITCH: [opty, opval, n, n x ops] + FUNC_CODE_INST_INVOKE = 13, // INVOKE: [attr, fnty, op0,op1, ...] + FUNC_CODE_INST_UNWIND = 14, // UNWIND + FUNC_CODE_INST_UNREACHABLE = 15, // UNREACHABLE + + FUNC_CODE_INST_PHI = 16, // PHI: [ty, val0,bb0, ...] + FUNC_CODE_INST_MALLOC = 17, // MALLOC: [instty, op, align] + FUNC_CODE_INST_FREE = 18, // FREE: [opty, op] + FUNC_CODE_INST_ALLOCA = 19, // ALLOCA: [instty, op, align] + FUNC_CODE_INST_LOAD = 20, // LOAD: [opty, op, align, vol] + FUNC_CODE_INST_STORE = 21, // STORE: [ptrty,val,ptr, align, vol] + FUNC_CODE_INST_CALL = 22, // CALL: [attr, fnty, fnid, args...] + FUNC_CODE_INST_VAARG = 23 // VAARG: [valistty, valist, instty] + }; +} // End bitc namespace +} // End llvm namespace + +#endif diff --git a/include/llvm/Bitcode/ReaderWriter.h b/include/llvm/Bitcode/ReaderWriter.h new file mode 100644 index 0000000..7898385 --- /dev/null +++ b/include/llvm/Bitcode/ReaderWriter.h @@ -0,0 +1,48 @@ +//===-- llvm/Bitcode/ReaderWriter.h - Bitcode reader/writers ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header defines interfaces to read and write LLVM bitcode files/streams. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BITCODE_H +#define LLVM_BITCODE_H + +#include <iosfwd> +#include <string> + +namespace llvm { + class Module; + class ModuleProvider; + class MemoryBuffer; + class ModulePass; + + /// getBitcodeModuleProvider - Read the header of the specified bitcode buffer + /// and prepare for lazy deserialization of function bodies. If successful, + /// this takes ownership of 'buffer' and returns a non-null pointer. On + /// error, this returns null, *does not* take ownership of Buffer, and fills + /// in *ErrMsg with an error description if ErrMsg is non-null. + ModuleProvider *getBitcodeModuleProvider(MemoryBuffer *Buffer, + std::string *ErrMsg = 0); + + /// ParseBitcodeFile - Read the specified bitcode file, returning the module. + /// If an error occurs, this returns null and fills in *ErrMsg if it is + /// non-null. This method *never* takes ownership of Buffer. + Module *ParseBitcodeFile(MemoryBuffer *Buffer, std::string *ErrMsg = 0); + + /// WriteBitcodeToFile - Write the specified module to the specified output + /// stream. + void WriteBitcodeToFile(const Module *M, std::ostream &Out); + + /// CreateBitcodeWriterPass - Create and return a pass that writes the module + /// to the specified ostream. + ModulePass *CreateBitcodeWriterPass(std::ostream &Str); +} // End llvm namespace + +#endif |