1 files changed, 147 insertions, 185 deletions
diff --git a/include/llvm/Bitcode/BitstreamReader.h b/include/llvm/Bitcode/BitstreamReader.h
index 6f478b7..ecf8235 100644
--- a/include/llvm/Bitcode/BitstreamReader.h
+++ b/include/llvm/Bitcode/BitstreamReader.h
@@ -17,39 +17,37 @@
 
 #include "llvm/Bitcode/BitCodes.h"
 #include "llvm/Support/Endian.h"
-#include "llvm/Support/StreamableMemoryObject.h"
+#include "llvm/Support/StreamingMemoryObject.h"
 #include <climits>
 #include <string>
 #include <vector>
 
 namespace llvm {
 
-  class Deserializer;
+class Deserializer;
 
-/// BitstreamReader - This class is used to read from an LLVM bitcode stream,
-/// maintaining information that is global to decoding the entire file.  While
-/// a file is being read, multiple cursors can be independently advanced or
-/// skipped around within the file.  These are represented by the
-/// BitstreamCursor class.
+/// This class is used to read from an LLVM bitcode stream, maintaining
+/// information that is global to decoding the entire file. While a file is
+/// being read, multiple cursors can be independently advanced or skipped around
+/// within the file.  These are represented by the BitstreamCursor class.
 class BitstreamReader {
 public:
-  /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks.
-  /// These describe abbreviations that all blocks of the specified ID inherit.
+  /// This contains information emitted to BLOCKINFO_BLOCK blocks. These
+  /// describe abbreviations that all blocks of the specified ID inherit.
   struct BlockInfo {
     unsigned BlockID;
-    std::vector<BitCodeAbbrev*> Abbrevs;
+    std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> Abbrevs;
     std::string Name;
 
     std::vector<std::pair<unsigned, std::string> > RecordNames;
   };
 private:
-  std::unique_ptr<StreamableMemoryObject> BitcodeBytes;
+  std::unique_ptr<MemoryObject> BitcodeBytes;
 
   std::vector<BlockInfo> BlockInfoRecords;
 
-  /// IgnoreBlockInfoNames - This is set to true if we don't care about the
-  /// block/record name information in the BlockInfo block. Only llvm-bcanalyzer
-  /// uses this.
+  /// This is set to true if we don't care about the block/record name
+  /// information in the BlockInfo block. Only llvm-bcanalyzer uses this.
   bool IgnoreBlockInfoNames;
 
   BitstreamReader(const BitstreamReader&) LLVM_DELETED_FUNCTION;
@@ -58,36 +56,35 @@ public:
   BitstreamReader() : IgnoreBlockInfoNames(true) {
   }
 
-  BitstreamReader(const unsigned char *Start, const unsigned char *End) {
-    IgnoreBlockInfoNames = true;
+  BitstreamReader(const unsigned char *Start, const unsigned char *End)
+      : IgnoreBlockInfoNames(true) {
     init(Start, End);
   }
 
-  BitstreamReader(StreamableMemoryObject *bytes) {
+  BitstreamReader(MemoryObject *bytes) : IgnoreBlockInfoNames(true) {
     BitcodeBytes.reset(bytes);
   }
 
+  BitstreamReader(BitstreamReader &&Other) {
+    *this = std::move(Other);
+  }
+
+  BitstreamReader &operator=(BitstreamReader &&Other) {
+    BitcodeBytes = std::move(Other.BitcodeBytes);
+    // Explicitly swap block info, so that nothing gets destroyed twice.
+    std::swap(BlockInfoRecords, Other.BlockInfoRecords);
+    IgnoreBlockInfoNames = Other.IgnoreBlockInfoNames;
+    return *this;
+  }
+
   void init(const unsigned char *Start, const unsigned char *End) {
     assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes");
     BitcodeBytes.reset(getNonStreamedMemoryObject(Start, End));
   }
 
-  StreamableMemoryObject &getBitcodeBytes() { return *BitcodeBytes; }
-
-  ~BitstreamReader() {
-    // Free the BlockInfoRecords.
-    while (!BlockInfoRecords.empty()) {
-      BlockInfo &Info = BlockInfoRecords.back();
-      // Free blockinfo abbrev info.
-      for (unsigned i = 0, e = static_cast<unsigned>(Info.Abbrevs.size());
-           i != e; ++i)
-        Info.Abbrevs[i]->dropRef();
-      BlockInfoRecords.pop_back();
-    }
-  }
+  MemoryObject &getBitcodeBytes() { return *BitcodeBytes; }
 
-  /// CollectBlockInfoNames - This is called by clients that want block/record
-  /// name information.
+  /// This is called by clients that want block/record name information.
   void CollectBlockInfoNames() { IgnoreBlockInfoNames = false; }
   bool isIgnoringBlockInfoNames() { return IgnoreBlockInfoNames; }
 
@@ -95,13 +92,13 @@ public:
   // Block Manipulation
   //===--------------------------------------------------------------------===//
 
-  /// hasBlockInfoRecords - Return true if we've already read and processed the
-  /// block info block for this Bitstream.  We only process it for the first
-  /// cursor that walks over it.
+  /// Return true if we've already read and processed the block info block for
+  /// this Bitstream. We only process it for the first cursor that walks over
+  /// it.
   bool hasBlockInfoRecords() const { return !BlockInfoRecords.empty(); }
 
-  /// getBlockInfo - If there is block info for the specified ID, return it,
-  /// otherwise return null.
+  /// If there is block info for the specified ID, return it, otherwise return
+  /// null.
   const BlockInfo *getBlockInfo(unsigned BlockID) const {
     // Common case, the most recent entry matches BlockID.
     if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID)
@@ -123,23 +120,26 @@ public:
     BlockInfoRecords.back().BlockID = BlockID;
     return BlockInfoRecords.back();
   }
-};
 
+  /// Takes block info from the other bitstream reader.
+  ///
+  /// This is a "take" operation because BlockInfo records are non-trivial, and
+  /// indeed rather expensive.
+  void takeBlockInfo(BitstreamReader &&Other) {
+    assert(!hasBlockInfoRecords());
+    BlockInfoRecords = std::move(Other.BlockInfoRecords);
+  }
+};
 
-/// BitstreamEntry - When advancing through a bitstream cursor, each advance can
-/// discover a few different kinds of entries:
-///   Error    - Malformed bitcode was found.
-///   EndBlock - We've reached the end of the current block, (or the end of the
-///              file, which is treated like a series of EndBlock records.
-///   SubBlock - This is the start of a new subblock of a specific ID.
-///   Record   - This is a record with a specific AbbrevID.
-///
+/// When advancing through a bitstream cursor, each advance can discover a few
+/// different kinds of entries:
 struct BitstreamEntry {
   enum {
-    Error,
-    EndBlock,
-    SubBlock,
-    Record
+    Error,    // Malformed bitcode was found.
+    EndBlock, // We've reached the end of the current block, (or the end of the
+              // file, which is treated like a series of EndBlock records.
+    SubBlock, // This is the start of a new subblock of a specific ID.
+    Record    // This is a record with a specific AbbrevID.
   } Kind;
 
   unsigned ID;
@@ -158,9 +158,9 @@ struct BitstreamEntry {
   }
 };
 
-/// BitstreamCursor - This represents a position within a bitcode file.  There
-/// may be multiple independent cursors reading within one bitstream, each
-/// maintaining their own local state.
+/// This represents a position within a bitcode file. There may be multiple
+/// independent cursors reading within one bitstream, each maintaining their own
+/// local state.
 ///
 /// Unlike iterators, BitstreamCursors are heavy-weight objects that should not
 /// be passed by value.
@@ -169,92 +169,74 @@ class BitstreamCursor {
   BitstreamReader *BitStream;
   size_t NextChar;
 
+  // The size of the bicode. 0 if we don't know it yet.
+  size_t Size;
 
-  /// CurWord/word_t - This is the current data we have pulled from the stream
-  /// but have not returned to the client.  This is specifically and
-  /// intentionally defined to follow the word size of the host machine for
-  /// efficiency.  We use word_t in places that are aware of this to make it
-  /// perfectly explicit what is going on.
-  typedef uint32_t word_t;
+  /// This is the current data we have pulled from the stream but have not
+  /// returned to the client. This is specifically and intentionally defined to
+  /// follow the word size of the host machine for efficiency. We use word_t in
+  /// places that are aware of this to make it perfectly explicit what is going
+  /// on.
+  typedef size_t word_t;
   word_t CurWord;
 
-  /// BitsInCurWord - This is the number of bits in CurWord that are valid. This
-  /// is always from [0...31/63] inclusive (depending on word size).
+  /// This is the number of bits in CurWord that are valid. This is always from
+  /// [0...bits_of(size_t)-1] inclusive.
   unsigned BitsInCurWord;
 
-  // CurCodeSize - This is the declared size of code values used for the current
-  // block, in bits.
+  // This is the declared size of code values used for the current block, in
+  // bits.
   unsigned CurCodeSize;
 
-  /// CurAbbrevs - Abbrevs installed at in this block.
-  std::vector<BitCodeAbbrev*> CurAbbrevs;
+  /// Abbrevs installed at in this block.
+  std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> CurAbbrevs;
 
   struct Block {
     unsigned PrevCodeSize;
-    std::vector<BitCodeAbbrev*> PrevAbbrevs;
+    std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> PrevAbbrevs;
     explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
   };
 
-  /// BlockScope - This tracks the codesize of parent blocks.
+  /// This tracks the codesize of parent blocks.
   SmallVector<Block, 8> BlockScope;
 
 
 public:
-  BitstreamCursor() : BitStream(nullptr), NextChar(0) {}
-  BitstreamCursor(const BitstreamCursor &RHS)
-      : BitStream(nullptr), NextChar(0) {
-    operator=(RHS);
-  }
+  BitstreamCursor() { init(nullptr); }
 
-  explicit BitstreamCursor(BitstreamReader &R) : BitStream(&R) {
-    NextChar = 0;
-    CurWord = 0;
-    BitsInCurWord = 0;
-    CurCodeSize = 2;
-  }
+  explicit BitstreamCursor(BitstreamReader &R) { init(&R); }
 
-  void init(BitstreamReader &R) {
+  void init(BitstreamReader *R) {
     freeState();
 
-    BitStream = &R;
+    BitStream = R;
     NextChar = 0;
-    CurWord = 0;
+    Size = 0;
     BitsInCurWord = 0;
     CurCodeSize = 2;
   }
 
-  ~BitstreamCursor() {
-    freeState();
-  }
-
-  void operator=(const BitstreamCursor &RHS);
-
   void freeState();
 
-  bool isEndPos(size_t pos) {
-    return BitStream->getBitcodeBytes().isObjectEnd(static_cast<uint64_t>(pos));
-  }
-
   bool canSkipToPos(size_t pos) const {
     // pos can be skipped to if it is a valid address or one byte past the end.
     return pos == 0 || BitStream->getBitcodeBytes().isValidAddress(
         static_cast<uint64_t>(pos - 1));
   }
 
-  uint32_t getWord(size_t pos) {
-    uint8_t buf[4] = { 0xFF, 0xFF, 0xFF, 0xFF };
-    BitStream->getBitcodeBytes().readBytes(pos, sizeof(buf), buf);
-    return *reinterpret_cast<support::ulittle32_t *>(buf);
-  }
-
   bool AtEndOfStream() {
-    return BitsInCurWord == 0 && isEndPos(NextChar);
+    if (BitsInCurWord != 0)
+      return false;
+    if (Size != 0)
+      return Size == NextChar;
+    fillCurWord();
+    return BitsInCurWord == 0;
   }
 
-  /// getAbbrevIDWidth - Return the number of bits used to encode an abbrev #.
+  /// Return the number of bits used to encode an abbrev #.
   unsigned getAbbrevIDWidth() const { return CurCodeSize; }
 
-  /// GetCurrentBitNo - Return the bit # of the bit we are reading.
+  /// Return the bit # of the bit we are reading.
   uint64_t GetCurrentBitNo() const {
     return NextChar*CHAR_BIT - BitsInCurWord;
   }
@@ -268,19 +250,17 @@ public:
 
   /// Flags that modify the behavior of advance().
   enum {
-    /// AF_DontPopBlockAtEnd - If this flag is used, the advance() method does
-    /// not automatically pop the block scope when the end of a block is
-    /// reached.
+    /// If this flag is used, the advance() method does not automatically pop
+    /// the block scope when the end of a block is reached.
     AF_DontPopBlockAtEnd = 1,
 
-    /// AF_DontAutoprocessAbbrevs - If this flag is used, abbrev entries are
-    /// returned just like normal records.
+    /// If this flag is used, abbrev entries are returned just like normal
+    /// records.
     AF_DontAutoprocessAbbrevs = 2
   };
 
-  /// advance - Advance the current bitstream, returning the next entry in the
-  /// stream.
-  BitstreamEntry advance(unsigned Flags = 0) {
+      /// Advance the current bitstream, returning the next entry in the stream.
+      BitstreamEntry advance(unsigned Flags = 0) {
     while (1) {
       unsigned Code = ReadCode();
       if (Code == bitc::END_BLOCK) {
@@ -305,8 +285,8 @@ public:
     }
   }
 
-  /// advanceSkippingSubblocks - This is a convenience function for clients that
-  /// don't expect any subblocks.  This just skips over them automatically.
+  /// This is a convenience function for clients that don't expect any
+  /// subblocks. This just skips over them automatically.
   BitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) {
     while (1) {
       // If we found a normal entry, return it.
@@ -320,7 +300,7 @@ public:
     }
   }
 
-  /// JumpToBit - Reset the stream to the specified bit number.
+  /// Reset the stream to the specified bit number.
   void JumpToBit(uint64_t BitNo) {
     uintptr_t ByteNo = uintptr_t(BitNo/8) & ~(sizeof(word_t)-1);
     unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1));
@@ -329,77 +309,74 @@ public:
     // Move the cursor to the right word.
     NextChar = ByteNo;
     BitsInCurWord = 0;
-    CurWord = 0;
 
     // Skip over any bits that are already consumed.
-    if (WordBitNo) {
-      if (sizeof(word_t) > 4)
-        Read64(WordBitNo);
-      else
-        Read(WordBitNo);
+    if (WordBitNo)
+      Read(WordBitNo);
+  }
+
+  void fillCurWord() {
+    assert(Size == 0 || NextChar < (unsigned)Size);
+
+    // Read the next word from the stream.
+    uint8_t Array[sizeof(word_t)] = {0};
+
+    uint64_t BytesRead =
+        BitStream->getBitcodeBytes().readBytes(Array, sizeof(Array), NextChar);
+
+    // If we run out of data, stop at the end of the stream.
+    if (BytesRead == 0) {
+      Size = NextChar;
+      return;
     }
+
+    CurWord =
+        support::endian::read<word_t, support::little, support::unaligned>(
+            Array);
+    NextChar += BytesRead;
+    BitsInCurWord = BytesRead * 8;
   }
 
+  word_t Read(unsigned NumBits) {
+    static const unsigned BitsInWord = sizeof(word_t) * 8;
 
-  uint32_t Read(unsigned NumBits) {
-    assert(NumBits && NumBits <= 32 &&
-           "Cannot return zero or more than 32 bits!");
+    assert(NumBits && NumBits <= BitsInWord &&
+           "Cannot return zero or more than BitsInWord bits!");
+
+    static const unsigned Mask = sizeof(word_t) > 4 ? 0x3f : 0x1f;
 
     // If the field is fully contained by CurWord, return it quickly.
     if (BitsInCurWord >= NumBits) {
-      uint32_t R = uint32_t(CurWord) & (~0U >> (32-NumBits));
-      CurWord >>= NumBits;
+      word_t R = CurWord & (~word_t(0) >> (BitsInWord - NumBits));
+
+      // Use a mask to avoid undefined behavior.
+      CurWord >>= (NumBits & Mask);
+
       BitsInCurWord -= NumBits;
       return R;
     }
 
-    // If we run out of data, stop at the end of the stream.
-    if (isEndPos(NextChar)) {
-      CurWord = 0;
-      BitsInCurWord = 0;
-      return 0;
-    }
-
-    uint32_t R = uint32_t(CurWord);
-
-    // Read the next word from the stream.
-    uint8_t Array[sizeof(word_t)] = {0};
+    word_t R = BitsInCurWord ? CurWord : 0;
+    unsigned BitsLeft = NumBits - BitsInCurWord;
 
-    BitStream->getBitcodeBytes().readBytes(NextChar, sizeof(Array), Array);
+    fillCurWord();
 
-    // Handle big-endian byte-swapping if necessary.
-    support::detail::packed_endian_specific_integral
-      <word_t, support::little, support::unaligned> EndianValue;
-    memcpy(&EndianValue, Array, sizeof(Array));
+    // If we run out of data, stop at the end of the stream.
+    if (BitsLeft > BitsInCurWord)
+      return 0;
 
-    CurWord = EndianValue;
+    word_t R2 = CurWord & (~word_t(0) >> (BitsInWord - BitsLeft));
 
-    NextChar += sizeof(word_t);
+    // Use a mask to avoid undefined behavior.
+    CurWord >>= (BitsLeft & Mask);
 
-    // Extract NumBits-BitsInCurWord from what we just read.
-    unsigned BitsLeft = NumBits-BitsInCurWord;
+    BitsInCurWord -= BitsLeft;
 
-    // Be careful here, BitsLeft is in the range [1..32]/[1..64] inclusive.
-    R |= uint32_t((CurWord & (word_t(~0ULL) >> (sizeof(word_t)*8-BitsLeft)))
-                    << BitsInCurWord);
+    R |= R2 << (NumBits - BitsLeft);
 
-    // BitsLeft bits have just been used up from CurWord.  BitsLeft is in the
-    // range [1..32]/[1..64] so be careful how we shift.
-    if (BitsLeft != sizeof(word_t)*8)
-      CurWord >>= BitsLeft;
-    else
-      CurWord = 0;
-    BitsInCurWord = sizeof(word_t)*8-BitsLeft;
     return R;
   }
 
-  uint64_t Read64(unsigned NumBits) {
-    if (NumBits <= 32) return Read(NumBits);
-
-    uint64_t V = Read(32);
-    return V | (uint64_t)Read(NumBits-32) << 32;
-  }
-
   uint32_t ReadVBR(unsigned NumBits) {
     uint32_t Piece = Read(NumBits);
     if ((Piece & (1U << (NumBits-1))) == 0)
@@ -418,8 +395,8 @@ public:
     }
   }
 
-  // ReadVBR64 - Read a VBR that may have a value up to 64-bits in size.  The
-  // chunk size of the VBR must still be <= 32 bits though.
+  // Read a VBR that may have a value up to 64-bits in size. The chunk size of
+  // the VBR must still be <= 32 bits though.
   uint64_t ReadVBR64(unsigned NumBits) {
     uint32_t Piece = Read(NumBits);
     if ((Piece & (1U << (NumBits-1))) == 0)
@@ -450,7 +427,6 @@ private:
     }
 
     BitsInCurWord = 0;
-    CurWord = 0;
   }
 public:
 
@@ -462,15 +438,13 @@ public:
   // Block header:
   //    [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
 
-  /// ReadSubBlockID - Having read the ENTER_SUBBLOCK code, read the BlockID for
-  /// the block.
+  /// Having read the ENTER_SUBBLOCK code, read the BlockID for the block.
   unsigned ReadSubBlockID() {
     return ReadVBR(bitc::BlockIDWidth);
   }
 
-  /// SkipBlock - Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip
-  /// over the body of this block.  If the block record is malformed, return
-  /// true.
+  /// Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip over the body
+  /// of this block. If the block record is malformed, return true.
   bool SkipBlock() {
     // Read and ignore the codelen value.  Since we are skipping this block, we
     // don't care what code widths are used inside of it.
@@ -488,8 +462,8 @@ public:
     return false;
   }
 
-  /// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid, enter
-  /// the block, and return true if the block has an error.
+  /// Having read the ENTER_SUBBLOCK abbrevid, enter the block, and return true
+  /// if the block has an error.
   bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = nullptr);
 
   bool ReadBlockEnd() {
@@ -508,12 +482,7 @@ private:
   void popBlockScope() {
     CurCodeSize = BlockScope.back().PrevCodeSize;
 
-    // Delete abbrevs from popped scope.
-    for (unsigned i = 0, e = static_cast<unsigned>(CurAbbrevs.size());
-         i != e; ++i)
-      CurAbbrevs[i]->dropRef();
-
-    BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
+    CurAbbrevs = std::move(BlockScope.back().PrevAbbrevs);
     BlockScope.pop_back();
   }
 
@@ -521,23 +490,16 @@ private:
   // Record Processing
   //===--------------------------------------------------------------------===//
 
-private:
-  void readAbbreviatedLiteral(const BitCodeAbbrevOp &Op,
-                              SmallVectorImpl<uint64_t> &Vals);
-  void readAbbreviatedField(const BitCodeAbbrevOp &Op,
-                            SmallVectorImpl<uint64_t> &Vals);
-  void skipAbbreviatedField(const BitCodeAbbrevOp &Op);
-
 public:
 
-  /// getAbbrev - Return the abbreviation for the specified AbbrevId.
+  /// Return the abbreviation for the specified AbbrevId.
   const BitCodeAbbrev *getAbbrev(unsigned AbbrevID) {
     unsigned AbbrevNo = AbbrevID-bitc::FIRST_APPLICATION_ABBREV;
     assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!");
-    return CurAbbrevs[AbbrevNo];
+    return CurAbbrevs[AbbrevNo].get();
   }
 
-  /// skipRecord - Read the current record and discard it.
+  /// Read the current record and discard it.
   void skipRecord(unsigned AbbrevID);
 
   unsigned readRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals,