1 files changed, 173 insertions, 43 deletions
diff --git a/src/google/protobuf/io/coded_stream.h b/src/google/protobuf/io/coded_stream.h
index e5f6161..50a03a1 100644
--- a/src/google/protobuf/io/coded_stream.h
+++ b/src/google/protobuf/io/coded_stream.h
@@ -110,14 +110,27 @@
 #define GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
 
 #include <string>
-#ifndef _MSC_VER
-#include <sys/param.h>
-#endif  // !_MSC_VER
+#ifdef _MSC_VER
+  #if defined(_M_IX86) && \
+      !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST)
+    #define PROTOBUF_LITTLE_ENDIAN 1
+  #endif
+  #if _MSC_VER >= 1300
+    // If MSVC has "/RTCc" set, it will complain about truncating casts at
+    // runtime.  This file contains some intentional truncating casts.
+    #pragma runtime_checks("c", off)
+  #endif
+#else
+  #include <sys/param.h>   // __BYTE_ORDER
+  #if defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN && \
+      !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST)
+    #define PROTOBUF_LITTLE_ENDIAN 1
+  #endif
+#endif
 #include <google/protobuf/stubs/common.h>
-#include <google/protobuf/stubs/common.h>          // for GOOGLE_PREDICT_TRUE macro
 
-namespace google {
 
+namespace google {
 namespace protobuf {
 
 class DescriptorPool;
@@ -157,6 +170,9 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   // successfully and the stream's byte limit.
   ~CodedInputStream();
 
+  // Return true if this CodedInputStream reads from a flat array instead of
+  // a ZeroCopyInputStream.
+  inline bool IsFlat() const;
 
   // Skips a number of bytes.  Returns false if an underlying read error
   // occurs.
@@ -217,11 +233,22 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   // Read a tag.  This calls ReadVarint32() and returns the result, or returns
   // zero (which is not a valid tag) if ReadVarint32() fails.  Also, it updates
   // the last tag value, which can be checked with LastTagWas().
-  // Always inline because this is only called in once place per parse loop
+  // Always inline because this is only called in one place per parse loop
   // but it is called for every iteration of said loop, so it should be fast.
   // GCC doesn't want to inline this by default.
   uint32 ReadTag() GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
 
+  // This is usually a faster alternative to ReadTag() when cutoff is a manifest
+  // constant.  It does particularly well for cutoff >= 127.  The first part
+  // of the return value is the tag that was read, though it can also be 0 in
+  // the cases where ReadTag() would return 0.  If the second part is true
+  // then the tag is known to be in [0, cutoff].  If not, the tag either is
+  // above cutoff or is 0.  (There's intentional wiggle room when tag is 0,
+  // because that can arise in several ways, and for best performance we want
+  // to avoid an extra "is tag == 0?" check here.)
+  inline std::pair<uint32, bool> ReadTagWithCutoff(uint32 cutoff)
+      GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
+
   // Usually returns true if calling ReadVarint32() now would produce the given
   // value.  Will always return false if ReadVarint32() would not return the
   // given value.  If ExpectTag() returns true, it also advances past
@@ -248,8 +275,8 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   // zero, and ConsumedEntireMessage() will return true.
   bool ExpectAtEnd();
 
-  // If the last call to ReadTag() returned the given value, returns true.
-  // Otherwise, returns false;
+  // If the last call to ReadTag() or ReadTagWithCutoff() returned the
+  // given value, returns true.  Otherwise, returns false.
   //
   // This is needed because parsers for some types of embedded messages
   // (with field type TYPE_GROUP) don't actually know that they've reached the
@@ -298,7 +325,10 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
 
   // Returns the number of bytes left until the nearest limit on the
   // stack is hit, or -1 if no limits are in place.
-  int BytesUntilLimit();
+  int BytesUntilLimit() const;
+
+  // Returns current position relative to the beginning of the input stream.
+  int CurrentPosition() const;
 
   // Total Bytes Limit -----------------------------------------------
   // To prevent malicious users from sending excessively large messages
@@ -314,8 +344,9 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   // cause integer overflows is 512MB.  The default limit is 64MB.  Apps
   // should set shorter limits if possible.  If warning_threshold is not -1,
   // a warning will be printed to stderr after warning_threshold bytes are
-  // read.  An error will always be printed to stderr if the limit is
-  // reached.
+  // read.  For backwards compatibility all negative values get squashed to -1,
+  // as other negative values might have special internal meanings.
+  // An error will always be printed to stderr if the limit is reached.
   //
   // This is unrelated to PushLimit()/PopLimit().
   //
@@ -336,15 +367,20 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   //   something unusual.
   void SetTotalBytesLimit(int total_bytes_limit, int warning_threshold);
 
+  // The Total Bytes Limit minus the Current Position, or -1 if there
+  // is no Total Bytes Limit.
+  int BytesUntilTotalBytesLimit() const;
+
   // Recursion Limit -------------------------------------------------
   // To prevent corrupt or malicious messages from causing stack overflows,
   // we must keep track of the depth of recursion when parsing embedded
   // messages and groups.  CodedInputStream keeps track of this because it
   // is the only object that is passed down the stack during parsing.
 
-  // Sets the maximum recursion depth.  The default is 64.
+  // Sets the maximum recursion depth.  The default is 100.
   void SetRecursionLimit(int limit);
 
+
   // Increments the current recursion depth.  Returns true if the depth is
   // under the limit, false if it has gone over.
   bool IncrementRecursionDepth();
@@ -420,7 +456,8 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   //
   // Note that this feature is ignored when parsing "lite" messages as they do
   // not have descriptors.
-  void SetExtensionRegistry(DescriptorPool* pool, MessageFactory* factory);
+  void SetExtensionRegistry(const DescriptorPool* pool,
+                            MessageFactory* factory);
 
   // Get the DescriptorPool set via SetExtensionRegistry(), or NULL if no pool
   // has been provided.
@@ -444,7 +481,7 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   int overflow_bytes_;
 
   // LastTagWas() stuff.
-  uint32 last_tag_;         // result of last ReadTag().
+  uint32 last_tag_;         // result of last ReadTag() or ReadTagWithCutoff().
 
   // This is set true by ReadTag{Fallback/Slow}() if it is called when exactly
   // at EOF, or by ExpectAtEnd() when it returns true.  This happens when we
@@ -469,6 +506,11 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   // Maximum number of bytes to read, period.  This is unrelated to
   // current_limit_.  Set using SetTotalBytesLimit().
   int total_bytes_limit_;
+
+  // If positive/0: Limit for bytes read after which a warning due to size
+  // should be logged.
+  // If -1: Printing of warning disabled. Can be set by client.
+  // If -2: Internal: Limit has been reached, print full size when destructing.
   int total_bytes_warning_threshold_;
 
   // Current recursion depth, controlled by IncrementRecursionDepth() and
@@ -521,12 +563,13 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   bool ReadStringFallback(string* buffer, int size);
 
   // Return the size of the buffer.
-  uint32 BufferSize() const;
+  int BufferSize() const;
 
   static const int kDefaultTotalBytesLimit = 64 << 20;  // 64MB
 
   static const int kDefaultTotalBytesWarningThreshold = 32 << 20;  // 32MB
-  static const int kDefaultRecursionLimit = 64;
+
+  static int default_recursion_limit_;  // 100 by default.
 };
 
 // Class which encodes and writes binary data which is composed of varint-
@@ -554,7 +597,7 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
 //   char text[] = "Hello world!";
 //
 //   int coded_size = sizeof(magic_number) +
-//                    CodedOutputStream::Varint32Size(strlen(text)) +
+//                    CodedOutputStream::VarintSize32(strlen(text)) +
 //                    strlen(text);
 //
 //   uint8* buffer =
@@ -610,6 +653,9 @@ class LIBPROTOBUF_EXPORT CodedOutputStream {
 
   // Write raw bytes, copying them from the given buffer.
   void WriteRaw(const void* buffer, int size);
+  // Like WriteRaw()  but will try to write aliased data if aliasing is
+  // turned on.
+  void WriteRawMaybeAliased(const void* data, int size);
   // Like WriteRaw()  but writing directly to the target array.
   // This is _not_ inlined, as the compiler often optimizes memcpy into inline
   // copy loops. Since this gets called by every field with string or bytes
@@ -621,8 +667,21 @@ class LIBPROTOBUF_EXPORT CodedOutputStream {
   void WriteString(const string& str);
   // Like WriteString()  but writing directly to the target array.
   static uint8* WriteStringToArray(const string& str, uint8* target);
+  // Write the varint-encoded size of str followed by str.
+  static uint8* WriteStringWithSizeToArray(const string& str, uint8* target);
 
 
+  // Instructs the CodedOutputStream to allow the underlying
+  // ZeroCopyOutputStream to hold pointers to the original structure instead of
+  // copying, if it supports it (i.e. output->AllowsAliasing() is true).  If the
+  // underlying stream does not support aliasing, then enabling it has no
+  // effect.  For now, this only affects the behavior of
+  // WriteRawMaybeAliased().
+  //
+  // NOTE: It is caller's responsibility to ensure that the chunk of memory
+  // remains live until all of the data has been consumed from the stream.
+  void EnableAliasing(bool enabled);
+
   // Write a 32-bit little-endian integer.
   void WriteLittleEndian32(uint32 value);
   // Like WriteLittleEndian32()  but writing directly to the target array.
@@ -667,6 +726,21 @@ class LIBPROTOBUF_EXPORT CodedOutputStream {
   // If negative, 10 bytes.  Otheriwse, same as VarintSize32().
   static int VarintSize32SignExtended(int32 value);
 
+  // Evaluates VarintSize32(Value) at compile time; the result is ::value.
+  template <uint32 Value>
+  struct StaticVarintSize32 {
+    static const int value =
+        (Value >= (1 << 28))
+            ? 5
+            : (Value >= (1 << 21))
+                ? 4
+                : (Value >= (1 << 14))
+                    ? 3
+                    : (Value >= (1 << 7))
+                        ? 2
+                        : 1;
+  };
+
   // Returns the total number of bytes written since this object was created.
   inline int ByteCount() const;
 
@@ -682,6 +756,7 @@ class LIBPROTOBUF_EXPORT CodedOutputStream {
   int buffer_size_;
   int total_bytes_;  // Sum of sizes of all buffers seen so far.
   bool had_error_;   // Whether an error occurred during output.
+  bool aliasing_enabled_;  // See EnableAliasing().
 
   // Advance the buffer by a given number of bytes.
   void Advance(int amount);
@@ -690,6 +765,10 @@ class LIBPROTOBUF_EXPORT CodedOutputStream {
   // Advance(buffer_size_).
   bool Refresh();
 
+  // Like WriteRaw() but may avoid copying if the underlying
+  // ZeroCopyOutputStream supports it.
+  void WriteAliasedRaw(const void* buffer, int size);
+
   static uint8* WriteVarint32FallbackToArray(uint32 value, uint8* target);
 
   // Always-inlined versions of WriteVarint* functions so that code can be
@@ -735,8 +814,7 @@ inline bool CodedInputStream::ReadVarint64(uint64* value) {
 inline const uint8* CodedInputStream::ReadLittleEndian32FromArray(
     const uint8* buffer,
     uint32* value) {
-#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
-    defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
+#if defined(PROTOBUF_LITTLE_ENDIAN)
   memcpy(value, buffer, sizeof(*value));
   return buffer + sizeof(*value);
 #else
@@ -751,8 +829,7 @@ inline const uint8* CodedInputStream::ReadLittleEndian32FromArray(
 inline const uint8* CodedInputStream::ReadLittleEndian64FromArray(
     const uint8* buffer,
     uint64* value) {
-#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
-    defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
+#if defined(PROTOBUF_LITTLE_ENDIAN)
   memcpy(value, buffer, sizeof(*value));
   return buffer + sizeof(*value);
 #else
@@ -771,9 +848,8 @@ inline const uint8* CodedInputStream::ReadLittleEndian64FromArray(
 }
 
 inline bool CodedInputStream::ReadLittleEndian32(uint32* value) {
-#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
-    defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
-  if (GOOGLE_PREDICT_TRUE(BufferSize() >= sizeof(*value))) {
+#if defined(PROTOBUF_LITTLE_ENDIAN)
+  if (GOOGLE_PREDICT_TRUE(BufferSize() >= static_cast<int>(sizeof(*value)))) {
     memcpy(value, buffer_, sizeof(*value));
     Advance(sizeof(*value));
     return true;
@@ -786,9 +862,8 @@ inline bool CodedInputStream::ReadLittleEndian32(uint32* value) {
 }
 
 inline bool CodedInputStream::ReadLittleEndian64(uint64* value) {
-#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
-    defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
-  if (GOOGLE_PREDICT_TRUE(BufferSize() >= sizeof(*value))) {
+#if defined(PROTOBUF_LITTLE_ENDIAN)
+  if (GOOGLE_PREDICT_TRUE(BufferSize() >= static_cast<int>(sizeof(*value)))) {
     memcpy(value, buffer_, sizeof(*value));
     Advance(sizeof(*value));
     return true;
@@ -811,6 +886,45 @@ inline uint32 CodedInputStream::ReadTag() {
   }
 }
 
+inline std::pair<uint32, bool> CodedInputStream::ReadTagWithCutoff(
+    uint32 cutoff) {
+  // Reads a tag; .second is true only when the tag is known to be <= cutoff.
+  // In performance-sensitive code we can expect cutoff to be a compile-time
+  // constant, so "cutoff >= kMax1ByteVarint" is evaluated at compile time.
+  if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_)) {
+    // Hot case: buffer_ non-empty, buffer_[0] in [1, 128).
+    // TODO(gpike): Is it worth rearranging this? E.g., if the number of fields
+    // is large enough then is it better to check for the two-byte case first?
+    if (static_cast<int8>(buffer_[0]) > 0) {
+      const uint32 kMax1ByteVarint = 0x7f;
+      uint32 tag = last_tag_ = buffer_[0];
+      Advance(1);
+      return std::make_pair(tag, cutoff >= kMax1ByteVarint || tag <= cutoff);
+    }
+    // Other hot case: cutoff >= 0x80, buffer_ has at least two bytes available,
+    // and tag is two bytes.  The latter is tested by bitwise-and-not of the
+    // first byte and the second byte.
+    if (cutoff >= 0x80 &&
+        GOOGLE_PREDICT_TRUE(buffer_ + 1 < buffer_end_) &&
+        GOOGLE_PREDICT_TRUE((buffer_[0] & ~buffer_[1]) >= 0x80)) {
+      const uint32 kMax2ByteVarint = (0x7f << 7) + 0x7f;
+      uint32 tag = last_tag_ = (1u << 7) * buffer_[1] + (buffer_[0] - 0x80);
+      Advance(2);
+      // It might make sense to test for tag == 0 now, but it is so rare that
+      // we don't bother.  A varint-encoded 0 should be one byte unless
+      // the encoder lost its mind.  The second part of the return value of
+      // this function is allowed to be either true or false if the tag is 0,
+      // so we don't have to check for tag == 0.  We may need to check whether
+      // it exceeds cutoff.
+      bool at_or_below_cutoff = cutoff >= kMax2ByteVarint || tag <= cutoff;
+      return std::make_pair(tag, at_or_below_cutoff);
+    }
+  }
+  // Slow path.  For tag == 0, (tag - 1) wraps around, so .second is false.
+  last_tag_ = ReadTagFallback();
+  return std::make_pair(last_tag_, static_cast<uint32>(last_tag_ - 1) < cutoff);
+}
+
 inline bool CodedInputStream::LastTagWas(uint32 expected) {
   return last_tag_ == expected;
 }
@@ -867,7 +981,9 @@ inline bool CodedInputStream::ExpectAtEnd() {
   // If we are at a limit we know no more bytes can be read.  Otherwise, it's
   // hard to say without calling Refresh(), and we'd rather not do that.
 
-  if (buffer_ == buffer_end_ && buffer_size_after_limit_ != 0) {
+  if (buffer_ == buffer_end_ &&
+      ((buffer_size_after_limit_ != 0) ||
+       (total_bytes_read_ == current_limit_))) {
     last_tag_ = 0;                   // Pretend we called ReadTag()...
     legitimate_message_end_ = true;  // ... and it hit EOF.
     return true;
@@ -876,6 +992,10 @@ inline bool CodedInputStream::ExpectAtEnd() {
   }
 }
 
+inline int CodedInputStream::CurrentPosition() const {
+  return total_bytes_read_ - (BufferSize() + buffer_size_after_limit_);
+}  // NOTE(review): assumes total_bytes_read_ includes still-buffered bytes.
+
 inline uint8* CodedOutputStream::GetDirectBufferForNBytesAndAdvance(int size) {
   if (buffer_size_ < size) {
     return NULL;
@@ -915,8 +1035,7 @@ inline uint8* CodedOutputStream::WriteVarint32SignExtendedToArray(
 
 inline uint8* CodedOutputStream::WriteLittleEndian32ToArray(uint32 value,
                                                             uint8* target) {
-#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
-    defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
+#if defined(PROTOBUF_LITTLE_ENDIAN)
   memcpy(target, &value, sizeof(value));
 #else
   target[0] = static_cast<uint8>(value);
@@ -929,8 +1048,7 @@ inline uint8* CodedOutputStream::WriteLittleEndian32ToArray(uint32 value,
 
 inline uint8* CodedOutputStream::WriteLittleEndian64ToArray(uint64 value,
                                                             uint8* target) {
-#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
-    defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
+#if defined(PROTOBUF_LITTLE_ENDIAN)
   memcpy(target, &value, sizeof(value));
 #else
   uint32 part0 = static_cast<uint32>(value);
@@ -983,12 +1101,21 @@ inline int CodedOutputStream::VarintSize32SignExtended(int32 value) {
 }
 
 inline void CodedOutputStream::WriteString(const string& str) {
-  WriteRaw(str.data(), str.size());
+  WriteRaw(str.data(), static_cast<int>(str.size()));
+}
+
+inline void CodedOutputStream::WriteRawMaybeAliased(
+    const void* data, int size) {
+  if (!aliasing_enabled_) {
+    WriteRaw(data, size);  // Aliasing is off: take the plain copying path.
+    return;
+  }
+  WriteAliasedRaw(data, size);
 }
 
 inline uint8* CodedOutputStream::WriteStringToArray(
     const string& str, uint8* target) {
-  return WriteRawToArray(str.data(), str.size(), target);
+  return WriteRawToArray(str.data(), static_cast<int>(str.size()), target);
 }
 
 inline int CodedOutputStream::ByteCount() const {
@@ -1017,7 +1144,7 @@ inline void CodedInputStream::DecrementRecursionDepth() {
   if (recursion_depth_ > 0) --recursion_depth_;
 }
 
-inline void CodedInputStream::SetExtensionRegistry(DescriptorPool* pool,
+inline void CodedInputStream::SetExtensionRegistry(const DescriptorPool* pool,
                                                    MessageFactory* factory) {
   extension_pool_ = pool;
   extension_factory_ = factory;
@@ -1031,7 +1158,7 @@ inline MessageFactory* CodedInputStream::GetExtensionFactory() {
   return extension_factory_;
 }
 
-inline uint32 CodedInputStream::BufferSize() const {
+inline int CodedInputStream::BufferSize() const {
   return buffer_end_ - buffer_;
 }
 
@@ -1044,12 +1171,12 @@ inline CodedInputStream::CodedInputStream(ZeroCopyInputStream* input)
     last_tag_(0),
     legitimate_message_end_(false),
     aliasing_enabled_(false),
-    current_limit_(INT_MAX),
+    current_limit_(kint32max),
     buffer_size_after_limit_(0),
     total_bytes_limit_(kDefaultTotalBytesLimit),
     total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold),
     recursion_depth_(0),
-    recursion_limit_(kDefaultRecursionLimit),
+    recursion_limit_(default_recursion_limit_),
     extension_pool_(NULL),
     extension_factory_(NULL) {
   // Eagerly Refresh() so buffer space is immediately available.
@@ -1070,21 +1197,24 @@ inline CodedInputStream::CodedInputStream(const uint8* buffer, int size)
     total_bytes_limit_(kDefaultTotalBytesLimit),
     total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold),
     recursion_depth_(0),
-    recursion_limit_(kDefaultRecursionLimit),
+    recursion_limit_(default_recursion_limit_),
     extension_pool_(NULL),
     extension_factory_(NULL) {
   // Note that setting current_limit_ == size is important to prevent some
   // code paths from trying to access input_ and segfaulting.
 }
 
-inline CodedInputStream::~CodedInputStream() {
-  if (input_ != NULL) {
-    BackUpInputToCurrentPosition();
-  }
+inline bool CodedInputStream::IsFlat() const {
+  return input_ == NULL;  // No input stream attached: reading a flat array.
 }
 
 }  // namespace io
 }  // namespace protobuf
 
+
+#if defined(_MSC_VER) && _MSC_VER >= 1300
+  #pragma runtime_checks("c", restore)
+#endif  // _MSC_VER
+
 }  // namespace google
 #endif  // GOOGLE_PROTOBUF_IO_CODED_STREAM_H__