21 files changed, 2046 insertions, 259 deletions
diff --git a/src/google/protobuf/io/coded_stream.cc b/src/google/protobuf/io/coded_stream.cc
index 6a91a13..f6a8453 100644
--- a/src/google/protobuf/io/coded_stream.cc
+++ b/src/google/protobuf/io/coded_stream.cc
@@ -43,7 +43,7 @@
 #include <limits.h>
 #include <google/protobuf/io/zero_copy_stream.h>
 #include <google/protobuf/stubs/common.h>
-#include <google/protobuf/stubs/stl_util-inl.h>
+#include <google/protobuf/stubs/stl_util.h>
 
 
 namespace google {
@@ -56,10 +56,36 @@ static const int kMaxVarintBytes = 10;
 static const int kMaxVarint32Bytes = 5;
 
 
+inline bool NextNonEmpty(ZeroCopyInputStream* input,
+                         const void** data, int* size) {
+  bool success;
+  do {
+    success = input->Next(data, size);
+  } while (success && *size == 0);
+  return success;
+}
+
 }  // namespace
 
 // CodedInputStream ==================================================
 
+CodedInputStream::~CodedInputStream() {
+  if (input_ != NULL) {
+    BackUpInputToCurrentPosition();
+  }
+
+  if (total_bytes_warning_threshold_ == -2) {
+    GOOGLE_LOG(WARNING) << "The total number of bytes read was " << total_bytes_read_;
+  }
+}
+
+// Static.
+int CodedInputStream::default_recursion_limit_ = 100;
+
+
+void CodedOutputStream::EnableAliasing(bool enabled) {
+  aliasing_enabled_ = enabled && output_->AllowsAliasing();
+}
 
 void CodedInputStream::BackUpInputToCurrentPosition() {
   int backup_bytes = BufferSize() + buffer_size_after_limit_ + overflow_bytes_;
@@ -89,8 +115,7 @@ inline void CodedInputStream::RecomputeBufferLimits() {
 
 CodedInputStream::Limit CodedInputStream::PushLimit(int byte_limit) {
   // Current position relative to the beginning of the stream.
-  int current_position = total_bytes_read_ -
-      (BufferSize() + buffer_size_after_limit_);
+  int current_position = CurrentPosition();
 
   Limit old_limit = current_limit_;
 
@@ -124,10 +149,9 @@ void CodedInputStream::PopLimit(Limit limit) {
   legitimate_message_end_ = false;
 }
 
-int CodedInputStream::BytesUntilLimit() {
+int CodedInputStream::BytesUntilLimit() const {
   if (current_limit_ == INT_MAX) return -1;
-  int current_position = total_bytes_read_ -
-      (BufferSize() + buffer_size_after_limit_);
+  int current_position = CurrentPosition();
 
   return current_limit_ - current_position;
 }
@@ -136,13 +160,22 @@ void CodedInputStream::SetTotalBytesLimit(
     int total_bytes_limit, int warning_threshold) {
   // Make sure the limit isn't already past, since this could confuse other
   // code.
-  int current_position = total_bytes_read_ -
-      (BufferSize() + buffer_size_after_limit_);
+  int current_position = CurrentPosition();
   total_bytes_limit_ = max(current_position, total_bytes_limit);
-  total_bytes_warning_threshold_ = warning_threshold;
+  if (warning_threshold >= 0) {
+    total_bytes_warning_threshold_ = warning_threshold;
+  } else {
+    // warning_threshold is negative
+    total_bytes_warning_threshold_ = -1;
+  }
   RecomputeBufferLimits();
 }
 
+int CodedInputStream::BytesUntilTotalBytesLimit() const {
+  if (total_bytes_limit_ == INT_MAX) return -1;
+  return total_bytes_limit_ - CurrentPosition();
+}
+
 void CodedInputStream::PrintTotalBytesLimitError() {
   GOOGLE_LOG(ERROR) << "A protocol message was rejected because it was too "
                 "big (more than " << total_bytes_limit_
@@ -223,6 +256,14 @@ bool CodedInputStream::ReadStringFallback(string* buffer, int size) {
     buffer->clear();
   }
 
+  int closest_limit = min(current_limit_, total_bytes_limit_);
+  if (closest_limit != INT_MAX) {
+    int bytes_to_limit = closest_limit - CurrentPosition();
+    if (bytes_to_limit > 0 && size > 0 && size <= bytes_to_limit) {
+      buffer->reserve(size);
+    }
+  }
+
   int current_buffer_size;
   while ((current_buffer_size = BufferSize()) < size) {
     // Some STL implementations "helpfully" crash on buffer->append(NULL, 0).
@@ -289,11 +330,16 @@ inline const uint8* ReadVarint32FromArray(const uint8* buffer, uint32* value) {
   uint32 b;
   uint32 result;
 
-  b = *(ptr++); result  = (b & 0x7F)      ; if (!(b & 0x80)) goto done;
-  b = *(ptr++); result |= (b & 0x7F) <<  7; if (!(b & 0x80)) goto done;
-  b = *(ptr++); result |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done;
-  b = *(ptr++); result |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done;
-  b = *(ptr++); result |=  b         << 28; if (!(b & 0x80)) goto done;
+  b = *(ptr++); result  = b      ; if (!(b & 0x80)) goto done;
+  result -= 0x80;
+  b = *(ptr++); result += b <<  7; if (!(b & 0x80)) goto done;
+  result -= 0x80 << 7;
+  b = *(ptr++); result += b << 14; if (!(b & 0x80)) goto done;
+  result -= 0x80 << 14;
+  b = *(ptr++); result += b << 21; if (!(b & 0x80)) goto done;
+  result -= 0x80 << 21;
+  b = *(ptr++); result += b << 28; if (!(b & 0x80)) goto done;
+  // "result -= 0x80 << 28" is irrevelant.
 
   // If the input is larger than 32 bits, we still need to read it all
   // and discard the high-order bits.
@@ -323,8 +369,8 @@ bool CodedInputStream::ReadVarint32Slow(uint32* value) {
 
 bool CodedInputStream::ReadVarint32Fallback(uint32* value) {
   if (BufferSize() >= kMaxVarintBytes ||
-      // Optimization:  If the varint ends at exactly the end of the buffer,
-      // we can detect that and still use the fast path.
+      // Optimization:  We're also safe if the buffer is non-empty and it ends
+      // with a byte that would terminate a varint.
       (buffer_end_ > buffer_ && !(buffer_end_[-1] & 0x80))) {
     const uint8* end = ReadVarint32FromArray(buffer_, value);
     if (end == NULL) return false;
@@ -359,16 +405,17 @@ uint32 CodedInputStream::ReadTagSlow() {
 
   // For the slow path, just do a 64-bit read. Try to optimize for one-byte tags
   // again, since we have now refreshed the buffer.
-  uint64 result;
+  uint64 result = 0;
   if (!ReadVarint64(&result)) return 0;
   return static_cast<uint32>(result);
 }
 
 uint32 CodedInputStream::ReadTagFallback() {
-  if (BufferSize() >= kMaxVarintBytes ||
-      // Optimization:  If the varint ends at exactly the end of the buffer,
-      // we can detect that and still use the fast path.
-      (buffer_end_ > buffer_ && !(buffer_end_[-1] & 0x80))) {
+  const int buf_size = BufferSize();
+  if (buf_size >= kMaxVarintBytes ||
+      // Optimization:  We're also safe if the buffer is non-empty and it ends
+      // with a byte that would terminate a varint.
+      (buf_size > 0 && !(buffer_end_[-1] & 0x80))) {
     uint32 tag;
     const uint8* end = ReadVarint32FromArray(buffer_, &tag);
     if (end == NULL) {
@@ -379,7 +426,9 @@ uint32 CodedInputStream::ReadTagFallback() {
   } else {
     // We are commonly at a limit when attempting to read tags. Try to quickly
     // detect this case without making another function call.
-    if (buffer_ == buffer_end_ && buffer_size_after_limit_ > 0 &&
+    if ((buf_size == 0) &&
+        ((buffer_size_after_limit_ > 0) ||
+         (total_bytes_read_ == current_limit_)) &&
         // Make sure that the limit we hit is not total_bytes_limit_, since
         // in that case we still need to call Refresh() so that it prints an
         // error.
@@ -417,8 +466,8 @@ bool CodedInputStream::ReadVarint64Slow(uint64* value) {
 
 bool CodedInputStream::ReadVarint64Fallback(uint64* value) {
   if (BufferSize() >= kMaxVarintBytes ||
-      // Optimization:  If the varint ends at exactly the end of the buffer,
-      // we can detect that and still use the fast path.
+      // Optimization:  We're also safe if the buffer is non-empty and it ends
+      // with a byte that would terminate a varint.
       (buffer_end_ > buffer_ && !(buffer_end_[-1] & 0x80))) {
     // Fast path:  We have enough bytes left in the buffer to guarantee that
     // this read won't cross the end, so we can skip the checks.
@@ -430,20 +479,30 @@ bool CodedInputStream::ReadVarint64Fallback(uint64* value) {
     // processors.
     uint32 part0 = 0, part1 = 0, part2 = 0;
 
-    b = *(ptr++); part0  = (b & 0x7F)      ; if (!(b & 0x80)) goto done;
-    b = *(ptr++); part0 |= (b & 0x7F) <<  7; if (!(b & 0x80)) goto done;
-    b = *(ptr++); part0 |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done;
-    b = *(ptr++); part0 |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done;
-    b = *(ptr++); part1  = (b & 0x7F)      ; if (!(b & 0x80)) goto done;
-    b = *(ptr++); part1 |= (b & 0x7F) <<  7; if (!(b & 0x80)) goto done;
-    b = *(ptr++); part1 |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done;
-    b = *(ptr++); part1 |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done;
-    b = *(ptr++); part2  = (b & 0x7F)      ; if (!(b & 0x80)) goto done;
-    b = *(ptr++); part2 |= (b & 0x7F) <<  7; if (!(b & 0x80)) goto done;
+    b = *(ptr++); part0  = b      ; if (!(b & 0x80)) goto done;
+    part0 -= 0x80;
+    b = *(ptr++); part0 += b <<  7; if (!(b & 0x80)) goto done;
+    part0 -= 0x80 << 7;
+    b = *(ptr++); part0 += b << 14; if (!(b & 0x80)) goto done;
+    part0 -= 0x80 << 14;
+    b = *(ptr++); part0 += b << 21; if (!(b & 0x80)) goto done;
+    part0 -= 0x80 << 21;
+    b = *(ptr++); part1  = b      ; if (!(b & 0x80)) goto done;
+    part1 -= 0x80;
+    b = *(ptr++); part1 += b <<  7; if (!(b & 0x80)) goto done;
+    part1 -= 0x80 << 7;
+    b = *(ptr++); part1 += b << 14; if (!(b & 0x80)) goto done;
+    part1 -= 0x80 << 14;
+    b = *(ptr++); part1 += b << 21; if (!(b & 0x80)) goto done;
+    part1 -= 0x80 << 21;
+    b = *(ptr++); part2  = b      ; if (!(b & 0x80)) goto done;
+    part2 -= 0x80;
+    b = *(ptr++); part2 += b <<  7; if (!(b & 0x80)) goto done;
+    // "part2 -= 0x80 << 7" is irrelevant because (0x80 << 7) << 56 is 0.
 
     // We have overrun the maximum size of a varint (10 bytes).  The data
     // must be corrupt.
-    return NULL;
+    return false;
 
    done:
     Advance(ptr - buffer_);
@@ -483,13 +542,13 @@ bool CodedInputStream::Refresh() {
                       "CodedInputStream::SetTotalBytesLimit() in "
                       "google/protobuf/io/coded_stream.h.";
 
-    // Don't warn again for this stream.
-    total_bytes_warning_threshold_ = -1;
+    // Don't warn again for this stream, and print total size at the end.
+    total_bytes_warning_threshold_ = -2;
   }
 
   const void* void_buffer;
   int buffer_size;
-  if (input_->Next(&void_buffer, &buffer_size)) {
+  if (NextNonEmpty(input_, &void_buffer, &buffer_size)) {
     buffer_ = reinterpret_cast<const uint8*>(void_buffer);
     buffer_end_ = buffer_ + buffer_size;
     GOOGLE_CHECK_GE(buffer_size, 0);
@@ -528,7 +587,8 @@ CodedOutputStream::CodedOutputStream(ZeroCopyOutputStream* output)
     buffer_(NULL),
     buffer_size_(0),
     total_bytes_(0),
-    had_error_(false) {
+    had_error_(false),
+    aliasing_enabled_(false) {
   // Eagerly Refresh() so buffer space is immediately available.
   Refresh();
   // The Refresh() may have failed. If the client doesn't write any data,
@@ -582,6 +642,23 @@ uint8* CodedOutputStream::WriteRawToArray(
 }
 
 
+void CodedOutputStream::WriteAliasedRaw(const void* data, int size) {
+  if (size < buffer_size_
+      ) {
+    WriteRaw(data, size);
+  } else {
+    if (buffer_size_ > 0) {
+      output_->BackUp(buffer_size_);
+      total_bytes_ -= buffer_size_;
+      buffer_ = NULL;
+      buffer_size_ = 0;
+    }
+
+    total_bytes_ += size;
+    had_error_ |= !output_->WriteAliasedRaw(data, size);
+  }
+}
+
 void CodedOutputStream::WriteLittleEndian32(uint32 value) {
   uint8 bytes[sizeof(value)];
 
@@ -825,6 +902,13 @@ int CodedOutputStream::VarintSize64(uint64 value) {
   }
 }
 
+uint8* CodedOutputStream::WriteStringWithSizeToArray(const string& str,
+                                                     uint8* target) {
+  GOOGLE_DCHECK_LE(str.size(), kuint32max);
+  target = WriteVarint32ToArray(str.size(), target);
+  return WriteStringToArray(str, target);
+}
+
 }  // namespace io
 }  // namespace protobuf
 }  // namespace google
diff --git a/src/google/protobuf/io/coded_stream.h b/src/google/protobuf/io/coded_stream.h
index e5f6161..50a03a1 100644
--- a/src/google/protobuf/io/coded_stream.h
+++ b/src/google/protobuf/io/coded_stream.h
@@ -110,14 +110,27 @@
 #define GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
 
 #include <string>
-#ifndef _MSC_VER
-#include <sys/param.h>
-#endif  // !_MSC_VER
+#ifdef _MSC_VER
+  #if defined(_M_IX86) && \
+      !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST)
+    #define PROTOBUF_LITTLE_ENDIAN 1
+  #endif
+  #if _MSC_VER >= 1300
+    // If MSVC has "/RTCc" set, it will complain about truncating casts at
+    // runtime.  This file contains some intentional truncating casts.
+    #pragma runtime_checks("c", off)
+  #endif
+#else
+  #include <sys/param.h>   // __BYTE_ORDER
+  #if defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN && \
+      !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST)
+    #define PROTOBUF_LITTLE_ENDIAN 1
+  #endif
+#endif
 #include <google/protobuf/stubs/common.h>
-#include <google/protobuf/stubs/common.h>          // for GOOGLE_PREDICT_TRUE macro
 
-namespace google {
 
+namespace google {
 namespace protobuf {
 
 class DescriptorPool;
@@ -157,6 +170,9 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   // successfully and the stream's byte limit.
   ~CodedInputStream();
 
+  // Return true if this CodedInputStream reads from a flat array instead of
+  // a ZeroCopyInputStream.
+  inline bool IsFlat() const;
 
   // Skips a number of bytes.  Returns false if an underlying read error
   // occurs.
@@ -217,11 +233,22 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   // Read a tag.  This calls ReadVarint32() and returns the result, or returns
   // zero (which is not a valid tag) if ReadVarint32() fails.  Also, it updates
   // the last tag value, which can be checked with LastTagWas().
-  // Always inline because this is only called in once place per parse loop
+  // Always inline because this is only called in one place per parse loop
   // but it is called for every iteration of said loop, so it should be fast.
   // GCC doesn't want to inline this by default.
   uint32 ReadTag() GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
 
+  // This usually a faster alternative to ReadTag() when cutoff is a manifest
+  // constant.  It does particularly well for cutoff >= 127.  The first part
+  // of the return value is the tag that was read, though it can also be 0 in
+  // the cases where ReadTag() would return 0.  If the second part is true
+  // then the tag is known to be in [0, cutoff].  If not, the tag either is
+  // above cutoff or is 0.  (There's intentional wiggle room when tag is 0,
+  // because that can arise in several ways, and for best performance we want
+  // to avoid an extra "is tag == 0?" check here.)
+  inline std::pair<uint32, bool> ReadTagWithCutoff(uint32 cutoff)
+      GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
+
   // Usually returns true if calling ReadVarint32() now would produce the given
   // value.  Will always return false if ReadVarint32() would not return the
   // given value.  If ExpectTag() returns true, it also advances past
@@ -248,8 +275,8 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   // zero, and ConsumedEntireMessage() will return true.
   bool ExpectAtEnd();
 
-  // If the last call to ReadTag() returned the given value, returns true.
-  // Otherwise, returns false;
+  // If the last call to ReadTag() or ReadTagWithCutoff() returned the
+  // given value, returns true.  Otherwise, returns false;
   //
   // This is needed because parsers for some types of embedded messages
   // (with field type TYPE_GROUP) don't actually know that they've reached the
@@ -298,7 +325,10 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
 
   // Returns the number of bytes left until the nearest limit on the
   // stack is hit, or -1 if no limits are in place.
-  int BytesUntilLimit();
+  int BytesUntilLimit() const;
+
+  // Returns current position relative to the beginning of the input stream.
+  int CurrentPosition() const;
 
   // Total Bytes Limit -----------------------------------------------
   // To prevent malicious users from sending excessively large messages
@@ -314,8 +344,9 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   // cause integer overflows is 512MB.  The default limit is 64MB.  Apps
   // should set shorter limits if possible.  If warning_threshold is not -1,
   // a warning will be printed to stderr after warning_threshold bytes are
-  // read.  An error will always be printed to stderr if the limit is
-  // reached.
+  // read.  For backwards compatibility all negative values get squashed to -1,
+  // as other negative values might have special internal meanings.
+  // An error will always be printed to stderr if the limit is reached.
   //
   // This is unrelated to PushLimit()/PopLimit().
   //
@@ -336,15 +367,20 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   //   something unusual.
   void SetTotalBytesLimit(int total_bytes_limit, int warning_threshold);
 
+  // The Total Bytes Limit minus the Current Position, or -1 if there
+  // is no Total Bytes Limit.
+  int BytesUntilTotalBytesLimit() const;
+
   // Recursion Limit -------------------------------------------------
   // To prevent corrupt or malicious messages from causing stack overflows,
   // we must keep track of the depth of recursion when parsing embedded
   // messages and groups.  CodedInputStream keeps track of this because it
   // is the only object that is passed down the stack during parsing.
 
-  // Sets the maximum recursion depth.  The default is 64.
+  // Sets the maximum recursion depth.  The default is 100.
   void SetRecursionLimit(int limit);
 
+
   // Increments the current recursion depth.  Returns true if the depth is
   // under the limit, false if it has gone over.
   bool IncrementRecursionDepth();
@@ -420,7 +456,8 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   //
   // Note that this feature is ignored when parsing "lite" messages as they do
   // not have descriptors.
-  void SetExtensionRegistry(DescriptorPool* pool, MessageFactory* factory);
+  void SetExtensionRegistry(const DescriptorPool* pool,
+                            MessageFactory* factory);
 
   // Get the DescriptorPool set via SetExtensionRegistry(), or NULL if no pool
   // has been provided.
@@ -444,7 +481,7 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   int overflow_bytes_;
 
   // LastTagWas() stuff.
-  uint32 last_tag_;         // result of last ReadTag().
+  uint32 last_tag_;         // result of last ReadTag() or ReadTagWithCutoff().
 
   // This is set true by ReadTag{Fallback/Slow}() if it is called when exactly
   // at EOF, or by ExpectAtEnd() when it returns true.  This happens when we
@@ -469,6 +506,11 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   // Maximum number of bytes to read, period.  This is unrelated to
   // current_limit_.  Set using SetTotalBytesLimit().
   int total_bytes_limit_;
+
+  // If positive/0: Limit for bytes read after which a warning due to size
+  // should be logged.
+  // If -1: Printing of warning disabled. Can be set by client.
+  // If -2: Internal: Limit has been reached, print full size when destructing.
   int total_bytes_warning_threshold_;
 
   // Current recursion depth, controlled by IncrementRecursionDepth() and
@@ -521,12 +563,13 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
   bool ReadStringFallback(string* buffer, int size);
 
   // Return the size of the buffer.
-  uint32 BufferSize() const;
+  int BufferSize() const;
 
   static const int kDefaultTotalBytesLimit = 64 << 20;  // 64MB
 
   static const int kDefaultTotalBytesWarningThreshold = 32 << 20;  // 32MB
-  static const int kDefaultRecursionLimit = 64;
+
+  static int default_recursion_limit_;  // 100 by default.
 };
 
 // Class which encodes and writes binary data which is composed of varint-
@@ -554,7 +597,7 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
 //   char text[] = "Hello world!";
 //
 //   int coded_size = sizeof(magic_number) +
-//                    CodedOutputStream::Varint32Size(strlen(text)) +
+//                    CodedOutputStream::VarintSize32(strlen(text)) +
 //                    strlen(text);
 //
 //   uint8* buffer =
@@ -610,6 +653,9 @@ class LIBPROTOBUF_EXPORT CodedOutputStream {
 
   // Write raw bytes, copying them from the given buffer.
   void WriteRaw(const void* buffer, int size);
+  // Like WriteRaw()  but will try to write aliased data if aliasing is
+  // turned on.
+  void WriteRawMaybeAliased(const void* data, int size);
   // Like WriteRaw()  but writing directly to the target array.
   // This is _not_ inlined, as the compiler often optimizes memcpy into inline
   // copy loops. Since this gets called by every field with string or bytes
@@ -621,8 +667,21 @@ class LIBPROTOBUF_EXPORT CodedOutputStream {
   void WriteString(const string& str);
   // Like WriteString()  but writing directly to the target array.
   static uint8* WriteStringToArray(const string& str, uint8* target);
+  // Write the varint-encoded size of str followed by str.
+  static uint8* WriteStringWithSizeToArray(const string& str, uint8* target);
 
 
+  // Instructs the CodedOutputStream to allow the underlying
+  // ZeroCopyOutputStream to hold pointers to the original structure instead of
+  // copying, if it supports it (i.e. output->AllowsAliasing() is true).  If the
+  // underlying stream does not support aliasing, then enabling it has no
+  // affect.  For now, this only affects the behavior of
+  // WriteRawMaybeAliased().
+  //
+  // NOTE: It is caller's responsibility to ensure that the chunk of memory
+  // remains live until all of the data has been consumed from the stream.
+  void EnableAliasing(bool enabled);
+
   // Write a 32-bit little-endian integer.
   void WriteLittleEndian32(uint32 value);
   // Like WriteLittleEndian32()  but writing directly to the target array.
@@ -667,6 +726,21 @@ class LIBPROTOBUF_EXPORT CodedOutputStream {
   // If negative, 10 bytes.  Otheriwse, same as VarintSize32().
   static int VarintSize32SignExtended(int32 value);
 
+  // Compile-time equivalent of VarintSize32().
+  template <uint32 Value>
+  struct StaticVarintSize32 {
+    static const int value =
+        (Value < (1 << 7))
+            ? 1
+            : (Value < (1 << 14))
+                ? 2
+                : (Value < (1 << 21))
+                    ? 3
+                    : (Value < (1 << 28))
+                        ? 4
+                        : 5;
+  };
+
   // Returns the total number of bytes written since this object was created.
   inline int ByteCount() const;
 
@@ -682,6 +756,7 @@ class LIBPROTOBUF_EXPORT CodedOutputStream {
   int buffer_size_;
   int total_bytes_;  // Sum of sizes of all buffers seen so far.
   bool had_error_;   // Whether an error occurred during output.
+  bool aliasing_enabled_;  // See EnableAliasing().
 
   // Advance the buffer by a given number of bytes.
   void Advance(int amount);
@@ -690,6 +765,10 @@ class LIBPROTOBUF_EXPORT CodedOutputStream {
   // Advance(buffer_size_).
   bool Refresh();
 
+  // Like WriteRaw() but may avoid copying if the underlying
+  // ZeroCopyOutputStream supports it.
+  void WriteAliasedRaw(const void* buffer, int size);
+
   static uint8* WriteVarint32FallbackToArray(uint32 value, uint8* target);
 
   // Always-inlined versions of WriteVarint* functions so that code can be
@@ -735,8 +814,7 @@ inline bool CodedInputStream::ReadVarint64(uint64* value) {
 inline const uint8* CodedInputStream::ReadLittleEndian32FromArray(
     const uint8* buffer,
     uint32* value) {
-#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
-    defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
+#if defined(PROTOBUF_LITTLE_ENDIAN)
   memcpy(value, buffer, sizeof(*value));
   return buffer + sizeof(*value);
 #else
@@ -751,8 +829,7 @@ inline const uint8* CodedInputStream::ReadLittleEndian32FromArray(
 inline const uint8* CodedInputStream::ReadLittleEndian64FromArray(
     const uint8* buffer,
     uint64* value) {
-#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
-    defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
+#if defined(PROTOBUF_LITTLE_ENDIAN)
   memcpy(value, buffer, sizeof(*value));
   return buffer + sizeof(*value);
 #else
@@ -771,9 +848,8 @@ inline const uint8* CodedInputStream::ReadLittleEndian64FromArray(
 }
 
 inline bool CodedInputStream::ReadLittleEndian32(uint32* value) {
-#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
-    defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
-  if (GOOGLE_PREDICT_TRUE(BufferSize() >= sizeof(*value))) {
+#if defined(PROTOBUF_LITTLE_ENDIAN)
+  if (GOOGLE_PREDICT_TRUE(BufferSize() >= static_cast<int>(sizeof(*value)))) {
     memcpy(value, buffer_, sizeof(*value));
     Advance(sizeof(*value));
     return true;
@@ -786,9 +862,8 @@ inline bool CodedInputStream::ReadLittleEndian32(uint32* value) {
 }
 
 inline bool CodedInputStream::ReadLittleEndian64(uint64* value) {
-#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
-    defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
-  if (GOOGLE_PREDICT_TRUE(BufferSize() >= sizeof(*value))) {
+#if defined(PROTOBUF_LITTLE_ENDIAN)
+  if (GOOGLE_PREDICT_TRUE(BufferSize() >= static_cast<int>(sizeof(*value)))) {
     memcpy(value, buffer_, sizeof(*value));
     Advance(sizeof(*value));
     return true;
@@ -811,6 +886,45 @@ inline uint32 CodedInputStream::ReadTag() {
   }
 }
 
+inline std::pair<uint32, bool> CodedInputStream::ReadTagWithCutoff(
+    uint32 cutoff) {
+  // In performance-sensitive code we can expect cutoff to be a compile-time
+  // constant, and things like "cutoff >= kMax1ByteVarint" to be evaluated at
+  // compile time.
+  if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_)) {
+    // Hot case: buffer_ non_empty, buffer_[0] in [1, 128).
+    // TODO(gpike): Is it worth rearranging this? E.g., if the number of fields
+    // is large enough then is it better to check for the two-byte case first?
+    if (static_cast<int8>(buffer_[0]) > 0) {
+      const uint32 kMax1ByteVarint = 0x7f;
+      uint32 tag = last_tag_ = buffer_[0];
+      Advance(1);
+      return make_pair(tag, cutoff >= kMax1ByteVarint || tag <= cutoff);
+    }
+    // Other hot case: cutoff >= 0x80, buffer_ has at least two bytes available,
+    // and tag is two bytes.  The latter is tested by bitwise-and-not of the
+    // first byte and the second byte.
+    if (cutoff >= 0x80 &&
+        GOOGLE_PREDICT_TRUE(buffer_ + 1 < buffer_end_) &&
+        GOOGLE_PREDICT_TRUE((buffer_[0] & ~buffer_[1]) >= 0x80)) {
+      const uint32 kMax2ByteVarint = (0x7f << 7) + 0x7f;
+      uint32 tag = last_tag_ = (1u << 7) * buffer_[1] + (buffer_[0] - 0x80);
+      Advance(2);
+      // It might make sense to test for tag == 0 now, but it is so rare that
+      // that we don't bother.  A varint-encoded 0 should be one byte unless
+      // the encoder lost its mind.  The second part of the return value of
+      // this function is allowed to be either true or false if the tag is 0,
+      // so we don't have to check for tag == 0.  We may need to check whether
+      // it exceeds cutoff.
+      bool at_or_below_cutoff = cutoff >= kMax2ByteVarint || tag <= cutoff;
+      return make_pair(tag, at_or_below_cutoff);
+    }
+  }
+  // Slow path
+  last_tag_ = ReadTagFallback();
+  return make_pair(last_tag_, static_cast<uint32>(last_tag_ - 1) < cutoff);
+}
+
 inline bool CodedInputStream::LastTagWas(uint32 expected) {
   return last_tag_ == expected;
 }
@@ -867,7 +981,9 @@ inline bool CodedInputStream::ExpectAtEnd() {
   // If we are at a limit we know no more bytes can be read.  Otherwise, it's
   // hard to say without calling Refresh(), and we'd rather not do that.
 
-  if (buffer_ == buffer_end_ && buffer_size_after_limit_ != 0) {
+  if (buffer_ == buffer_end_ &&
+      ((buffer_size_after_limit_ != 0) ||
+       (total_bytes_read_ == current_limit_))) {
     last_tag_ = 0;                   // Pretend we called ReadTag()...
     legitimate_message_end_ = true;  // ... and it hit EOF.
     return true;
@@ -876,6 +992,10 @@ inline bool CodedInputStream::ExpectAtEnd() {
   }
 }
 
+inline int CodedInputStream::CurrentPosition() const {
+  return total_bytes_read_ - (BufferSize() + buffer_size_after_limit_);
+}
+
 inline uint8* CodedOutputStream::GetDirectBufferForNBytesAndAdvance(int size) {
   if (buffer_size_ < size) {
     return NULL;
@@ -915,8 +1035,7 @@ inline uint8* CodedOutputStream::WriteVarint32SignExtendedToArray(
 
 inline uint8* CodedOutputStream::WriteLittleEndian32ToArray(uint32 value,
                                                             uint8* target) {
-#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
-    defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
+#if defined(PROTOBUF_LITTLE_ENDIAN)
   memcpy(target, &value, sizeof(value));
 #else
   target[0] = static_cast<uint8>(value);
@@ -929,8 +1048,7 @@ inline uint8* CodedOutputStream::WriteLittleEndian32ToArray(uint32 value,
 
 inline uint8* CodedOutputStream::WriteLittleEndian64ToArray(uint64 value,
                                                             uint8* target) {
-#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
-    defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
+#if defined(PROTOBUF_LITTLE_ENDIAN)
   memcpy(target, &value, sizeof(value));
 #else
   uint32 part0 = static_cast<uint32>(value);
@@ -983,12 +1101,21 @@ inline int CodedOutputStream::VarintSize32SignExtended(int32 value) {
 }
 
 inline void CodedOutputStream::WriteString(const string& str) {
-  WriteRaw(str.data(), str.size());
+  WriteRaw(str.data(), static_cast<int>(str.size()));
+}
+
+inline void CodedOutputStream::WriteRawMaybeAliased(
+    const void* data, int size) {
+  if (aliasing_enabled_) {
+    WriteAliasedRaw(data, size);
+  } else {
+    WriteRaw(data, size);
+  }
 }
 
 inline uint8* CodedOutputStream::WriteStringToArray(
     const string& str, uint8* target) {
-  return WriteRawToArray(str.data(), str.size(), target);
+  return WriteRawToArray(str.data(), static_cast<int>(str.size()), target);
 }
 
 inline int CodedOutputStream::ByteCount() const {
@@ -1017,7 +1144,7 @@ inline void CodedInputStream::DecrementRecursionDepth() {
   if (recursion_depth_ > 0) --recursion_depth_;
 }
 
-inline void CodedInputStream::SetExtensionRegistry(DescriptorPool* pool,
+inline void CodedInputStream::SetExtensionRegistry(const DescriptorPool* pool,
                                                    MessageFactory* factory) {
   extension_pool_ = pool;
   extension_factory_ = factory;
@@ -1031,7 +1158,7 @@ inline MessageFactory* CodedInputStream::GetExtensionFactory() {
   return extension_factory_;
 }
 
-inline uint32 CodedInputStream::BufferSize() const {
+inline int CodedInputStream::BufferSize() const {
   return buffer_end_ - buffer_;
 }
 
@@ -1044,12 +1171,12 @@ inline CodedInputStream::CodedInputStream(ZeroCopyInputStream* input)
     last_tag_(0),
     legitimate_message_end_(false),
     aliasing_enabled_(false),
-    current_limit_(INT_MAX),
+    current_limit_(kint32max),
     buffer_size_after_limit_(0),
     total_bytes_limit_(kDefaultTotalBytesLimit),
     total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold),
     recursion_depth_(0),
-    recursion_limit_(kDefaultRecursionLimit),
+    recursion_limit_(default_recursion_limit_),
     extension_pool_(NULL),
     extension_factory_(NULL) {
   // Eagerly Refresh() so buffer space is immediately available.
@@ -1070,21 +1197,24 @@ inline CodedInputStream::CodedInputStream(const uint8* buffer, int size)
     total_bytes_limit_(kDefaultTotalBytesLimit),
     total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold),
     recursion_depth_(0),
-    recursion_limit_(kDefaultRecursionLimit),
+    recursion_limit_(default_recursion_limit_),
     extension_pool_(NULL),
     extension_factory_(NULL) {
   // Note that setting current_limit_ == size is important to prevent some
   // code paths from trying to access input_ and segfaulting.
 }
 
-inline CodedInputStream::~CodedInputStream() {
-  if (input_ != NULL) {
-    BackUpInputToCurrentPosition();
-  }
+inline bool CodedInputStream::IsFlat() const {
+  return input_ == NULL;
 }
 
 }  // namespace io
 }  // namespace protobuf
 
+
+#if defined(_MSC_VER) && _MSC_VER >= 1300
+  #pragma runtime_checks("c", restore)
+#endif  // _MSC_VER
+
 }  // namespace google
 #endif  // GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
diff --git a/src/google/protobuf/io/coded_stream_inl.h b/src/google/protobuf/io/coded_stream_inl.h
index e9799d4..41dc10e 100644
--- a/src/google/protobuf/io/coded_stream_inl.h
+++ b/src/google/protobuf/io/coded_stream_inl.h
@@ -37,8 +37,9 @@
 #define GOOGLE_PROTOBUF_IO_CODED_STREAM_INL_H__
 
 #include <google/protobuf/io/coded_stream.h>
+#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
 #include <string>
-#include <google/protobuf/stubs/stl_util-inl.h>
+#include <google/protobuf/stubs/stl_util.h>
 
 namespace google {
 namespace protobuf {
@@ -50,8 +51,12 @@ inline bool CodedInputStream::InternalReadStringInline(string* buffer,
 
   if (BufferSize() >= size) {
     STLStringResizeUninitialized(buffer, size);
-    memcpy(string_as_array(buffer), buffer_, size);
-    Advance(size);
+    // When buffer is empty, string_as_array(buffer) will return NULL but memcpy
+    // requires non-NULL pointers even when size is 0. Hench this check.
+    if (size > 0) {
+      memcpy(mutable_string_data(buffer), buffer_, size);
+      Advance(size);
+    }
     return true;
   }
 
diff --git a/src/google/protobuf/io/coded_stream_unittest.cc b/src/google/protobuf/io/coded_stream_unittest.cc
index 7d29833..0cfeb88 100644
--- a/src/google/protobuf/io/coded_stream_unittest.cc
+++ b/src/google/protobuf/io/coded_stream_unittest.cc
@@ -44,7 +44,6 @@
 #include <google/protobuf/testing/googletest.h>
 #include <gtest/gtest.h>
 #include <google/protobuf/io/zero_copy_stream_impl.h>
-#include <google/protobuf/stubs/strutil.h>
 
 
 // This declares an unsigned long long integer literal in a portable way.
@@ -125,6 +124,13 @@ namespace {
 
 class CodedStreamTest : public testing::Test {
  protected:
+  // Helper method used by tests for bytes warning. See implementation comment
+  // for further information.
+  static void SetupTotalBytesLimitWarningTest(
+      int total_bytes_limit, int warning_threshold,
+      vector<string>* out_errors, vector<string>* out_warnings);
+
+  // Buffer used during most of the tests. This assumes tests run sequentially.
   static const int kBufferSize = 1024 * 64;
   static uint8 buffer_[kBufferSize];
 };
@@ -208,6 +214,33 @@ TEST_2D(CodedStreamTest, ReadTag, kVarintCases, kBlockSizes) {
   EXPECT_EQ(kVarintCases_case.size, input.ByteCount());
 }
 
+// This is the regression test that verifies that there is no issues
+// with the empty input buffers handling.
+TEST_F(CodedStreamTest, EmptyInputBeforeEos) {
+  class In : public ZeroCopyInputStream {
+   public:
+    In() : count_(0) {}
+   private:
+    virtual bool Next(const void** data, int* size) {
+      *data = NULL;
+      *size = 0;
+      return count_++ < 2;
+    }
+    virtual void BackUp(int count)  {
+      GOOGLE_LOG(FATAL) << "Tests never call this.";
+    }
+    virtual bool Skip(int count) {
+      GOOGLE_LOG(FATAL) << "Tests never call this.";
+      return false;
+    }
+    virtual int64 ByteCount() const { return 0; }
+    int count_;
+  } in;
+  CodedInputStream input(&in);
+  input.ReadTag();
+  EXPECT_TRUE(input.ConsumedEntireMessage());
+}
+
 TEST_1D(CodedStreamTest, ExpectTag, kVarintCases) {
   // Leave one byte at the beginning of the buffer so we can read it
   // to force the first buffer to be loaded.
@@ -649,14 +682,197 @@ TEST_F(CodedStreamTest, ReadStringImpossiblyLargeFromStringOnHeap) {
   EXPECT_FALSE(coded_input.ReadString(&str, 1 << 30));
 }
 
+TEST_1D(CodedStreamTest, ReadStringReservesMemoryOnTotalLimit, kBlockSizes) {
+  memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+  ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+  {
+    CodedInputStream coded_input(&input);
+    coded_input.SetTotalBytesLimit(sizeof(kRawBytes), sizeof(kRawBytes));
+    EXPECT_EQ(sizeof(kRawBytes), coded_input.BytesUntilTotalBytesLimit());
+
+    string str;
+    EXPECT_TRUE(coded_input.ReadString(&str, strlen(kRawBytes)));
+    EXPECT_EQ(sizeof(kRawBytes) - strlen(kRawBytes),
+              coded_input.BytesUntilTotalBytesLimit());
+    EXPECT_EQ(kRawBytes, str);
+    // TODO(liujisi): Replace with a more meaningful test (see cl/60966023).
+    EXPECT_GE(str.capacity(), strlen(kRawBytes));
+  }
+
+  EXPECT_EQ(strlen(kRawBytes), input.ByteCount());
+}
+
+TEST_1D(CodedStreamTest, ReadStringReservesMemoryOnPushedLimit, kBlockSizes) {
+  memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+  ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+  {
+    CodedInputStream coded_input(&input);
+    coded_input.PushLimit(sizeof(buffer_));
+
+    string str;
+    EXPECT_TRUE(coded_input.ReadString(&str, strlen(kRawBytes)));
+    EXPECT_EQ(kRawBytes, str);
+    // TODO(liujisi): Replace with a more meaningful test (see cl/60966023).
+    EXPECT_GE(str.capacity(), strlen(kRawBytes));
+  }
+
+  EXPECT_EQ(strlen(kRawBytes), input.ByteCount());
+}
+
+TEST_F(CodedStreamTest, ReadStringNoReservationIfLimitsNotSet) {
+  memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+  // Buffer size in the input must be smaller than sizeof(kRawBytes),
+  // otherwise check against capacity will fail as ReadStringInline()
+  // will handle the reading and will reserve the memory as needed.
+  ArrayInputStream input(buffer_, sizeof(buffer_), 32);
+
+  {
+    CodedInputStream coded_input(&input);
+
+    string str;
+    EXPECT_TRUE(coded_input.ReadString(&str, strlen(kRawBytes)));
+    EXPECT_EQ(kRawBytes, str);
+    // Note: this check depends on string class implementation. It
+    // expects that string will allocate more than strlen(kRawBytes)
+    // if the content of kRawBytes is appended to string in small
+    // chunks.
+    // TODO(liujisi): Replace with a more meaningful test (see cl/60966023).
+    EXPECT_GE(str.capacity(), strlen(kRawBytes));
+  }
+
+  EXPECT_EQ(strlen(kRawBytes), input.ByteCount());
+}
+
+TEST_F(CodedStreamTest, ReadStringNoReservationSizeIsNegative) {
+  memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+  // Buffer size in the input must be smaller than sizeof(kRawBytes),
+  // otherwise check against capacity will fail as ReadStringInline()
+  // will handle the reading and will reserve the memory as needed.
+  ArrayInputStream input(buffer_, sizeof(buffer_), 32);
+
+  {
+    CodedInputStream coded_input(&input);
+    coded_input.PushLimit(sizeof(buffer_));
+
+    string str;
+    EXPECT_FALSE(coded_input.ReadString(&str, -1));
+    // Note: this check depends on string class implementation. It
+    // expects that string will always allocate the same amount of
+    // memory for an empty string.
+    EXPECT_EQ(string().capacity(), str.capacity());
+  }
+}
+
+TEST_F(CodedStreamTest, ReadStringNoReservationSizeIsLarge) {
+  memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+  // Buffer size in the input must be smaller than sizeof(kRawBytes),
+  // otherwise check against capacity will fail as ReadStringInline()
+  // will handle the reading and will reserve the memory as needed.
+  ArrayInputStream input(buffer_, sizeof(buffer_), 32);
+
+  {
+    CodedInputStream coded_input(&input);
+    coded_input.PushLimit(sizeof(buffer_));
+
+    string str;
+    EXPECT_FALSE(coded_input.ReadString(&str, 1 << 30));
+    EXPECT_GT(1 << 30, str.capacity());
+  }
+}
+
+TEST_F(CodedStreamTest, ReadStringNoReservationSizeIsOverTheLimit) {
+  memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+  // Buffer size in the input must be smaller than sizeof(kRawBytes),
+  // otherwise check against capacity will fail as ReadStringInline()
+  // will handle the reading and will reserve the memory as needed.
+  ArrayInputStream input(buffer_, sizeof(buffer_), 32);
+
+  {
+    CodedInputStream coded_input(&input);
+    coded_input.PushLimit(16);
+
+    string str;
+    EXPECT_FALSE(coded_input.ReadString(&str, strlen(kRawBytes)));
+    // Note: this check depends on string class implementation. It
+    // expects that string will allocate less than strlen(kRawBytes)
+    // for an empty string.
+    EXPECT_GT(strlen(kRawBytes), str.capacity());
+  }
+}
+
+TEST_F(CodedStreamTest, ReadStringNoReservationSizeIsOverTheTotalBytesLimit) {
+  memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+  // Buffer size in the input must be smaller than sizeof(kRawBytes),
+  // otherwise check against capacity will fail as ReadStringInline()
+  // will handle the reading and will reserve the memory as needed.
+  ArrayInputStream input(buffer_, sizeof(buffer_), 32);
+
+  {
+    CodedInputStream coded_input(&input);
+    coded_input.SetTotalBytesLimit(16, 16);
+
+    string str;
+    EXPECT_FALSE(coded_input.ReadString(&str, strlen(kRawBytes)));
+    // Note: this check depends on string class implementation. It
+    // expects that string will allocate less than strlen(kRawBytes)
+    // for an empty string.
+    EXPECT_GT(strlen(kRawBytes), str.capacity());
+  }
+}
+
+TEST_F(CodedStreamTest,
+       ReadStringNoReservationSizeIsOverTheClosestLimit_GlobalLimitIsCloser) {
+  memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+  // Buffer size in the input must be smaller than sizeof(kRawBytes),
+  // otherwise check against capacity will fail as ReadStringInline()
+  // will handle the reading and will reserve the memory as needed.
+  ArrayInputStream input(buffer_, sizeof(buffer_), 32);
+
+  {
+    CodedInputStream coded_input(&input);
+    coded_input.PushLimit(sizeof(buffer_));
+    coded_input.SetTotalBytesLimit(16, 16);
+
+    string str;
+    EXPECT_FALSE(coded_input.ReadString(&str, strlen(kRawBytes)));
+    // Note: this check depends on string class implementation. It
+    // expects that string will allocate less than strlen(kRawBytes)
+    // for an empty string.
+    EXPECT_GT(strlen(kRawBytes), str.capacity());
+  }
+}
+
+TEST_F(CodedStreamTest,
+       ReadStringNoReservationSizeIsOverTheClosestLimit_LocalLimitIsCloser) {
+  memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+  // Buffer size in the input must be smaller than sizeof(kRawBytes),
+  // otherwise check against capacity will fail as ReadStringInline()
+  // will handle the reading and will reserve the memory as needed.
+  ArrayInputStream input(buffer_, sizeof(buffer_), 32);
+
+  {
+    CodedInputStream coded_input(&input);
+    coded_input.PushLimit(16);
+    coded_input.SetTotalBytesLimit(sizeof(buffer_), sizeof(buffer_));
+    EXPECT_EQ(sizeof(buffer_), coded_input.BytesUntilTotalBytesLimit());
+
+    string str;
+    EXPECT_FALSE(coded_input.ReadString(&str, strlen(kRawBytes)));
+    // Note: this check depends on string class implementation. It
+    // expects that string will allocate less than strlen(kRawBytes)
+    // for an empty string.
+    EXPECT_GT(strlen(kRawBytes), str.capacity());
+  }
+}
+
 
 // -------------------------------------------------------------------
 // Skip
 
 const char kSkipTestBytes[] =
   "<Before skipping><To be skipped><After skipping>";
-const char kSkipOutputTestBytes[] =
-  "-----------------<To be skipped>----------------";
 
 TEST_1D(CodedStreamTest, SkipInput, kBlockSizes) {
   memcpy(buffer_, kSkipTestBytes, sizeof(kSkipTestBytes));
@@ -947,9 +1163,11 @@ TEST_F(CodedStreamTest, TotalBytesLimit) {
   ArrayInputStream input(buffer_, sizeof(buffer_));
   CodedInputStream coded_input(&input);
   coded_input.SetTotalBytesLimit(16, -1);
+  EXPECT_EQ(16, coded_input.BytesUntilTotalBytesLimit());
 
   string str;
   EXPECT_TRUE(coded_input.ReadString(&str, 16));
+  EXPECT_EQ(0, coded_input.BytesUntilTotalBytesLimit());
 
   vector<string> errors;
 
@@ -964,7 +1182,9 @@ TEST_F(CodedStreamTest, TotalBytesLimit) {
     "A protocol message was rejected because it was too big", errors[0]);
 
   coded_input.SetTotalBytesLimit(32, -1);
+  EXPECT_EQ(16, coded_input.BytesUntilTotalBytesLimit());
   EXPECT_TRUE(coded_input.ReadString(&str, 16));
+  EXPECT_EQ(0, coded_input.BytesUntilTotalBytesLimit());
 }
 
 TEST_F(CodedStreamTest, TotalBytesLimitNotValidMessageEnd) {
@@ -995,6 +1215,60 @@ TEST_F(CodedStreamTest, TotalBytesLimitNotValidMessageEnd) {
   EXPECT_FALSE(coded_input.ConsumedEntireMessage());
 }
 
+// This method is used by the tests below.
+// It constructs a CodedInputStream with the given limits and tries to read 2KiB
+// of data from it. Then it returns the logged errors and warnings in the given
+// vectors.
+void CodedStreamTest::SetupTotalBytesLimitWarningTest(
+    int total_bytes_limit, int warning_threshold,
+    vector<string>* out_errors, vector<string>* out_warnings) {
+  ArrayInputStream raw_input(buffer_, sizeof(buffer_), 128);
+
+  ScopedMemoryLog scoped_log;
+  {
+    CodedInputStream input(&raw_input);
+    input.SetTotalBytesLimit(total_bytes_limit, warning_threshold);
+    string str;
+    EXPECT_TRUE(input.ReadString(&str, 2048));
+  }
+
+  *out_errors = scoped_log.GetMessages(ERROR);
+  *out_warnings = scoped_log.GetMessages(WARNING);
+}
+
+TEST_F(CodedStreamTest, TotalBytesLimitWarning) {
+  vector<string> errors;
+  vector<string> warnings;
+  SetupTotalBytesLimitWarningTest(10240, 1024, &errors, &warnings);
+
+  EXPECT_EQ(0, errors.size());
+
+  ASSERT_EQ(2, warnings.size());
+  EXPECT_PRED_FORMAT2(testing::IsSubstring,
+    "Reading dangerously large protocol message.  If the message turns out to "
+    "be larger than 10240 bytes, parsing will be halted for security reasons.",
+    warnings[0]);
+  EXPECT_PRED_FORMAT2(testing::IsSubstring,
+    "The total number of bytes read was 2048",
+    warnings[1]);
+}
+
+TEST_F(CodedStreamTest, TotalBytesLimitWarningDisabled) {
+  vector<string> errors;
+  vector<string> warnings;
+
+  // Test with -1
+  SetupTotalBytesLimitWarningTest(10240, -1, &errors, &warnings);
+  EXPECT_EQ(0, errors.size());
+  EXPECT_EQ(0, warnings.size());
+
+  // Test again with -2, expecting the same result
+  SetupTotalBytesLimitWarningTest(10240, -2, &errors, &warnings);
+  EXPECT_EQ(0, errors.size());
+  EXPECT_EQ(0, warnings.size());
+}
+
+
 TEST_F(CodedStreamTest, RecursionLimit) {
   ArrayInputStream input(buffer_, sizeof(buffer_));
   CodedInputStream coded_input(&input);
@@ -1032,6 +1306,7 @@ TEST_F(CodedStreamTest, RecursionLimit) {
   EXPECT_FALSE(coded_input.IncrementRecursionDepth());     // 7
 }
 
+
 class ReallyBigInputStream : public ZeroCopyInputStream {
  public:
   ReallyBigInputStream() : backup_amount_(0), buffer_count_(0) {}
diff --git a/src/google/protobuf/io/gzip_stream.cc b/src/google/protobuf/io/gzip_stream.cc
index 84d277f..fe1f331 100644
--- a/src/google/protobuf/io/gzip_stream.cc
+++ b/src/google/protobuf/io/gzip_stream.cc
@@ -73,6 +73,17 @@ GzipInputStream::~GzipInputStream() {
   zerror_ = inflateEnd(&zcontext_);
 }
 
+static inline int internalInflateInit2(
+    z_stream* zcontext, GzipInputStream::Format format) {
+  int windowBitsFormat = 0;
+  switch (format) {
+    case GzipInputStream::GZIP: windowBitsFormat = 16; break;
+    case GzipInputStream::AUTO: windowBitsFormat = 32; break;
+    case GzipInputStream::ZLIB: windowBitsFormat = 0; break;
+  }
+  return inflateInit2(zcontext, /* windowBits */15 | windowBitsFormat);
+}
+
 int GzipInputStream::Inflate(int flush) {
   if ((zerror_ == Z_OK) && (zcontext_.avail_out == 0)) {
     // previous inflate filled output buffer. don't change input params yet.
@@ -89,14 +100,7 @@ int GzipInputStream::Inflate(int flush) {
     zcontext_.next_in = static_cast<Bytef*>(const_cast<void*>(in));
     zcontext_.avail_in = in_size;
     if (first) {
-      int windowBitsFormat = 0;
-      switch (format_) {
-        case GZIP: windowBitsFormat = 16; break;
-        case AUTO: windowBitsFormat = 32; break;
-        case ZLIB: windowBitsFormat = 0; break;
-      }
-      int error = inflateInit2(&zcontext_,
-        /* windowBits */15 | windowBitsFormat);
+      int error = internalInflateInit2(&zcontext_, format_);
       if (error != Z_OK) {
         return error;
       }
@@ -127,9 +131,21 @@ bool GzipInputStream::Next(const void** data, int* size) {
     return true;
   }
   if (zerror_ == Z_STREAM_END) {
-    *data = NULL;
-    *size = 0;
-    return false;
+    if (zcontext_.next_out != NULL) {
+      // sub_stream_ may have concatenated streams to follow
+      zerror_ = inflateEnd(&zcontext_);
+      if (zerror_ != Z_OK) {
+        return false;
+      }
+      zerror_ = internalInflateInit2(&zcontext_, format_);
+      if (zerror_ != Z_OK) {
+        return false;
+      }
+    } else {
+      *data = NULL;
+      *size = 0;
+      return false;
+    }
   }
   zerror_ = Inflate(Z_NO_FLUSH);
   if ((zerror_ == Z_STREAM_END) && (zcontext_.next_out == NULL)) {
@@ -183,16 +199,6 @@ GzipOutputStream::GzipOutputStream(ZeroCopyOutputStream* sub_stream,
   Init(sub_stream, options);
 }
 
-GzipOutputStream::GzipOutputStream(
-    ZeroCopyOutputStream* sub_stream, Format format, int buffer_size) {
-  Options options;
-  options.format = format;
-  if (buffer_size != -1) {
-    options.buffer_size = buffer_size;
-  }
-  Init(sub_stream, options);
-}
-
 void GzipOutputStream::Init(ZeroCopyOutputStream* sub_stream,
                             const Options& options) {
   sub_stream_ = sub_stream;
@@ -251,8 +257,7 @@ int GzipOutputStream::Deflate(int flush) {
     }
     error = deflate(&zcontext_, flush);
   } while (error == Z_OK && zcontext_.avail_out == 0);
-  if (((flush == Z_FULL_FLUSH) || (flush == Z_FINISH))
-      && (zcontext_.avail_out != sub_data_size_)) {
+  if ((flush == Z_FULL_FLUSH) || (flush == Z_FINISH)) {
     // Notify lower layer of data.
     sub_stream_->BackUp(zcontext_.avail_out);
     // We don't own the buffer anymore.
@@ -294,10 +299,11 @@ int64 GzipOutputStream::ByteCount() const {
 }
 
 bool GzipOutputStream::Flush() {
-  do {
-    zerror_ = Deflate(Z_FULL_FLUSH);
-  } while (zerror_ == Z_OK);
-  return zerror_ == Z_OK;
+  zerror_ = Deflate(Z_FULL_FLUSH);
+  // Return true if the flush succeeded or if it was a no-op.
+  return  (zerror_ == Z_OK) ||
+      (zerror_ == Z_BUF_ERROR && zcontext_.avail_in == 0 &&
+       zcontext_.avail_out != 0);
 }
 
 bool GzipOutputStream::Close() {
diff --git a/src/google/protobuf/io/gzip_stream.h b/src/google/protobuf/io/gzip_stream.h
index 65dbc5b..7ee24bc 100644
--- a/src/google/protobuf/io/gzip_stream.h
+++ b/src/google/protobuf/io/gzip_stream.h
@@ -45,6 +45,7 @@
 
 #include <zlib.h>
 
+#include <google/protobuf/stubs/common.h>
 #include <google/protobuf/io/zero_copy_stream.h>
 
 namespace google {
@@ -144,12 +145,6 @@ class LIBPROTOBUF_EXPORT GzipOutputStream : public ZeroCopyOutputStream {
       ZeroCopyOutputStream* sub_stream,
       const Options& options);
 
-  // DEPRECATED:  Use one of the above constructors instead.
-  GzipOutputStream(
-      ZeroCopyOutputStream* sub_stream,
-      Format format,
-      int buffer_size = -1) GOOGLE_ATTRIBUTE_DEPRECATED;
-
   virtual ~GzipOutputStream();
 
   // Return last error message or NULL if no error.
@@ -165,6 +160,13 @@ class LIBPROTOBUF_EXPORT GzipOutputStream : public ZeroCopyOutputStream {
   // necessary.
   // Compression may be less efficient stopping and starting around flushes.
   // Returns true if no error.
+  //
+  // Please ensure that block size is > 6. Here is an excerpt from the zlib
+  // doc that explains why:
+  //
+  // In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that avail_out
+  // is greater than six to avoid repeated flush markers due to
+  // avail_out == 0 on return.
   bool Flush();
 
   // Writes out all data and closes the gzip stream.
diff --git a/src/google/protobuf/io/printer.cc b/src/google/protobuf/io/printer.cc
index c7d3074..d2bf3f5 100644
--- a/src/google/protobuf/io/printer.cc
+++ b/src/google/protobuf/io/printer.cc
@@ -35,7 +35,6 @@
 #include <google/protobuf/io/printer.h>
 #include <google/protobuf/io/zero_copy_stream.h>
 #include <google/protobuf/stubs/common.h>
-#include <google/protobuf/stubs/strutil.h>
 
 namespace google {
 namespace protobuf {
@@ -51,8 +50,8 @@ Printer::Printer(ZeroCopyOutputStream* output, char variable_delimiter)
 }
 
 Printer::~Printer() {
-  // Only BackUp() if we're sure we've successfully called Next() at least once.
-  if (buffer_size_ > 0) {
+  // Only BackUp() if we have called Next() at least once and never failed.
+  if (buffer_size_ > 0 && !failed_) {
     output_->BackUp(buffer_size_);
   }
 }
@@ -132,6 +131,17 @@ void Printer::Print(const char* text,
   Print(vars, text);
 }
 
+void Printer::Print(const char* text,
+                    const char* variable1, const string& value1,
+                    const char* variable2, const string& value2,
+                    const char* variable3, const string& value3) {
+  map<string, string> vars;
+  vars[variable1] = value1;
+  vars[variable2] = value2;
+  vars[variable3] = value3;
+  Print(vars, text);
+}
+
 void Printer::Indent() {
   indent_ += "  ";
 }
@@ -158,7 +168,7 @@ void Printer::WriteRaw(const char* data, int size) {
   if (failed_) return;
   if (size == 0) return;
 
-  if (at_start_of_line_) {
+  if (at_start_of_line_ && (size > 0) && (data[0] != '\n')) {
     // Insert an indent.
     at_start_of_line_ = false;
     WriteRaw(indent_.data(), indent_.size());
diff --git a/src/google/protobuf/io/printer.h b/src/google/protobuf/io/printer.h
index de08538..5be4854 100644
--- a/src/google/protobuf/io/printer.h
+++ b/src/google/protobuf/io/printer.h
@@ -82,7 +82,11 @@ class LIBPROTOBUF_EXPORT Printer {
   // Like the first Print(), except the substitutions are given as parameters.
   void Print(const char* text, const char* variable1, const string& value1,
                                const char* variable2, const string& value2);
-  // TODO(kenton):  Overloaded versions with more variables?  Two seems
+  // Like the first Print(), except the substitutions are given as parameters.
+  void Print(const char* text, const char* variable1, const string& value1,
+                               const char* variable2, const string& value2,
+                               const char* variable3, const string& value3);
+  // TODO(kenton):  Overloaded versions with more variables?  Three seems
   //   to be enough.
 
   // Indent text by two spaces.  After calling Indent(), two spaces will be
diff --git a/src/google/protobuf/io/printer_unittest.cc b/src/google/protobuf/io/printer_unittest.cc
index 580a53d..76fb944 100644
--- a/src/google/protobuf/io/printer_unittest.cc
+++ b/src/google/protobuf/io/printer_unittest.cc
@@ -220,7 +220,7 @@ TEST(Printer, Indenting) {
 }
 
 // Death tests do not work on Windows as of yet.
-#ifdef GTEST_HAS_DEATH_TEST
+#ifdef PROTOBUF_HAS_DEATH_TEST
 TEST(Printer, Death) {
   char buffer[8192];
 
@@ -231,9 +231,33 @@ TEST(Printer, Death) {
   EXPECT_DEBUG_DEATH(printer.Print("$unclosed"), "Unclosed variable name");
   EXPECT_DEBUG_DEATH(printer.Outdent(), "without matching Indent");
 }
-#endif  // GTEST_HAS_DEATH_TEST
+#endif  // PROTOBUF_HAS_DEATH_TEST
 
-TEST(Printer, WriteFailure) {
+TEST(Printer, WriteFailurePartial) {
+  char buffer[17];
+
+  ArrayOutputStream output(buffer, sizeof(buffer));
+  Printer printer(&output, '$');
+
+  // Print 16 bytes to almost fill the buffer (should not fail).
+  printer.Print("0123456789abcdef");
+  EXPECT_FALSE(printer.failed());
+
+  // Try to print 2 chars. Only one fits.
+  printer.Print("<>");
+  EXPECT_TRUE(printer.failed());
+
+  // Anything else should fail too.
+  printer.Print(" ");
+  EXPECT_TRUE(printer.failed());
+  printer.Print("blah");
+  EXPECT_TRUE(printer.failed());
+
+  // Buffer should contain the first 17 bytes written.
+  EXPECT_EQ("0123456789abcdef<", string(buffer, sizeof(buffer)));
+}
+
+TEST(Printer, WriteFailureExact) {
   char buffer[16];
 
   ArrayOutputStream output(buffer, sizeof(buffer));
diff --git a/src/google/protobuf/io/strtod.cc b/src/google/protobuf/io/strtod.cc
new file mode 100644
index 0000000..cf0c6e1
--- /dev/null
+++ b/src/google/protobuf/io/strtod.cc
@@ -0,0 +1,113 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <google/protobuf/io/strtod.h>
+
+#include <cstdio>
+#include <cstring>
+#include <string>
+
+#include <google/protobuf/stubs/common.h>
+
+namespace google {
+namespace protobuf {
+namespace io {
+
+// ----------------------------------------------------------------------
+// NoLocaleStrtod()
+//   This code will make you cry.
+// ----------------------------------------------------------------------
+
+namespace {
+
+// Returns a string identical to *input except that the character pointed to
+// by radix_pos (which should be '.') is replaced with the locale-specific
+// radix character.
+string LocalizeRadix(const char* input, const char* radix_pos) {
+  // Determine the locale-specific radix character by calling sprintf() to
+  // print the number 1.5, then stripping off the digits.  As far as I can
+  // tell, this is the only portable, thread-safe way to get the C library
+  // to divuldge the locale's radix character.  No, localeconv() is NOT
+  // thread-safe.
+  char temp[16];
+  int size = sprintf(temp, "%.1f", 1.5);
+  GOOGLE_CHECK_EQ(temp[0], '1');
+  GOOGLE_CHECK_EQ(temp[size-1], '5');
+  GOOGLE_CHECK_LE(size, 6);
+
+  // Now replace the '.' in the input with it.
+  string result;
+  result.reserve(strlen(input) + size - 3);
+  result.append(input, radix_pos);
+  result.append(temp + 1, size - 2);
+  result.append(radix_pos + 1);
+  return result;
+}
+
+}  // namespace
+
+double NoLocaleStrtod(const char* text, char** original_endptr) {
+  // We cannot simply set the locale to "C" temporarily with setlocale()
+  // as this is not thread-safe.  Instead, we try to parse in the current
+  // locale first.  If parsing stops at a '.' character, then this is a
+  // pretty good hint that we're actually in some other locale in which
+  // '.' is not the radix character.
+
+  char* temp_endptr;
+  double result = strtod(text, &temp_endptr);
+  if (original_endptr != NULL) *original_endptr = temp_endptr;
+  if (*temp_endptr != '.') return result;
+
+  // Parsing halted on a '.'.  Perhaps we're in a different locale?  Let's
+  // try to replace the '.' with a locale-specific radix character and
+  // try again.
+  string localized = LocalizeRadix(text, temp_endptr);
+  const char* localized_cstr = localized.c_str();
+  char* localized_endptr;
+  result = strtod(localized_cstr, &localized_endptr);
+  if ((localized_endptr - localized_cstr) >
+      (temp_endptr - text)) {
+    // This attempt got further, so replacing the decimal must have helped.
+    // Update original_endptr to point at the right location.
+    if (original_endptr != NULL) {
+      // size_diff is non-zero if the localized radix has multiple bytes.
+      int size_diff = localized.size() - strlen(text);
+      // const_cast is necessary to match the strtod() interface.
+      *original_endptr = const_cast<char*>(
+        text + (localized_endptr - localized_cstr - size_diff));
+    }
+  }
+
+  return result;
+}
+
+}  // namespace io
+}  // namespace protobuf
+}  // namespace google
diff --git a/src/google/protobuf/io/strtod.h b/src/google/protobuf/io/strtod.h
new file mode 100644
index 0000000..2be3c43
--- /dev/null
+++ b/src/google/protobuf/io/strtod.h
@@ -0,0 +1,50 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// A locale-independent version of strtod(), used to parse floating
+// point default values in .proto files, where the decimal separator
+// is always a dot.
+
+#ifndef GOOGLE_PROTOBUF_IO_STRTOD_H__
+#define GOOGLE_PROTOBUF_IO_STRTOD_H__
+
+namespace google {
+namespace protobuf {
+namespace io {
+
+// A locale-independent version of the standard strtod(), which always
+// uses a dot as the decimal separator.
+double NoLocaleStrtod(const char* str, char** endptr);
+
+}  // namespace io
+}  // namespace protobuf
+
+}  // namespace google
+#endif  // GOOGLE_PROTOBUF_IO_STRTOD_H__
diff --git a/src/google/protobuf/io/tokenizer.cc b/src/google/protobuf/io/tokenizer.cc
index 38fa351..d149305 100644
--- a/src/google/protobuf/io/tokenizer.cc
+++ b/src/google/protobuf/io/tokenizer.cc
@@ -89,8 +89,12 @@
 // exactly pretty.
 
 #include <google/protobuf/io/tokenizer.h>
+#include <google/protobuf/stubs/common.h>
+#include <google/protobuf/stubs/stringprintf.h>
+#include <google/protobuf/io/strtod.h>
 #include <google/protobuf/io/zero_copy_stream.h>
 #include <google/protobuf/stubs/strutil.h>
+#include <google/protobuf/stubs/stl_util.h>
 
 namespace google {
 namespace protobuf {
@@ -118,6 +122,8 @@ namespace {
 
 CHARACTER_CLASS(Whitespace, c == ' ' || c == '\n' || c == '\t' ||
                             c == '\r' || c == '\v' || c == '\f');
+CHARACTER_CLASS(WhitespaceNoNewline, c == ' ' || c == '\t' ||
+                                     c == '\r' || c == '\v' || c == '\f');
 
 CHARACTER_CLASS(Unprintable, c < ' ' && c > '\0');
 
@@ -187,12 +193,16 @@ Tokenizer::Tokenizer(ZeroCopyInputStream* input,
     read_error_(false),
     line_(0),
     column_(0),
-    token_start_(-1),
+    record_target_(NULL),
+    record_start_(-1),
     allow_f_after_float_(false),
-    comment_style_(CPP_COMMENT_STYLE) {
+    comment_style_(CPP_COMMENT_STYLE),
+    require_space_after_number_(true),
+    allow_multiline_strings_(false) {
 
   current_.line = 0;
   current_.column = 0;
+  current_.end_column = 0;
   current_.type = TYPE_START;
 
   Refresh();
@@ -237,9 +247,9 @@ void Tokenizer::Refresh() {
   }
 
   // If we're in a token, append the rest of the buffer to it.
-  if (token_start_ >= 0 && token_start_ < buffer_size_) {
-    current_.text.append(buffer_ + token_start_, buffer_size_ - token_start_);
-    token_start_ = 0;
+  if (record_target_ != NULL && record_start_ < buffer_size_) {
+    record_target_->append(buffer_ + record_start_, buffer_size_ - record_start_);
+    record_start_ = 0;
   }
 
   const void* data = NULL;
@@ -260,23 +270,34 @@ void Tokenizer::Refresh() {
   current_char_ = buffer_[0];
 }
 
+inline void Tokenizer::RecordTo(string* target) {
+  record_target_ = target;
+  record_start_ = buffer_pos_;
+}
+
+inline void Tokenizer::StopRecording() {
+  // Note:  The if() is necessary because some STL implementations crash when
+  //   you call string::append(NULL, 0), presumably because they are trying to
+  //   be helpful by detecting the NULL pointer, even though there's nothing
+  //   wrong with reading zero bytes from NULL.
+  if (buffer_pos_ != record_start_) {
+    record_target_->append(buffer_ + record_start_, buffer_pos_ - record_start_);
+  }
+  record_target_ = NULL;
+  record_start_ = -1;
+}
+
 inline void Tokenizer::StartToken() {
-  token_start_ = buffer_pos_;
   current_.type = TYPE_START;    // Just for the sake of initializing it.
   current_.text.clear();
   current_.line = line_;
   current_.column = column_;
+  RecordTo(&current_.text);
 }
 
 inline void Tokenizer::EndToken() {
-  // Note:  The if() is necessary because some STL implementations crash when
-  //   you call string::append(NULL, 0), presumably because they are trying to
-  //   be helpful by detecting the NULL pointer, even though there's nothing
-  //   wrong with reading zero bytes from NULL.
-  if (buffer_pos_ != token_start_) {
-    current_.text.append(buffer_ + token_start_, buffer_pos_ - token_start_);
-  }
-  token_start_ = -1;
+  StopRecording();
+  current_.end_column = column_;
 }
 
 // -------------------------------------------------------------------
@@ -332,9 +353,16 @@ void Tokenizer::ConsumeString(char delimiter) {
   while (true) {
     switch (current_char_) {
       case '\0':
-      case '\n': {
-        AddError("String literals cannot cross line boundaries.");
+        AddError("Unexpected end of string.");
         return;
+
+      case '\n': {
+        if (!allow_multiline_strings_) {
+          AddError("String literals cannot cross line boundaries.");
+          return;
+        }
+        NextChar();
+        break;
       }
 
       case '\\': {
@@ -351,6 +379,27 @@ void Tokenizer::ConsumeString(char delimiter) {
             AddError("Expected hex digits for escape sequence.");
           }
           // Possibly followed by another hex digit, but again we don't care.
+        } else if (TryConsume('u')) {
+          if (!TryConsumeOne<HexDigit>() ||
+              !TryConsumeOne<HexDigit>() ||
+              !TryConsumeOne<HexDigit>() ||
+              !TryConsumeOne<HexDigit>()) {
+            AddError("Expected four hex digits for \\u escape sequence.");
+          }
+        } else if (TryConsume('U')) {
+          // We expect 8 hex digits; but only the range up to 0x10ffff is
+          // legal.
+          if (!TryConsume('0') ||
+              !TryConsume('0') ||
+              !(TryConsume('0') || TryConsume('1')) ||
+              !TryConsumeOne<HexDigit>() ||
+              !TryConsumeOne<HexDigit>() ||
+              !TryConsumeOne<HexDigit>() ||
+              !TryConsumeOne<HexDigit>() ||
+              !TryConsumeOne<HexDigit>()) {
+            AddError("Expected eight hex digits up to 10ffff for \\U escape "
+                     "sequence");
+          }
         } else {
           AddError("Invalid escape sequence in string literal.");
         }
@@ -410,7 +459,7 @@ Tokenizer::TokenType Tokenizer::ConsumeNumber(bool started_with_zero,
     }
   }
 
-  if (LookingAt<Letter>()) {
+  if (LookingAt<Letter>() && require_space_after_number_) {
     AddError("Need space between number and identifier.");
   } else if (current_char_ == '.') {
     if (is_float) {
@@ -424,26 +473,51 @@ Tokenizer::TokenType Tokenizer::ConsumeNumber(bool started_with_zero,
   return is_float ? TYPE_FLOAT : TYPE_INTEGER;
 }
 
-void Tokenizer::ConsumeLineComment() {
+void Tokenizer::ConsumeLineComment(string* content) {
+  if (content != NULL) RecordTo(content);
+
   while (current_char_ != '\0' && current_char_ != '\n') {
     NextChar();
   }
   TryConsume('\n');
+
+  if (content != NULL) StopRecording();
 }
 
-void Tokenizer::ConsumeBlockComment() {
+void Tokenizer::ConsumeBlockComment(string* content) {
   int start_line = line_;
   int start_column = column_ - 2;
 
+  if (content != NULL) RecordTo(content);
+
   while (true) {
     while (current_char_ != '\0' &&
            current_char_ != '*' &&
-           current_char_ != '/') {
+           current_char_ != '/' &&
+           current_char_ != '\n') {
       NextChar();
     }
 
-    if (TryConsume('*') && TryConsume('/')) {
+    if (TryConsume('\n')) {
+      if (content != NULL) StopRecording();
+
+      // Consume leading whitespace and asterisk;
+      ConsumeZeroOrMore<WhitespaceNoNewline>();
+      if (TryConsume('*')) {
+        if (TryConsume('/')) {
+          // End of comment.
+          break;
+        }
+      }
+
+      if (content != NULL) RecordTo(content);
+    } else if (TryConsume('*') && TryConsume('/')) {
       // End of comment.
+      if (content != NULL) {
+        StopRecording();
+        // Strip trailing "*/".
+        content->erase(content->size() - 2);
+      }
       break;
     } else if (TryConsume('/') && current_char_ == '*') {
       // Note:  We didn't consume the '*' because if there is a '/' after it
@@ -454,42 +528,59 @@ void Tokenizer::ConsumeBlockComment() {
       AddError("End-of-file inside block comment.");
       error_collector_->AddError(
         start_line, start_column, "  Comment started here.");
+      if (content != NULL) StopRecording();
       break;
     }
   }
 }
 
+Tokenizer::NextCommentStatus Tokenizer::TryConsumeCommentStart() {
+  if (comment_style_ == CPP_COMMENT_STYLE && TryConsume('/')) {
+    if (TryConsume('/')) {
+      return LINE_COMMENT;
+    } else if (TryConsume('*')) {
+      return BLOCK_COMMENT;
+    } else {
+      // Oops, it was just a slash.  Return it.
+      current_.type = TYPE_SYMBOL;
+      current_.text = "/";
+      current_.line = line_;
+      current_.column = column_ - 1;
+      current_.end_column = column_;
+      return SLASH_NOT_COMMENT;
+    }
+  } else if (comment_style_ == SH_COMMENT_STYLE && TryConsume('#')) {
+    return LINE_COMMENT;
+  } else {
+    return NO_COMMENT;
+  }
+}
+
 // -------------------------------------------------------------------
 
 bool Tokenizer::Next() {
-  TokenType last_token_type = current_.type;
-
-  // Did we skip any characters after the last token?
-  bool skipped_stuff = false;
+  previous_ = current_;
 
   while (!read_error_) {
-    if (TryConsumeOne<Whitespace>()) {
-      ConsumeZeroOrMore<Whitespace>();
-
-    } else if (comment_style_ == CPP_COMMENT_STYLE && TryConsume('/')) {
-      // Starting a comment?
-      if (TryConsume('/')) {
-        ConsumeLineComment();
-      } else if (TryConsume('*')) {
-        ConsumeBlockComment();
-      } else {
-        // Oops, it was just a slash.  Return it.
-        current_.type = TYPE_SYMBOL;
-        current_.text = "/";
-        current_.line = line_;
-        current_.column = column_ - 1;
+    ConsumeZeroOrMore<Whitespace>();
+
+    switch (TryConsumeCommentStart()) {
+      case LINE_COMMENT:
+        ConsumeLineComment(NULL);
+        continue;
+      case BLOCK_COMMENT:
+        ConsumeBlockComment(NULL);
+        continue;
+      case SLASH_NOT_COMMENT:
         return true;
-      }
+      case NO_COMMENT:
+        break;
+    }
 
-    } else if (comment_style_ == SH_COMMENT_STYLE && TryConsume('#')) {
-      ConsumeLineComment();
+    // Check for EOF before continuing.
+    if (read_error_) break;
 
-    } else if (LookingAt<Unprintable>() || current_char_ == '\0') {
+    if (LookingAt<Unprintable>() || current_char_ == '\0') {
       AddError("Invalid control characters encountered in text.");
       NextChar();
       // Skip more unprintable characters, too.  But, remember that '\0' is
@@ -517,7 +608,9 @@ bool Tokenizer::Next() {
 
         if (TryConsumeOne<Digit>()) {
           // It's a floating-point number.
-          if (last_token_type == TYPE_IDENTIFIER && !skipped_stuff) {
+          if (previous_.type == TYPE_IDENTIFIER &&
+              current_.line == previous_.line &&
+              current_.column == previous_.end_column) {
             // We don't accept syntax like "blah.123".
             error_collector_->AddError(line_, column_ - 2,
               "Need space between identifier and decimal point.");
@@ -535,6 +628,12 @@ bool Tokenizer::Next() {
         ConsumeString('\'');
         current_.type = TYPE_STRING;
       } else {
+        // Check if the high order bit is set.
+        if (current_char_ & 0x80) {
+          error_collector_->AddError(line_, column_,
+              StringPrintf("Interpreting non ascii codepoint %d.",
+                           static_cast<unsigned char>(current_char_)));
+        }
         NextChar();
         current_.type = TYPE_SYMBOL;
       }
@@ -542,8 +641,6 @@ bool Tokenizer::Next() {
       EndToken();
       return true;
     }
-
-    skipped_stuff = true;
   }
 
   // EOF
@@ -551,9 +648,199 @@ bool Tokenizer::Next() {
   current_.text.clear();
   current_.line = line_;
   current_.column = column_;
+  current_.end_column = column_;
   return false;
 }
 
+namespace {
+
+// Helper class for collecting comments and putting them in the right places.
+//
+// This basically just buffers the most recent comment until it can be decided
+// exactly where that comment should be placed.  When Flush() is called, the
+// current comment goes into either prev_trailing_comments or detached_comments.
+// When the CommentCollector is destroyed, the last buffered comment goes into
+// next_leading_comments.
+class CommentCollector {
+ public:
+  CommentCollector(string* prev_trailing_comments,
+                   vector<string>* detached_comments,
+                   string* next_leading_comments)
+      : prev_trailing_comments_(prev_trailing_comments),
+        detached_comments_(detached_comments),
+        next_leading_comments_(next_leading_comments),
+        has_comment_(false),
+        is_line_comment_(false),
+        can_attach_to_prev_(true) {
+    if (prev_trailing_comments != NULL) prev_trailing_comments->clear();
+    if (detached_comments != NULL) detached_comments->clear();
+    if (next_leading_comments != NULL) next_leading_comments->clear();
+  }
+
+  ~CommentCollector() {
+    // Whatever is in the buffer is a leading comment.
+    if (next_leading_comments_ != NULL && has_comment_) {
+      comment_buffer_.swap(*next_leading_comments_);
+    }
+  }
+
+  // About to read a line comment.  Get the comment buffer pointer in order to
+  // read into it.
+  string* GetBufferForLineComment() {
+    // We want to combine with previous line comments, but not block comments.
+    if (has_comment_ && !is_line_comment_) {
+      Flush();
+    }
+    has_comment_ = true;
+    is_line_comment_ = true;
+    return &comment_buffer_;
+  }
+
+  // About to read a block comment.  Get the comment buffer pointer in order to
+  // read into it.
+  string* GetBufferForBlockComment() {
+    if (has_comment_) {
+      Flush();
+    }
+    has_comment_ = true;
+    is_line_comment_ = false;
+    return &comment_buffer_;
+  }
+
+  void ClearBuffer() {
+    comment_buffer_.clear();
+    has_comment_ = false;
+  }
+
+  // Called once we know that the comment buffer is complete and is *not*
+  // connected to the next token.
+  void Flush() {
+    if (has_comment_) {
+      if (can_attach_to_prev_) {
+        if (prev_trailing_comments_ != NULL) {
+          prev_trailing_comments_->append(comment_buffer_);
+        }
+        can_attach_to_prev_ = false;
+      } else {
+        if (detached_comments_ != NULL) {
+          detached_comments_->push_back(comment_buffer_);
+        }
+      }
+      ClearBuffer();
+    }
+  }
+
+  void DetachFromPrev() {
+    can_attach_to_prev_ = false;
+  }
+
+ private:
+  string* prev_trailing_comments_;
+  vector<string>* detached_comments_;
+  string* next_leading_comments_;
+
+  string comment_buffer_;
+
+  // True if any comments were read into comment_buffer_.  This can be true even
+  // if comment_buffer_ is empty, namely if the comment was "/**/".
+  bool has_comment_;
+
+  // Is the comment in the comment buffer a line comment?
+  bool is_line_comment_;
+
+  // Is it still possible that we could be reading a comment attached to the
+  // previous token?
+  bool can_attach_to_prev_;
+};
+
+} // namespace
+
+bool Tokenizer::NextWithComments(string* prev_trailing_comments,
+                                 vector<string>* detached_comments,
+                                 string* next_leading_comments) {
+  CommentCollector collector(prev_trailing_comments, detached_comments,
+                             next_leading_comments);
+
+  if (current_.type == TYPE_START) {
+    collector.DetachFromPrev();
+  } else {
+    // A comment appearing on the same line must be attached to the previous
+    // declaration.
+    ConsumeZeroOrMore<WhitespaceNoNewline>();
+    switch (TryConsumeCommentStart()) {
+      case LINE_COMMENT:
+        ConsumeLineComment(collector.GetBufferForLineComment());
+
+        // Don't allow comments on subsequent lines to be attached to a trailing
+        // comment.
+        collector.Flush();
+        break;
+      case BLOCK_COMMENT:
+        ConsumeBlockComment(collector.GetBufferForBlockComment());
+
+        ConsumeZeroOrMore<WhitespaceNoNewline>();
+        if (!TryConsume('\n')) {
+          // Oops, the next token is on the same line.  If we recorded a comment
+          // we really have no idea which token it should be attached to.
+          collector.ClearBuffer();
+          return Next();
+        }
+
+        // Don't allow comments on subsequent lines to be attached to a trailing
+        // comment.
+        collector.Flush();
+        break;
+      case SLASH_NOT_COMMENT:
+        return true;
+      case NO_COMMENT:
+        if (!TryConsume('\n')) {
+          // The next token is on the same line.  There are no comments.
+          return Next();
+        }
+        break;
+    }
+  }
+
+  // OK, we are now on the line *after* the previous token.
+  while (true) {
+    ConsumeZeroOrMore<WhitespaceNoNewline>();
+
+    switch (TryConsumeCommentStart()) {
+      case LINE_COMMENT:
+        ConsumeLineComment(collector.GetBufferForLineComment());
+        break;
+      case BLOCK_COMMENT:
+        ConsumeBlockComment(collector.GetBufferForBlockComment());
+
+        // Consume the rest of the line so that we don't interpret it as a
+        // blank line the next time around the loop.
+        ConsumeZeroOrMore<WhitespaceNoNewline>();
+        TryConsume('\n');
+        break;
+      case SLASH_NOT_COMMENT:
+        return true;
+      case NO_COMMENT:
+        if (TryConsume('\n')) {
+          // Completely blank line.
+          collector.Flush();
+          collector.DetachFromPrev();
+        } else {
+          bool result = Next();
+          if (!result ||
+              current_.text == "}" ||
+              current_.text == "]" ||
+              current_.text == ")") {
+            // It looks like we're at the end of a scope.  In this case it
+            // makes no sense to attach a comment to the following token.
+            collector.Flush();
+          }
+          return result;
+        }
+        break;
+    }
+  }
+}
+
 // -------------------------------------------------------------------
 // Token-parsing helpers.  Remember that these don't need to report
 // errors since any errors should already have been reported while
@@ -623,17 +910,138 @@ double Tokenizer::ParseFloat(const string& text) {
   return result;
 }
 
+// Helper to append a Unicode code point to a string as UTF8, without bringing
+// in any external dependencies.
+static void AppendUTF8(uint32 code_point, string* output) {
+  uint32 tmp = 0;
+  int len = 0;
+  if (code_point <= 0x7f) {
+    tmp = code_point;
+    len = 1;
+  } else if (code_point <= 0x07ff) {
+    tmp = 0x0000c080 |
+        ((code_point & 0x07c0) << 2) |
+        (code_point & 0x003f);
+    len = 2;
+  } else if (code_point <= 0xffff) {
+    tmp = 0x00e08080 |
+        ((code_point & 0xf000) << 4) |
+        ((code_point & 0x0fc0) << 2) |
+        (code_point & 0x003f);
+    len = 3;
+  } else if (code_point <= 0x1fffff) {
+    tmp = 0xf0808080 |
+        ((code_point & 0x1c0000) << 6) |
+        ((code_point & 0x03f000) << 4) |
+        ((code_point & 0x000fc0) << 2) |
+        (code_point & 0x003f);
+    len = 4;
+  } else {
+    // UTF-16 is only defined for code points up to 0x10FFFF, and UTF-8 is
+    // normally only defined up to there as well.
+    StringAppendF(output, "\\U%08x", code_point);
+    return;
+  }
+  tmp = ghtonl(tmp);
+  output->append(reinterpret_cast<const char*>(&tmp) + sizeof(tmp) - len, len);
+}
+
+// Try to read <len> hex digits from ptr, and stuff the numeric result into
+// *result. Returns true if that many digits were successfully consumed.
+static bool ReadHexDigits(const char* ptr, int len, uint32* result) {
+  *result = 0;
+  if (len == 0) return false;
+  for (const char* end = ptr + len; ptr < end; ++ptr) {
+    if (*ptr == '\0') return false;
+    *result = (*result << 4) + DigitValue(*ptr);
+  }
+  return true;
+}
+
+// Handling UTF-16 surrogate pairs. UTF-16 encodes code points in the range
+// 0x10000...0x10ffff as a pair of numbers, a head surrogate followed by a trail
+// surrogate. These numbers are in a reserved range of Unicode code points, so
+// if we encounter such a pair we know how to parse it and convert it into a
+// single code point.
+static const uint32 kMinHeadSurrogate = 0xd800;
+static const uint32 kMaxHeadSurrogate = 0xdc00;
+static const uint32 kMinTrailSurrogate = 0xdc00;
+static const uint32 kMaxTrailSurrogate = 0xe000;
+
+static inline bool IsHeadSurrogate(uint32 code_point) {
+  return (code_point >= kMinHeadSurrogate) && (code_point < kMaxHeadSurrogate);
+}
+
+static inline bool IsTrailSurrogate(uint32 code_point) {
+  return (code_point >= kMinTrailSurrogate) &&
+      (code_point < kMaxTrailSurrogate);
+}
+
+// Combine a head and trail surrogate into a single Unicode code point.
+static uint32 AssembleUTF16(uint32 head_surrogate, uint32 trail_surrogate) {
+  GOOGLE_DCHECK(IsHeadSurrogate(head_surrogate));
+  GOOGLE_DCHECK(IsTrailSurrogate(trail_surrogate));
+  return 0x10000 + (((head_surrogate - kMinHeadSurrogate) << 10) |
+      (trail_surrogate - kMinTrailSurrogate));
+}
+
+// Convert the escape sequence parameter to a number of expected hex digits.
+static inline int UnicodeLength(char key) {
+  if (key == 'u') return 4;
+  if (key == 'U') return 8;
+  return 0;
+}
+
+// Given a pointer to the 'u' or 'U' starting a Unicode escape sequence, attempt
+// to parse that sequence. On success, returns a pointer to the first char
+// beyond that sequence, and fills in *code_point. On failure, returns ptr
+// itself.
+static const char* FetchUnicodePoint(const char* ptr, uint32* code_point) {
+  const char* p = ptr;
+  // Fetch the code point.
+  const int len = UnicodeLength(*p++);
+  if (!ReadHexDigits(p, len, code_point))
+    return ptr;
+  p += len;
+
+  // Check if the code point we read is a "head surrogate." If so, then we
+  // expect it to be immediately followed by another code point which is a valid
+  // "trail surrogate," and together they form a UTF-16 pair which decodes into
+  // a single Unicode point. Trail surrogates may only use \u, not \U.
+  if (IsHeadSurrogate(*code_point) && *p == '\\' && *(p + 1) == 'u') {
+    uint32 trail_surrogate;
+    if (ReadHexDigits(p + 2, 4, &trail_surrogate) &&
+        IsTrailSurrogate(trail_surrogate)) {
+      *code_point = AssembleUTF16(*code_point, trail_surrogate);
+      p += 6;
+    }
+    // If this failed, then we just emit the head surrogate as a code point.
+    // It's bogus, but so is the string.
+  }
+
+  return p;
+}
+
+// The text string must begin and end with single or double quote
+// characters.
 void Tokenizer::ParseStringAppend(const string& text, string* output) {
-  // Reminder:  text[0] is always the quote character.  (If text is
-  //   empty, it's invalid, so we'll just return.)
-  if (text.empty()) {
+  // Reminder: text[0] is always a quote character.  (If text is
+  // empty, it's invalid, so we'll just return).
+  const size_t text_size = text.size();
+  if (text_size == 0) {
     GOOGLE_LOG(DFATAL)
       << " Tokenizer::ParseStringAppend() passed text that could not"
          " have been tokenized as a string: " << CEscape(text);
     return;
   }
 
-  output->reserve(output->size() + text.size());
+  // Reserve room for new string. The branch is necessary because if
+  // there is already space available the reserve() call might
+  // downsize the output.
+  const size_t new_len = text_size + output->size();
+  if (new_len > output->capacity()) {
+    output->reserve(new_len);
+  }
 
   // Loop through the string copying characters to "output" and
   // interpreting escape sequences.  Note that any invalid escape
@@ -671,19 +1079,47 @@ void Tokenizer::ParseStringAppend(const string& text, string* output) {
         }
         output->push_back(static_cast<char>(code));
 
+      } else if (*ptr == 'u' || *ptr == 'U') {
+        uint32 unicode;
+        const char* end = FetchUnicodePoint(ptr, &unicode);
+        if (end == ptr) {
+          // Failure: Just dump out what we saw, don't try to parse it.
+          output->push_back(*ptr);
+        } else {
+          AppendUTF8(unicode, output);
+          ptr = end - 1;  // Because we're about to ++ptr.
+        }
       } else {
         // Some other escape code.
         output->push_back(TranslateEscape(*ptr));
       }
 
-    } else if (*ptr == text[0]) {
-      // Ignore quote matching the starting quote.
+    } else if (*ptr == text[0] && ptr[1] == '\0') {
+      // Ignore final quote matching the starting quote.
     } else {
       output->push_back(*ptr);
     }
   }
+}
 
-  return;
+template<typename CharacterClass>
+static bool AllInClass(const string& s) {
+  for (int i = 0; i < s.size(); ++i) {
+    if (!CharacterClass::InClass(s[i]))
+      return false;
+  }
+  return true;
+}
+
+bool Tokenizer::IsIdentifier(const string& text) {
+  // Mirrors IDENTIFIER definition in Tokenizer::Next() above.
+  if (text.size() == 0)
+    return false;
+  if (!Letter::InClass(text.at(0)))
+    return false;
+  if (!AllInClass<Alphanumeric>(text.substr(1)))
+    return false;
+  return true;
 }
 
 }  // namespace io
diff --git a/src/google/protobuf/io/tokenizer.h b/src/google/protobuf/io/tokenizer.h
index d115161..2f07116 100644
--- a/src/google/protobuf/io/tokenizer.h
+++ b/src/google/protobuf/io/tokenizer.h
@@ -38,6 +38,7 @@
 #define GOOGLE_PROTOBUF_IO_TOKENIZER_H__
 
 #include <string>
+#include <vector>
 #include <google/protobuf/stubs/common.h>
 
 namespace google {
@@ -66,7 +67,8 @@ class LIBPROTOBUF_EXPORT ErrorCollector {
   // Indicates that there was a warning in the input at the given line and
   // column numbers.  The numbers are zero-based, so you may want to add
   // 1 to each before printing them.
-  virtual void AddWarning(int line, int column, const string& message) { }
+  virtual void AddWarning(int /* line */, int /* column */,
+                          const string& /* message */) { }
 
  private:
   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ErrorCollector);
@@ -122,16 +124,68 @@ class LIBPROTOBUF_EXPORT Tokenizer {
     // the token within the input stream.  They are zero-based.
     int line;
     int column;
+    int end_column;
   };
 
   // Get the current token.  This is updated when Next() is called.  Before
   // the first call to Next(), current() has type TYPE_START and no contents.
   const Token& current();
 
+  // Return the previous token -- i.e. what current() returned before the
+  // previous call to Next().
+  const Token& previous();
+
   // Advance to the next token.  Returns false if the end of the input is
   // reached.
   bool Next();
 
+  // Like Next(), but also collects comments which appear between the previous
+  // and next tokens.
+  //
+  // Comments which appear to be attached to the previous token are stored
+  // in *prev_tailing_comments.  Comments which appear to be attached to the
+  // next token are stored in *next_leading_comments.  Comments appearing in
+  // between which do not appear to be attached to either will be added to
+  // detached_comments.  Any of these parameters can be NULL to simply discard
+  // the comments.
+  //
+  // A series of line comments appearing on consecutive lines, with no other
+  // tokens appearing on those lines, will be treated as a single comment.
+  //
+  // Only the comment content is returned; comment markers (e.g. //) are
+  // stripped out.  For block comments, leading whitespace and an asterisk will
+  // be stripped from the beginning of each line other than the first.  Newlines
+  // are included in the output.
+  //
+  // Examples:
+  //
+  //   optional int32 foo = 1;  // Comment attached to foo.
+  //   // Comment attached to bar.
+  //   optional int32 bar = 2;
+  //
+  //   optional string baz = 3;
+  //   // Comment attached to baz.
+  //   // Another line attached to baz.
+  //
+  //   // Comment attached to qux.
+  //   //
+  //   // Another line attached to qux.
+  //   optional double qux = 4;
+  //
+  //   // Detached comment.  This is not attached to qux or corge
+  //   // because there are blank lines separating it from both.
+  //
+  //   optional string corge = 5;
+  //   /* Block comment attached
+  //    * to corge.  Leading asterisks
+  //    * will be removed. */
+  //   /* Block comment attached to
+  //    * grault. */
+  //   optional int32 grault = 6;
+  bool NextWithComments(string* prev_trailing_comments,
+                        vector<string>* detached_comments,
+                        string* next_leading_comments);
+
   // Parse helpers ---------------------------------------------------
 
   // Parses a TYPE_FLOAT token.  This never fails, so long as the text actually
@@ -175,11 +229,27 @@ class LIBPROTOBUF_EXPORT Tokenizer {
   // Sets the comment style.
   void set_comment_style(CommentStyle style) { comment_style_ = style; }
 
+  // Whether to require whitespace between a number and a field name.
+  // Default is true. Do not use this; for Google-internal cleanup only.
+  void set_require_space_after_number(bool require) {
+    require_space_after_number_ = require;
+  }
+
+  // Whether to allow string literals to span multiple lines. Default is false.
+  // Do not use this; for Google-internal cleanup only.
+  void set_allow_multiline_strings(bool allow) {
+    allow_multiline_strings_ = allow;
+  }
+
+  // External helper: validate an identifier.
+  static bool IsIdentifier(const string& text);
+
   // -----------------------------------------------------------------
  private:
   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(Tokenizer);
 
   Token current_;           // Returned by current().
+  Token previous_;          // Returned by previous().
 
   ZeroCopyInputStream* input_;
   ErrorCollector* error_collector_;
@@ -194,15 +264,18 @@ class LIBPROTOBUF_EXPORT Tokenizer {
   int line_;
   int column_;
 
-  // Position in buffer_ where StartToken() was called.  If the token
-  // started in the previous buffer, this is zero, and current_.text already
-  // contains the part of the token from the previous buffer.  If not
-  // currently parsing a token, this is -1.
-  int token_start_;
+  // String to which text should be appended as we advance through it.
+  // Call RecordTo(&str) to start recording and StopRecording() to stop.
+  // E.g. StartToken() calls RecordTo(&current_.text).  record_start_ is the
+  // position within the current buffer where recording started.
+  string* record_target_;
+  int record_start_;
 
   // Options.
   bool allow_f_after_float_;
   CommentStyle comment_style_;
+  bool require_space_after_number_;
+  bool allow_multiline_strings_;
 
   // Since we count columns we need to interpret tabs somehow.  We'll take
   // the standard 8-character definition for lack of any way to do better.
@@ -217,6 +290,9 @@ class LIBPROTOBUF_EXPORT Tokenizer {
   // Read a new buffer from the input.
   void Refresh();
 
+  inline void RecordTo(string* target);
+  inline void StopRecording();
+
   // Called when the current character is the first character of a new
   // token (not including whitespace or comments).
   inline void StartToken();
@@ -249,9 +325,28 @@ class LIBPROTOBUF_EXPORT Tokenizer {
   TokenType ConsumeNumber(bool started_with_zero, bool started_with_dot);
 
   // Consume the rest of a line.
-  void ConsumeLineComment();
+  void ConsumeLineComment(string* content);
   // Consume until "*/".
-  void ConsumeBlockComment();
+  void ConsumeBlockComment(string* content);
+
+  enum NextCommentStatus {
+    // Started a line comment.
+    LINE_COMMENT,
+
+    // Started a block comment.
+    BLOCK_COMMENT,
+
+    // Consumed a slash, then realized it wasn't a comment.  current_ has
+    // been filled in with a slash token.  The caller should return it.
+    SLASH_NOT_COMMENT,
+
+    // We do not appear to be starting a comment here.
+    NO_COMMENT
+  };
+
+  // If we're at the start of a new comment, consume it and return what kind
+  // of comment it is.
+  NextCommentStatus TryConsumeCommentStart();
 
   // -----------------------------------------------------------------
   // These helper methods make the parsing code more readable.  The
@@ -291,6 +386,10 @@ inline const Tokenizer::Token& Tokenizer::current() {
   return current_;
 }
 
+inline const Tokenizer::Token& Tokenizer::previous() {
+  return previous_;
+}
+
 inline void Tokenizer::ParseString(const string& text, string* output) {
   output->clear();
   ParseStringAppend(text, output);
diff --git a/src/google/protobuf/io/tokenizer_unittest.cc b/src/google/protobuf/io/tokenizer_unittest.cc
index 358ec56..b39279b 100644
--- a/src/google/protobuf/io/tokenizer_unittest.cc
+++ b/src/google/protobuf/io/tokenizer_unittest.cc
@@ -32,9 +32,10 @@
 //  Based on original Protocol Buffers design by
 //  Sanjay Ghemawat, Jeff Dean, and others.
 
-#include <vector>
-#include <math.h>
 #include <limits.h>
+#include <math.h>
+
+#include <vector>
 
 #include <google/protobuf/io/tokenizer.h>
 #include <google/protobuf/io/zero_copy_stream_impl.h>
@@ -257,6 +258,7 @@ TEST_2D(TokenizerTest, SimpleTokens, kSimpleTokenCases, kBlockSizes) {
   EXPECT_EQ("", tokenizer.current().text);
   EXPECT_EQ(0, tokenizer.current().line);
   EXPECT_EQ(0, tokenizer.current().column);
+  EXPECT_EQ(0, tokenizer.current().end_column);
 
   // Parse the token.
   ASSERT_TRUE(tokenizer.Next());
@@ -268,6 +270,8 @@ TEST_2D(TokenizerTest, SimpleTokens, kSimpleTokenCases, kBlockSizes) {
   // Check that it is located at the beginning of the input
   EXPECT_EQ(0, tokenizer.current().line);
   EXPECT_EQ(0, tokenizer.current().column);
+  EXPECT_EQ(kSimpleTokenCases_case.input.size(),
+            tokenizer.current().end_column);
 
   // There should be no more input.
   EXPECT_FALSE(tokenizer.Next());
@@ -277,6 +281,8 @@ TEST_2D(TokenizerTest, SimpleTokens, kSimpleTokenCases, kBlockSizes) {
   EXPECT_EQ("", tokenizer.current().text);
   EXPECT_EQ(0, tokenizer.current().line);
   EXPECT_EQ(kSimpleTokenCases_case.input.size(), tokenizer.current().column);
+  EXPECT_EQ(kSimpleTokenCases_case.input.size(),
+            tokenizer.current().end_column);
 
   // There should be no errors.
   EXPECT_TRUE(error_collector.text_.empty());
@@ -339,76 +345,77 @@ MultiTokenCase kMultiTokenCases[] = {
 
   // Test all token types at the same time.
   { "foo 1 1.2 + 'bar'", {
-    { Tokenizer::TYPE_IDENTIFIER, "foo"  , 0,  0 },
-    { Tokenizer::TYPE_INTEGER   , "1"    , 0,  4 },
-    { Tokenizer::TYPE_FLOAT     , "1.2"  , 0,  6 },
-    { Tokenizer::TYPE_SYMBOL    , "+"    , 0, 10 },
-    { Tokenizer::TYPE_STRING    , "'bar'", 0, 12 },
-    { Tokenizer::TYPE_END       , ""     , 0, 17 },
+    { Tokenizer::TYPE_IDENTIFIER, "foo"  , 0,  0,  3 },
+    { Tokenizer::TYPE_INTEGER   , "1"    , 0,  4,  5 },
+    { Tokenizer::TYPE_FLOAT     , "1.2"  , 0,  6,  9 },
+    { Tokenizer::TYPE_SYMBOL    , "+"    , 0, 10, 11 },
+    { Tokenizer::TYPE_STRING    , "'bar'", 0, 12, 17 },
+    { Tokenizer::TYPE_END       , ""     , 0, 17, 17 },
   }},
 
   // Test that consecutive symbols are parsed as separate tokens.
   { "!@+%", {
-    { Tokenizer::TYPE_SYMBOL    , "!"    , 0, 0 },
-    { Tokenizer::TYPE_SYMBOL    , "@"    , 0, 1 },
-    { Tokenizer::TYPE_SYMBOL    , "+"    , 0, 2 },
-    { Tokenizer::TYPE_SYMBOL    , "%"    , 0, 3 },
-    { Tokenizer::TYPE_END       , ""     , 0, 4 },
+    { Tokenizer::TYPE_SYMBOL    , "!"    , 0, 0, 1 },
+    { Tokenizer::TYPE_SYMBOL    , "@"    , 0, 1, 2 },
+    { Tokenizer::TYPE_SYMBOL    , "+"    , 0, 2, 3 },
+    { Tokenizer::TYPE_SYMBOL    , "%"    , 0, 3, 4 },
+    { Tokenizer::TYPE_END       , ""     , 0, 4, 4 },
   }},
 
   // Test that newlines affect line numbers correctly.
   { "foo bar\nrab oof", {
-    { Tokenizer::TYPE_IDENTIFIER, "foo", 0,  0 },
-    { Tokenizer::TYPE_IDENTIFIER, "bar", 0,  4 },
-    { Tokenizer::TYPE_IDENTIFIER, "rab", 1,  0 },
-    { Tokenizer::TYPE_IDENTIFIER, "oof", 1,  4 },
-    { Tokenizer::TYPE_END       , ""   , 1,  7 },
+    { Tokenizer::TYPE_IDENTIFIER, "foo", 0,  0, 3 },
+    { Tokenizer::TYPE_IDENTIFIER, "bar", 0,  4, 7 },
+    { Tokenizer::TYPE_IDENTIFIER, "rab", 1,  0, 3 },
+    { Tokenizer::TYPE_IDENTIFIER, "oof", 1,  4, 7 },
+    { Tokenizer::TYPE_END       , ""   , 1,  7, 7 },
   }},
 
   // Test that tabs affect column numbers correctly.
   { "foo\tbar  \tbaz", {
-    { Tokenizer::TYPE_IDENTIFIER, "foo", 0,  0 },
-    { Tokenizer::TYPE_IDENTIFIER, "bar", 0,  8 },
-    { Tokenizer::TYPE_IDENTIFIER, "baz", 0, 16 },
-    { Tokenizer::TYPE_END       , ""   , 0, 19 },
+    { Tokenizer::TYPE_IDENTIFIER, "foo", 0,  0,  3 },
+    { Tokenizer::TYPE_IDENTIFIER, "bar", 0,  8, 11 },
+    { Tokenizer::TYPE_IDENTIFIER, "baz", 0, 16, 19 },
+    { Tokenizer::TYPE_END       , ""   , 0, 19, 19 },
+  }},
+
+  // Test that tabs in string literals affect column numbers correctly.
+  { "\"foo\tbar\" baz", {
+    { Tokenizer::TYPE_STRING    , "\"foo\tbar\"", 0,  0, 12 },
+    { Tokenizer::TYPE_IDENTIFIER, "baz"         , 0, 13, 16 },
+    { Tokenizer::TYPE_END       , ""            , 0, 16, 16 },
   }},
 
   // Test that line comments are ignored.
   { "foo // This is a comment\n"
     "bar // This is another comment", {
-    { Tokenizer::TYPE_IDENTIFIER, "foo", 0,  0 },
-    { Tokenizer::TYPE_IDENTIFIER, "bar", 1,  0 },
-    { Tokenizer::TYPE_END       , ""   , 1, 30 },
+    { Tokenizer::TYPE_IDENTIFIER, "foo", 0,  0,  3 },
+    { Tokenizer::TYPE_IDENTIFIER, "bar", 1,  0,  3 },
+    { Tokenizer::TYPE_END       , ""   , 1, 30, 30 },
   }},
 
   // Test that block comments are ignored.
   { "foo /* This is a block comment */ bar", {
-    { Tokenizer::TYPE_IDENTIFIER, "foo", 0,  0 },
-    { Tokenizer::TYPE_IDENTIFIER, "bar", 0, 34 },
-    { Tokenizer::TYPE_END       , ""   , 0, 37 },
+    { Tokenizer::TYPE_IDENTIFIER, "foo", 0,  0,  3 },
+    { Tokenizer::TYPE_IDENTIFIER, "bar", 0, 34, 37 },
+    { Tokenizer::TYPE_END       , ""   , 0, 37, 37 },
   }},
 
   // Test that sh-style comments are not ignored by default.
   { "foo # bar\n"
     "baz", {
-    { Tokenizer::TYPE_IDENTIFIER, "foo", 0,  0 },
-    { Tokenizer::TYPE_SYMBOL    , "#"  , 0,  4 },
-    { Tokenizer::TYPE_IDENTIFIER, "bar", 0,  6 },
-    { Tokenizer::TYPE_IDENTIFIER, "baz", 1,  0 },
-    { Tokenizer::TYPE_END       , ""   , 1, 3 },
-  }},
-
-  // Bytes with the high-order bit set should not be seen as control characters.
-  { "\300", {
-    { Tokenizer::TYPE_SYMBOL, "\300", 0, 0 },
-    { Tokenizer::TYPE_END   , ""    , 0, 1 },
+    { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0, 3 },
+    { Tokenizer::TYPE_SYMBOL    , "#"  , 0, 4, 5 },
+    { Tokenizer::TYPE_IDENTIFIER, "bar", 0, 6, 9 },
+    { Tokenizer::TYPE_IDENTIFIER, "baz", 1, 0, 3 },
+    { Tokenizer::TYPE_END       , ""   , 1, 3, 3 },
   }},
 
   // Test all whitespace chars
   { "foo\n\t\r\v\fbar", {
-    { Tokenizer::TYPE_IDENTIFIER, "foo", 0,  0 },
-    { Tokenizer::TYPE_IDENTIFIER, "bar", 1, 11 },
-    { Tokenizer::TYPE_END       , ""   , 1, 14 },
+    { Tokenizer::TYPE_IDENTIFIER, "foo", 0,  0,  3 },
+    { Tokenizer::TYPE_IDENTIFIER, "bar", 1, 11, 14 },
+    { Tokenizer::TYPE_END       , ""   , 1, 14, 14 },
   }},
 };
 
@@ -425,6 +432,7 @@ TEST_2D(TokenizerTest, MultipleTokens, kMultiTokenCases, kBlockSizes) {
   EXPECT_EQ("", tokenizer.current().text);
   EXPECT_EQ(0, tokenizer.current().line);
   EXPECT_EQ(0, tokenizer.current().column);
+  EXPECT_EQ(0, tokenizer.current().end_column);
 
   // Loop through all expected tokens.
   int i = 0;
@@ -434,6 +442,8 @@ TEST_2D(TokenizerTest, MultipleTokens, kMultiTokenCases, kBlockSizes) {
 
     SCOPED_TRACE(testing::Message() << "Token #" << i << ": " << token.text);
 
+    Tokenizer::Token previous = tokenizer.current();
+
     // Next() should only return false when it hits the end token.
     if (token.type != Tokenizer::TYPE_END) {
       ASSERT_TRUE(tokenizer.Next());
@@ -441,11 +451,19 @@ TEST_2D(TokenizerTest, MultipleTokens, kMultiTokenCases, kBlockSizes) {
       ASSERT_FALSE(tokenizer.Next());
     }
 
+    // Check that the previous token is set correctly.
+    EXPECT_EQ(previous.type, tokenizer.previous().type);
+    EXPECT_EQ(previous.text, tokenizer.previous().text);
+    EXPECT_EQ(previous.line, tokenizer.previous().line);
+    EXPECT_EQ(previous.column, tokenizer.previous().column);
+    EXPECT_EQ(previous.end_column, tokenizer.previous().end_column);
+
     // Check that the token matches the expected one.
     EXPECT_EQ(token.type, tokenizer.current().type);
     EXPECT_EQ(token.text, tokenizer.current().text);
     EXPECT_EQ(token.line, tokenizer.current().line);
     EXPECT_EQ(token.column, tokenizer.current().column);
+    EXPECT_EQ(token.end_column, tokenizer.current().end_column);
 
   } while (token.type != Tokenizer::TYPE_END);
 
@@ -491,6 +509,217 @@ TEST_1D(TokenizerTest, ShCommentStyle, kBlockSizes) {
 
 // -------------------------------------------------------------------
 
+// In each case, the input is expected to have two tokens named "prev" and
+// "next" with comments in between.
+struct DocCommentCase {
+  string input;
+
+  const char* prev_trailing_comments;
+  const char* detached_comments[10];
+  const char* next_leading_comments;
+};
+
+inline ostream& operator<<(ostream& out,
+                           const DocCommentCase& test_case) {
+  return out << CEscape(test_case.input);
+}
+
+DocCommentCase kDocCommentCases[] = {
+  {
+    "prev next",
+
+    "",
+    {},
+    ""
+      },
+
+        {
+      "prev /* ignored */ next",
+
+      "",
+      {},
+      ""
+        },
+
+          {
+        "prev // trailing comment\n"
+            "next",
+
+            " trailing comment\n",
+            {},
+            ""
+          },
+
+            {
+          "prev\n"
+              "// leading comment\n"
+              "// line 2\n"
+              "next",
+
+              "",
+              {},
+              " leading comment\n"
+              " line 2\n"
+            },
+
+              {
+            "prev\n"
+                "// trailing comment\n"
+                "// line 2\n"
+                "\n"
+                "next",
+
+                " trailing comment\n"
+                " line 2\n",
+                {},
+                ""
+              },
+
+                {
+              "prev // trailing comment\n"
+                  "// leading comment\n"
+                  "// line 2\n"
+                  "next",
+
+                  " trailing comment\n",
+                  {},
+                  " leading comment\n"
+                  " line 2\n"
+                },
+
+                  {
+                "prev /* trailing block comment */\n"
+                    "/* leading block comment\n"
+                    " * line 2\n"
+                    " * line 3 */"
+                    "next",
+
+                    " trailing block comment ",
+                    {},
+                    " leading block comment\n"
+                    " line 2\n"
+                    " line 3 "
+                  },
+
+                    {
+                  "prev\n"
+                      "/* trailing block comment\n"
+                      " * line 2\n"
+                      " * line 3\n"
+                      " */\n"
+                      "/* leading block comment\n"
+                      " * line 2\n"
+                      " * line 3 */"
+                      "next",
+
+                      " trailing block comment\n"
+                      " line 2\n"
+                      " line 3\n",
+                      {},
+                      " leading block comment\n"
+                      " line 2\n"
+                      " line 3 "
+                    },
+
+                      {
+                    "prev\n"
+                        "// trailing comment\n"
+                        "\n"
+                        "// detached comment\n"
+                        "// line 2\n"
+                        "\n"
+                        "// second detached comment\n"
+                        "/* third detached comment\n"
+                        " * line 2 */\n"
+                        "// leading comment\n"
+                        "next",
+
+                        " trailing comment\n",
+                        {
+                      " detached comment\n"
+                          " line 2\n",
+                          " second detached comment\n",
+                          " third detached comment\n"
+                          " line 2 "
+                        },
+                          " leading comment\n"
+                        },
+
+                          {
+                        "prev /**/\n"
+                            "\n"
+                            "// detached comment\n"
+                            "\n"
+                            "// leading comment\n"
+                            "next",
+
+                            "",
+                            {
+                          " detached comment\n"
+                            },
+                              " leading comment\n"
+                            },
+
+                              {
+                            "prev /**/\n"
+                                "// leading comment\n"
+                                "next",
+
+                                "",
+                                {},
+                                " leading comment\n"
+                              },
+                              };
+
+TEST_2D(TokenizerTest, DocComments, kDocCommentCases, kBlockSizes) {
+  // Set up the tokenizer.
+  TestInputStream input(kDocCommentCases_case.input.data(),
+                        kDocCommentCases_case.input.size(),
+                        kBlockSizes_case);
+  TestErrorCollector error_collector;
+  Tokenizer tokenizer(&input, &error_collector);
+
+  // Set up a second tokenizer where we'll pass all NULLs to NextWithComments().
+  TestInputStream input2(kDocCommentCases_case.input.data(),
+                        kDocCommentCases_case.input.size(),
+                        kBlockSizes_case);
+  Tokenizer tokenizer2(&input2, &error_collector);
+
+  tokenizer.Next();
+  tokenizer2.Next();
+
+  EXPECT_EQ("prev", tokenizer.current().text);
+  EXPECT_EQ("prev", tokenizer2.current().text);
+
+  string prev_trailing_comments;
+  vector<string> detached_comments;
+  string next_leading_comments;
+  tokenizer.NextWithComments(&prev_trailing_comments, &detached_comments,
+                             &next_leading_comments);
+  tokenizer2.NextWithComments(NULL, NULL, NULL);
+  EXPECT_EQ("next", tokenizer.current().text);
+  EXPECT_EQ("next", tokenizer2.current().text);
+
+  EXPECT_EQ(kDocCommentCases_case.prev_trailing_comments,
+            prev_trailing_comments);
+
+  for (int i = 0; i < detached_comments.size(); i++) {
+    ASSERT_LT(i, GOOGLE_ARRAYSIZE(kDocCommentCases));
+    ASSERT_TRUE(kDocCommentCases_case.detached_comments[i] != NULL);
+    EXPECT_EQ(kDocCommentCases_case.detached_comments[i],
+              detached_comments[i]);
+  }
+
+  // Verify that we matched all the detached comments.
+  EXPECT_EQ(NULL,
+      kDocCommentCases_case.detached_comments[detached_comments.size()]);
+
+  EXPECT_EQ(kDocCommentCases_case.next_leading_comments,
+            next_leading_comments);
+}
+
+// -------------------------------------------------------------------
+
 // Test parse helpers.  It's not really worth setting up a full data-driven
 // test here.
 TEST_F(TokenizerTest, ParseInteger) {
@@ -506,7 +735,7 @@ TEST_F(TokenizerTest, ParseInteger) {
   EXPECT_EQ(0, ParseInteger("0x"));
 
   uint64 i;
-#ifdef GTEST_HAS_DEATH_TEST  // death tests do not work on Windows yet
+#ifdef PROTOBUF_HAS_DEATH_TEST  // death tests do not work on Windows yet
   // Test invalid integers that will never be tokenized as integers.
   EXPECT_DEBUG_DEATH(Tokenizer::ParseInteger("zxy", kuint64max, &i),
     "passed text that could not have been tokenized as an integer");
@@ -518,7 +747,7 @@ TEST_F(TokenizerTest, ParseInteger) {
     "passed text that could not have been tokenized as an integer");
   EXPECT_DEBUG_DEATH(Tokenizer::ParseInteger("-1", kuint64max, &i),
     "passed text that could not have been tokenized as an integer");
-#endif  // GTEST_HAS_DEATH_TEST
+#endif  // PROTOBUF_HAS_DEATH_TEST
 
   // Test overflows.
   EXPECT_TRUE (Tokenizer::ParseInteger("0", 0, &i));
@@ -561,7 +790,7 @@ TEST_F(TokenizerTest, ParseFloat) {
   EXPECT_EQ(     0.0, Tokenizer::ParseFloat("1e-9999999999999999999999999999"));
   EXPECT_EQ(HUGE_VAL, Tokenizer::ParseFloat("1e+9999999999999999999999999999"));
 
-#ifdef GTEST_HAS_DEATH_TEST  // death tests do not work on Windows yet
+#ifdef PROTOBUF_HAS_DEATH_TEST  // death tests do not work on Windows yet
   // Test invalid integers that will never be tokenized as integers.
   EXPECT_DEBUG_DEATH(Tokenizer::ParseFloat("zxy"),
     "passed text that could not have been tokenized as a float");
@@ -569,7 +798,7 @@ TEST_F(TokenizerTest, ParseFloat) {
     "passed text that could not have been tokenized as a float");
   EXPECT_DEBUG_DEATH(Tokenizer::ParseFloat("-1.0"),
     "passed text that could not have been tokenized as a float");
-#endif  // GTEST_HAS_DEATH_TEST
+#endif  // PROTOBUF_HAS_DEATH_TEST
 }
 
 TEST_F(TokenizerTest, ParseString) {
@@ -591,11 +820,27 @@ TEST_F(TokenizerTest, ParseString) {
   Tokenizer::ParseString("'\\", &output);
   EXPECT_EQ("\\", output);
 
+  // Experiment with Unicode escapes. Here are one-, two- and three-byte Unicode
+  // characters.
+  Tokenizer::ParseString("'\\u0024\\u00a2\\u20ac\\U00024b62XX'", &output);
+  EXPECT_EQ("$¢€𤭢XX", output);
+  // Same thing encoded using UTF16.
+  Tokenizer::ParseString("'\\u0024\\u00a2\\u20ac\\ud852\\udf62XX'", &output);
+  EXPECT_EQ("$¢€𤭢XX", output);
+  // Here's some broken UTF16; there's a head surrogate with no tail surrogate.
+  // We just output this as if it were UTF8; it's not a defined code point, but
+  // it has a defined encoding.
+  Tokenizer::ParseString("'\\ud852XX'", &output);
+  EXPECT_EQ("\xed\xa1\x92XX", output);
+  // Malformed escape: Demons may fly out of the nose.
+  Tokenizer::ParseString("\\u0", &output);
+  EXPECT_EQ("u0", output);
+
   // Test invalid strings that will never be tokenized as strings.
-#ifdef GTEST_HAS_DEATH_TEST  // death tests do not work on Windows yet
+#ifdef PROTOBUF_HAS_DEATH_TEST  // death tests do not work on Windows yet
   EXPECT_DEBUG_DEATH(Tokenizer::ParseString("", &output),
     "passed text that could not have been tokenized as a string");
-#endif  // GTEST_HAS_DEATH_TEST
+#endif  // PROTOBUF_HAS_DEATH_TEST
 }
 
 TEST_F(TokenizerTest, ParseStringAppend) {
@@ -632,9 +877,15 @@ ErrorCase kErrorCases[] = {
   { "'\\x' foo", true,
     "0:3: Expected hex digits for escape sequence.\n" },
   { "'foo", false,
-    "0:4: String literals cannot cross line boundaries.\n" },
+    "0:4: Unexpected end of string.\n" },
   { "'bar\nfoo", true,
     "0:4: String literals cannot cross line boundaries.\n" },
+  { "'\\u01' foo", true,
+    "0:5: Expected four hex digits for \\u escape sequence.\n" },
+  { "'\\u01' foo", true,
+    "0:5: Expected four hex digits for \\u escape sequence.\n" },
+  { "'\\uXYZ' foo", true,
+    "0:3: Expected four hex digits for \\u escape sequence.\n" },
 
   // Integer errors.
   { "123foo", true,
@@ -694,6 +945,10 @@ ErrorCase kErrorCases[] = {
     "0:0: Invalid control characters encountered in text.\n" },
   { string("\0\0foo", 5), true,
     "0:0: Invalid control characters encountered in text.\n" },
+
+  // Check error from high order bits set
+  { "\300foo", true,
+    "0:0: Interpreting non ascii codepoint 192.\n" },
 };
 
 TEST_2D(TokenizerTest, Errors, kErrorCases, kBlockSizes) {
@@ -711,7 +966,7 @@ TEST_2D(TokenizerTest, Errors, kErrorCases, kBlockSizes) {
   }
 
   // Check that the errors match what was expected.
-  EXPECT_EQ(error_collector.text_, kErrorCases_case.errors);
+  EXPECT_EQ(kErrorCases_case.errors, error_collector.text_);
 
   // If the error was recoverable, make sure we saw "foo" after it.
   if (kErrorCases_case.recoverable) {
@@ -737,6 +992,7 @@ TEST_1D(TokenizerTest, BackUpOnDestruction, kBlockSizes) {
   EXPECT_EQ(strlen("foo"), input.ByteCount());
 }
 
+
 }  // namespace
 }  // namespace io
 }  // namespace protobuf
diff --git a/src/google/protobuf/io/zero_copy_stream.cc b/src/google/protobuf/io/zero_copy_stream.cc
index dad6ff1..4d53f29 100644
--- a/src/google/protobuf/io/zero_copy_stream.cc
+++ b/src/google/protobuf/io/zero_copy_stream.cc
@@ -34,6 +34,7 @@
 
 #include <google/protobuf/io/zero_copy_stream.h>
 
+#include <google/protobuf/stubs/common.h>
 
 namespace google {
 namespace protobuf {
@@ -43,6 +44,14 @@ ZeroCopyInputStream::~ZeroCopyInputStream() {}
 ZeroCopyOutputStream::~ZeroCopyOutputStream() {}
 
 
+bool ZeroCopyOutputStream::WriteAliasedRaw(const void* /* data */,
+                                           int /* size */) {
+  GOOGLE_LOG(FATAL) << "This ZeroCopyOutputStream doesn't support aliasing. "
+                "Reaching here usually means a ZeroCopyOutputStream "
+                "implementation bug.";
+  return false;
+}
+
 }  // namespace io
 }  // namespace protobuf
 }  // namespace google
diff --git a/src/google/protobuf/io/zero_copy_stream.h b/src/google/protobuf/io/zero_copy_stream.h
index db5326f..f892122 100644
--- a/src/google/protobuf/io/zero_copy_stream.h
+++ b/src/google/protobuf/io/zero_copy_stream.h
@@ -226,6 +226,16 @@ class LIBPROTOBUF_EXPORT ZeroCopyOutputStream {
   // Returns the total number of bytes written since this object was created.
   virtual int64 ByteCount() const = 0;
 
+  // Write a given chunk of data to the output.  Some output streams may
+  // implement this in a way that avoids copying. Check AllowsAliasing() before
+  // calling WriteAliasedRaw(). It will GOOGLE_CHECK fail if WriteAliasedRaw() is
+  // called on a stream that does not allow aliasing.
+  //
+  // NOTE: It is caller's responsibility to ensure that the chunk of memory
+  // remains live until all of the data has been consumed from the stream.
+  virtual bool WriteAliasedRaw(const void* data, int size);
+  virtual bool AllowsAliasing() const { return false; }
+
 
  private:
   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ZeroCopyOutputStream);
diff --git a/src/google/protobuf/io/zero_copy_stream_impl.cc b/src/google/protobuf/io/zero_copy_stream_impl.cc
index 1384c74..7829a29 100644
--- a/src/google/protobuf/io/zero_copy_stream_impl.cc
+++ b/src/google/protobuf/io/zero_copy_stream_impl.cc
@@ -46,7 +46,8 @@
 
 #include <google/protobuf/io/zero_copy_stream_impl.h>
 #include <google/protobuf/stubs/common.h>
-#include <google/protobuf/stubs/stl_util-inl.h>
+#include <google/protobuf/stubs/stl_util.h>
+
 
 namespace google {
 namespace protobuf {
@@ -412,7 +413,9 @@ int64 ConcatenatingInputStream::ByteCount() const {
 
 LimitingInputStream::LimitingInputStream(ZeroCopyInputStream* input,
                                          int64 limit)
-  : input_(input), limit_(limit) {}
+  : input_(input), limit_(limit) {
+  prior_bytes_read_ = input_->ByteCount();
+}
 
 LimitingInputStream::~LimitingInputStream() {
   // If we overshot the limit, back up.
@@ -456,9 +459,9 @@ bool LimitingInputStream::Skip(int count) {
 
 int64 LimitingInputStream::ByteCount() const {
   if (limit_ < 0) {
-    return input_->ByteCount() + limit_;
+    return input_->ByteCount() + limit_ - prior_bytes_read_;
   } else {
-    return input_->ByteCount();
+    return input_->ByteCount() - prior_bytes_read_;
   }
 }
 
diff --git a/src/google/protobuf/io/zero_copy_stream_impl.h b/src/google/protobuf/io/zero_copy_stream_impl.h
index 9fedb00..8382709 100644
--- a/src/google/protobuf/io/zero_copy_stream_impl.h
+++ b/src/google/protobuf/io/zero_copy_stream_impl.h
@@ -344,6 +344,7 @@ class LIBPROTOBUF_EXPORT LimitingInputStream : public ZeroCopyInputStream {
  private:
   ZeroCopyInputStream* input_;
   int64 limit_;  // Decreases as we go, becomes negative if we overshoot.
+  int64 prior_bytes_read_;  // Bytes read on underlying stream at construction
 
   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(LimitingInputStream);
 };
diff --git a/src/google/protobuf/io/zero_copy_stream_impl_lite.cc b/src/google/protobuf/io/zero_copy_stream_impl_lite.cc
index e801251..d186a98 100644
--- a/src/google/protobuf/io/zero_copy_stream_impl_lite.cc
+++ b/src/google/protobuf/io/zero_copy_stream_impl_lite.cc
@@ -32,9 +32,13 @@
 //  Based on original Protocol Buffers design by
 //  Sanjay Ghemawat, Jeff Dean, and others.
 
-#include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
+
+#include <algorithm>
+#include <limits>
+
 #include <google/protobuf/stubs/common.h>
-#include <google/protobuf/stubs/stl_util-inl.h>
+#include <google/protobuf/stubs/stl_util.h>
 
 namespace google {
 namespace protobuf {
@@ -159,15 +163,23 @@ bool StringOutputStream::Next(void** data, int* size) {
     // without a memory allocation this way.
     STLStringResizeUninitialized(target_, target_->capacity());
   } else {
-    // Size has reached capacity, so double the size.  Also make sure
-    // that the new size is at least kMinimumSize.
+    // Size has reached capacity, try to double the size.
+    if (old_size > std::numeric_limits<int>::max() / 2) {
+      // Can not double the size otherwise it is going to cause integer
+      // overflow in the expression below: old_size * 2 ";
+      GOOGLE_LOG(ERROR) << "Cannot allocate buffer larger than kint32max for "
+                 << "StringOutputStream.";
+      return false;
+    }
+    // Double the size, also make sure that the new size is at least
+    // kMinimumSize.
     STLStringResizeUninitialized(
       target_,
       max(old_size * 2,
           kMinimumSize + 0));  // "+ 0" works around GCC4 weirdness.
   }
 
-  *data = string_as_array(target_) + old_size;
+  *data = mutable_string_data(target_) + old_size;
   *size = target_->size() - old_size;
   return true;
 }
diff --git a/src/google/protobuf/io/zero_copy_stream_impl_lite.h b/src/google/protobuf/io/zero_copy_stream_impl_lite.h
index 153f543..b980143 100644
--- a/src/google/protobuf/io/zero_copy_stream_impl_lite.h
+++ b/src/google/protobuf/io/zero_copy_stream_impl_lite.h
@@ -48,6 +48,7 @@
 #include <iosfwd>
 #include <google/protobuf/io/zero_copy_stream.h>
 #include <google/protobuf/stubs/common.h>
+#include <google/protobuf/stubs/stl_util.h>
 
 
 namespace google {
@@ -333,6 +334,19 @@ class LIBPROTOBUF_EXPORT CopyingOutputStreamAdaptor : public ZeroCopyOutputStrea
 
 // ===================================================================
 
+// Return a pointer to mutable characters underlying the given string.  The
+// return value is valid until the next time the string is resized.  We
+// trust the caller to treat the return value as an array of length s->size().
+inline char* mutable_string_data(string* s) {
+#ifdef LANG_CXX11
+  // This should be simpler & faster than string_as_array() because the latter
+  // is guaranteed to return NULL when *s is empty, so it has to check for that.
+  return &(*s)[0];
+#else
+  return string_as_array(s);
+#endif
+}
+
 }  // namespace io
 }  // namespace protobuf
 
diff --git a/src/google/protobuf/io/zero_copy_stream_unittest.cc b/src/google/protobuf/io/zero_copy_stream_unittest.cc
index 8229ee6..75eb2a4 100644
--- a/src/google/protobuf/io/zero_copy_stream_unittest.cc
+++ b/src/google/protobuf/io/zero_copy_stream_unittest.cc
@@ -61,6 +61,7 @@
 #include <sstream>
 
 #include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/io/coded_stream.h>
 
 #if HAVE_ZLIB
 #include <google/protobuf/io/gzip_stream.h>
@@ -285,6 +286,57 @@ TEST_F(IoTest, ArrayIo) {
   }
 }
 
+TEST_F(IoTest, TwoSessionWrite) {
+  // Test that two concatenated write sessions read correctly
+
+  static const char* strA = "0123456789";
+  static const char* strB = "WhirledPeas";
+  const int kBufferSize = 2*1024;
+  uint8* buffer = new uint8[kBufferSize];
+  char* temp_buffer = new char[40];
+
+  for (int i = 0; i < kBlockSizeCount; i++) {
+    for (int j = 0; j < kBlockSizeCount; j++) {
+      ArrayOutputStream* output =
+          new ArrayOutputStream(buffer, kBufferSize, kBlockSizes[i]);
+      CodedOutputStream* coded_output = new CodedOutputStream(output);
+      coded_output->WriteVarint32(strlen(strA));
+      coded_output->WriteRaw(strA, strlen(strA));
+      delete coded_output;  // flush
+      int64 pos = output->ByteCount();
+      delete output;
+      output = new ArrayOutputStream(
+          buffer + pos, kBufferSize - pos, kBlockSizes[i]);
+      coded_output = new CodedOutputStream(output);
+      coded_output->WriteVarint32(strlen(strB));
+      coded_output->WriteRaw(strB, strlen(strB));
+      delete coded_output;  // flush
+      int64 size = pos + output->ByteCount();
+      delete output;
+
+      ArrayInputStream* input =
+          new ArrayInputStream(buffer, size, kBlockSizes[j]);
+      CodedInputStream* coded_input = new CodedInputStream(input);
+      uint32 insize;
+      EXPECT_TRUE(coded_input->ReadVarint32(&insize));
+      EXPECT_EQ(strlen(strA), insize);
+      EXPECT_TRUE(coded_input->ReadRaw(temp_buffer, insize));
+      EXPECT_EQ(0, memcmp(temp_buffer, strA, insize));
+
+      EXPECT_TRUE(coded_input->ReadVarint32(&insize));
+      EXPECT_EQ(strlen(strB), insize);
+      EXPECT_TRUE(coded_input->ReadRaw(temp_buffer, insize));
+      EXPECT_EQ(0, memcmp(temp_buffer, strB, insize));
+
+      delete coded_input;
+      delete input;
+    }
+  }
+
+  delete [] temp_buffer;
+  delete [] buffer;
+}
+
 #if HAVE_ZLIB
 TEST_F(IoTest, GzipIo) {
   const int kBufferSize = 2*1024;
@@ -296,9 +348,49 @@ TEST_F(IoTest, GzipIo) {
         int size;
         {
           ArrayOutputStream output(buffer, kBufferSize, kBlockSizes[i]);
-          GzipOutputStream gzout(
-              &output, GzipOutputStream::GZIP, gzip_buffer_size);
+          GzipOutputStream::Options options;
+          options.format = GzipOutputStream::GZIP;
+          if (gzip_buffer_size != -1) {
+            options.buffer_size = gzip_buffer_size;
+          }
+          GzipOutputStream gzout(&output, options);
+          WriteStuff(&gzout);
+          gzout.Close();
+          size = output.ByteCount();
+        }
+        {
+          ArrayInputStream input(buffer, size, kBlockSizes[j]);
+          GzipInputStream gzin(
+              &input, GzipInputStream::GZIP, gzip_buffer_size);
+          ReadStuff(&gzin);
+        }
+      }
+    }
+  }
+  delete [] buffer;
+}
+
+TEST_F(IoTest, GzipIoWithFlush) {
+  const int kBufferSize = 2*1024;
+  uint8* buffer = new uint8[kBufferSize];
+  // We start with i = 4 as we want a block size > 6. With block size <= 6
+  // Flush() fills up the entire 2K buffer with flush markers and the test
+  // fails. See documentation for Flush() for more detail.
+  for (int i = 4; i < kBlockSizeCount; i++) {
+    for (int j = 0; j < kBlockSizeCount; j++) {
+      for (int z = 0; z < kBlockSizeCount; z++) {
+        int gzip_buffer_size = kBlockSizes[z];
+        int size;
+        {
+          ArrayOutputStream output(buffer, kBufferSize, kBlockSizes[i]);
+          GzipOutputStream::Options options;
+          options.format = GzipOutputStream::GZIP;
+          if (gzip_buffer_size != -1) {
+            options.buffer_size = gzip_buffer_size;
+          }
+          GzipOutputStream gzout(&output, options);
           WriteStuff(&gzout);
+          EXPECT_TRUE(gzout.Flush());
           gzout.Close();
           size = output.ByteCount();
         }
@@ -314,6 +406,64 @@ TEST_F(IoTest, GzipIo) {
   delete [] buffer;
 }
 
+TEST_F(IoTest, GzipIoContiguousFlushes) {
+  const int kBufferSize = 2*1024;
+  uint8* buffer = new uint8[kBufferSize];
+
+  int block_size = kBlockSizes[4];
+  int gzip_buffer_size = block_size;
+  int size;
+
+  ArrayOutputStream output(buffer, kBufferSize, block_size);
+  GzipOutputStream::Options options;
+  options.format = GzipOutputStream::GZIP;
+  if (gzip_buffer_size != -1) {
+    options.buffer_size = gzip_buffer_size;
+  }
+  GzipOutputStream gzout(&output, options);
+  WriteStuff(&gzout);
+  EXPECT_TRUE(gzout.Flush());
+  EXPECT_TRUE(gzout.Flush());
+  gzout.Close();
+  size = output.ByteCount();
+
+  ArrayInputStream input(buffer, size, block_size);
+  GzipInputStream gzin(
+      &input, GzipInputStream::GZIP, gzip_buffer_size);
+  ReadStuff(&gzin);
+
+  delete [] buffer;
+}
+
+TEST_F(IoTest, GzipIoReadAfterFlush) {
+  const int kBufferSize = 2*1024;
+  uint8* buffer = new uint8[kBufferSize];
+
+  int block_size = kBlockSizes[4];
+  int gzip_buffer_size = block_size;
+  int size;
+  ArrayOutputStream output(buffer, kBufferSize, block_size);
+  GzipOutputStream::Options options;
+  options.format = GzipOutputStream::GZIP;
+  if (gzip_buffer_size != -1) {
+    options.buffer_size = gzip_buffer_size;
+  }
+
+  GzipOutputStream gzout(&output, options);
+  WriteStuff(&gzout);
+  EXPECT_TRUE(gzout.Flush());
+  size = output.ByteCount();
+
+  ArrayInputStream input(buffer, size, block_size);
+  GzipInputStream gzin(
+      &input, GzipInputStream::GZIP, gzip_buffer_size);
+  ReadStuff(&gzin);
+
+  gzout.Close();
+
+  delete [] buffer;
+}
+
 TEST_F(IoTest, ZlibIo) {
   const int kBufferSize = 2*1024;
   uint8* buffer = new uint8[kBufferSize];
@@ -324,8 +474,12 @@ TEST_F(IoTest, ZlibIo) {
         int size;
         {
           ArrayOutputStream output(buffer, kBufferSize, kBlockSizes[i]);
-          GzipOutputStream gzout(
-              &output, GzipOutputStream::ZLIB, gzip_buffer_size);
+          GzipOutputStream::Options options;
+          options.format = GzipOutputStream::ZLIB;
+          if (gzip_buffer_size != -1) {
+            options.buffer_size = gzip_buffer_size;
+          }
+          GzipOutputStream gzout(&output, options);
           WriteStuff(&gzout);
           gzout.Close();
           size = output.ByteCount();
@@ -348,7 +502,9 @@ TEST_F(IoTest, ZlibIoInputAutodetect) {
   int size;
   {
     ArrayOutputStream output(buffer, kBufferSize);
-    GzipOutputStream gzout(&output, GzipOutputStream::ZLIB);
+    GzipOutputStream::Options options;
+    options.format = GzipOutputStream::ZLIB;
+    GzipOutputStream gzout(&output, options);
     WriteStuff(&gzout);
     gzout.Close();
     size = output.ByteCount();
@@ -360,7 +516,9 @@ TEST_F(IoTest, ZlibIoInputAutodetect) {
   }
   {
     ArrayOutputStream output(buffer, kBufferSize);
-    GzipOutputStream gzout(&output, GzipOutputStream::GZIP);
+    GzipOutputStream::Options options;
+    options.format = GzipOutputStream::GZIP;
+    GzipOutputStream gzout(&output, options);
     WriteStuff(&gzout);
     gzout.Close();
     size = output.ByteCount();
@@ -402,9 +560,10 @@ TEST_F(IoTest, CompressionOptions) {
   // Some ad-hoc testing of compression options.
 
   string golden;
-  File::ReadFileToStringOrDie(
-    TestSourceDir() + "/google/protobuf/testdata/golden_message",
-    &golden);
+  GOOGLE_CHECK_OK(File::GetContents(
+      TestSourceDir() +
+          "/google/protobuf/testdata/golden_message",
+      &golden, true));
 
   GzipOutputStream::Options options;
   string gzip_compressed = Compress(golden, options);
@@ -432,6 +591,71 @@ TEST_F(IoTest, CompressionOptions) {
   EXPECT_TRUE(Uncompress(gzip_compressed) == golden);
   EXPECT_TRUE(Uncompress(zlib_compressed) == golden);
 }
+
+TEST_F(IoTest, TwoSessionWriteGzip) {
+  // Test that two concatenated gzip streams can be read correctly
+
+  static const char* strA = "0123456789";
+  static const char* strB = "QuickBrownFox";
+  const int kBufferSize = 2*1024;
+  uint8* buffer = new uint8[kBufferSize];
+  char* temp_buffer = new char[40];
+
+  for (int i = 0; i < kBlockSizeCount; i++) {
+    for (int j = 0; j < kBlockSizeCount; j++) {
+      ArrayOutputStream* output =
+          new ArrayOutputStream(buffer, kBufferSize, kBlockSizes[i]);
+      GzipOutputStream* gzout = new GzipOutputStream(output);
+      CodedOutputStream* coded_output = new CodedOutputStream(gzout);
+      int32 outlen = strlen(strA) + 1;
+      coded_output->WriteVarint32(outlen);
+      coded_output->WriteRaw(strA, outlen);
+      delete coded_output;  // flush
+      delete gzout;  // flush
+      int64 pos = output->ByteCount();
+      delete output;
+      output = new ArrayOutputStream(
+          buffer + pos, kBufferSize - pos, kBlockSizes[i]);
+      gzout = new GzipOutputStream(output);
+      coded_output = new CodedOutputStream(gzout);
+      outlen = strlen(strB) + 1;
+      coded_output->WriteVarint32(outlen);
+      coded_output->WriteRaw(strB, outlen);
+      delete coded_output;  // flush
+      delete gzout;  // flush
+      int64 size = pos + output->ByteCount();
+      delete output;
+
+      ArrayInputStream* input =
+          new ArrayInputStream(buffer, size, kBlockSizes[j]);
+      GzipInputStream* gzin = new GzipInputStream(input);
+      CodedInputStream* coded_input = new CodedInputStream(gzin);
+      uint32 insize;
+      EXPECT_TRUE(coded_input->ReadVarint32(&insize));
+      EXPECT_EQ(strlen(strA) + 1, insize);
+      EXPECT_TRUE(coded_input->ReadRaw(temp_buffer, insize));
+      EXPECT_EQ(0, memcmp(temp_buffer, strA, insize))
+          << "strA=" << strA << " in=" << temp_buffer;
+
+      EXPECT_TRUE(coded_input->ReadVarint32(&insize));
+      EXPECT_EQ(strlen(strB) + 1, insize);
+      EXPECT_TRUE(coded_input->ReadRaw(temp_buffer, insize));
+      EXPECT_EQ(0, memcmp(temp_buffer, strB, insize))
+          << " out_block_size=" << kBlockSizes[i]
+          << " in_block_size=" << kBlockSizes[j]
+          << " pos=" << pos
+          << " size=" << size
+          << " strB=" << strB << " in=" << temp_buffer;
+
+      delete coded_input;
+      delete gzin;
+      delete input;
+    }
+  }
+
+  delete [] temp_buffer;
+  delete [] buffer;
+}
 #endif
 
 // There is no string input, only string output.  Also, it doesn't support
@@ -700,6 +924,26 @@ TEST_F(IoTest, LimitingInputStream) {
   ReadStuff(&input);
 }
 
+// Checks that ByteCount works correctly for LimitingInputStreams where the
+// underlying stream has already been read.
+TEST_F(IoTest, LimitingInputStreamByteCount) {
+  const int kHalfBufferSize = 128;
+  const int kBufferSize = kHalfBufferSize * 2;
+  uint8 buffer[kBufferSize];
+
+  // Set up input. Only allow half to be read at once.
+  ArrayInputStream array_input(buffer, kBufferSize, kHalfBufferSize);
+  const void* data;
+  int size;
+  EXPECT_TRUE(array_input.Next(&data, &size));
+  EXPECT_EQ(kHalfBufferSize, array_input.ByteCount());
+  // kHalfBufferSize - 1 to test limiting logic as well.
+  LimitingInputStream input(&array_input, kHalfBufferSize - 1);
+  EXPECT_EQ(0, input.ByteCount());
+  EXPECT_TRUE(input.Next(&data, &size));
+  EXPECT_EQ(kHalfBufferSize - 1 , input.ByteCount());
+}
+
 // Check that a zero-size array doesn't confuse the code.
 TEST(ZeroSizeArray, Input) {
   ArrayInputStream input(NULL, 0);