1 files changed, 688 insertions, 183 deletions
diff --git a/src/google/protobuf/text_format.cc b/src/google/protobuf/text_format.cc
index 137cbce..84cdbb5 100644
--- a/src/google/protobuf/text_format.cc
+++ b/src/google/protobuf/text_format.cc
@@ -1,6 +1,6 @@
 // Protocol Buffers - Google's data interchange format
 // Copyright 2008 Google Inc.  All rights reserved.
-// http://code.google.com/p/protobuf/
+// https://developers.google.com/protocol-buffers/
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -32,15 +32,18 @@
 //  Based on original Protocol Buffers design by
 //  Sanjay Ghemawat, Jeff Dean, and others.
 
+#include <algorithm>
 #include <float.h>
 #include <math.h>
 #include <stdio.h>
 #include <stack>
 #include <limits>
+#include <vector>
 
 #include <google/protobuf/text_format.h>
 
 #include <google/protobuf/descriptor.h>
+#include <google/protobuf/wire_format_lite.h>
 #include <google/protobuf/io/coded_stream.h>
 #include <google/protobuf/io/zero_copy_stream.h>
 #include <google/protobuf/io/zero_copy_stream_impl.h>
@@ -48,10 +51,26 @@
 #include <google/protobuf/descriptor.pb.h>
 #include <google/protobuf/io/tokenizer.h>
 #include <google/protobuf/stubs/strutil.h>
+#include <google/protobuf/stubs/map_util.h>
+#include <google/protobuf/stubs/stl_util.h>
 
 namespace google {
 namespace protobuf {
 
+namespace {
+
+inline bool IsHexNumber(const string& str) {
+  return (str.length() >= 2 && str[0] == '0' &&
+          (str[1] == 'x' || str[1] == 'X'));
+}
+
+inline bool IsOctNumber(const string& str) {
+  return (str.length() >= 2 && str[0] == '0' &&
+          (str[1] >= '0' && str[1] < '8'));
+}
+
+}  // namespace
+
 string Message::DebugString() const {
   string debug_string;
 
@@ -93,6 +112,73 @@ void Message::PrintDebugString() const {
 
 
 // ===========================================================================
+// Implementation of the parse information tree class.
+TextFormat::ParseInfoTree::ParseInfoTree() { }
+
+TextFormat::ParseInfoTree::~ParseInfoTree() {
+  // Remove any nested information trees, as they are owned by this tree.
+  for (NestedMap::iterator it = nested_.begin(); it != nested_.end(); ++it) {
+    STLDeleteElements(&(it->second));
+  }
+}
+
+void TextFormat::ParseInfoTree::RecordLocation(
+    const FieldDescriptor* field,
+    TextFormat::ParseLocation location) {
+  locations_[field].push_back(location);
+}
+
+TextFormat::ParseInfoTree* TextFormat::ParseInfoTree::CreateNested(
+    const FieldDescriptor* field) {
+  // Owned by us in the map.
+  TextFormat::ParseInfoTree* instance = new TextFormat::ParseInfoTree();
+  vector<TextFormat::ParseInfoTree*>* trees = &nested_[field];
+  GOOGLE_CHECK(trees);
+  trees->push_back(instance);
+  return instance;
+}
+
+void CheckFieldIndex(const FieldDescriptor* field, int index) {
+  if (field == NULL) { return; }
+
+  if (field->is_repeated() && index == -1) {
+    GOOGLE_LOG(DFATAL) << "Index must be in range of repeated field values. "
+                << "Field: " << field->name();
+  } else if (!field->is_repeated() && index != -1) {
+    GOOGLE_LOG(DFATAL) << "Index must be -1 for singular fields."
+                << "Field: " << field->name();
+  }
+}
+
+TextFormat::ParseLocation TextFormat::ParseInfoTree::GetLocation(
+    const FieldDescriptor* field, int index) const {
+  CheckFieldIndex(field, index);
+  if (index == -1) { index = 0; }
+
+  const vector<TextFormat::ParseLocation>* locations =
+      FindOrNull(locations_, field);
+  if (locations == NULL || index >= locations->size()) {
+    return TextFormat::ParseLocation();
+  }
+
+  return (*locations)[index];
+}
+
+TextFormat::ParseInfoTree* TextFormat::ParseInfoTree::GetTreeForNested(
+    const FieldDescriptor* field, int index) const {
+  CheckFieldIndex(field, index);
+  if (index == -1) { index = 0; }
+
+  const vector<TextFormat::ParseInfoTree*>* trees = FindOrNull(nested_, field);
+  if (trees == NULL || index >= trees->size()) {
+    return NULL;
+  }
+
+  return (*trees)[index];
+}
+
+
+// ===========================================================================
 // Internal class for parsing an ASCII representation of a Protocol Message.
 // This class makes use of the Protocol Message compiler's tokenizer found
 // in //google/protobuf/io/tokenizer.h. Note that class's Parse
@@ -107,9 +193,10 @@ void Message::PrintDebugString() const {
 class TextFormat::Parser::ParserImpl {
  public:
 
-  // Determines if repeated values for a non-repeated field are
-  // permitted, e.g., the string "foo: 1 foo: 2" for a
-  // required/optional field named "foo".
+  // Determines if repeated values for non-repeated fields and
+  // oneofs are permitted, e.g., the string "foo: 1 foo: 2" for a
+  // required/optional field named "foo", or "baz: 1 qux: 2"
+  // where "baz" and "qux" are members of the same oneof.
   enum SingularOverwritePolicy {
     ALLOW_SINGULAR_OVERWRITES = 0,   // the last value is retained
     FORBID_SINGULAR_OVERWRITES = 1,  // an error is issued
@@ -118,12 +205,25 @@ class TextFormat::Parser::ParserImpl {
   ParserImpl(const Descriptor* root_message_type,
              io::ZeroCopyInputStream* input_stream,
              io::ErrorCollector* error_collector,
-             SingularOverwritePolicy singular_overwrite_policy)
+             TextFormat::Finder* finder,
+             ParseInfoTree* parse_info_tree,
+             SingularOverwritePolicy singular_overwrite_policy,
+             bool allow_case_insensitive_field,
+             bool allow_unknown_field,
+             bool allow_unknown_enum,
+             bool allow_field_number,
+             bool allow_relaxed_whitespace)
     : error_collector_(error_collector),
+      finder_(finder),
+      parse_info_tree_(parse_info_tree),
       tokenizer_error_collector_(this),
       tokenizer_(input_stream, &tokenizer_error_collector_),
       root_message_type_(root_message_type),
       singular_overwrite_policy_(singular_overwrite_policy),
+      allow_case_insensitive_field_(allow_case_insensitive_field),
+      allow_unknown_field_(allow_unknown_field),
+      allow_unknown_enum_(allow_unknown_enum),
+      allow_field_number_(allow_field_number),
       had_errors_(false) {
     // For backwards-compatibility with proto1, we need to allow the 'f' suffix
     // for floats.
@@ -132,6 +232,11 @@ class TextFormat::Parser::ParserImpl {
     // '#' starts a comment.
     tokenizer_.set_comment_style(io::Tokenizer::SH_COMMENT_STYLE);
 
+    if (allow_relaxed_whitespace) {
+      tokenizer_.set_require_space_after_number(false);
+      tokenizer_.set_allow_multiline_strings(true);
+    }
+
     // Consume the starting token.
     tokenizer_.Next();
   }
@@ -143,7 +248,7 @@ class TextFormat::Parser::ParserImpl {
   // GOOGLE_LOG(ERROR)).
   bool Parse(Message* output) {
     // Consume fields until we cannot do so anymore.
-    while(true) {
+    while (true) {
       if (LookingAtType(io::Tokenizer::TYPE_END)) {
         return !had_errors_;
       }
@@ -228,6 +333,7 @@ class TextFormat::Parser::ParserImpl {
     return true;
   }
 
+
   // Consumes the current field (as returned by the tokenizer) on the
   // passed in message.
   bool ConsumeField(Message* message) {
@@ -237,6 +343,8 @@ class TextFormat::Parser::ParserImpl {
     string field_name;
 
     const FieldDescriptor* field = NULL;
+    int start_line = tokenizer_.current().line;
+    int start_column = tokenizer_.current().column;
 
     if (TryConsume("[")) {
       // Extension.
@@ -249,72 +357,200 @@ class TextFormat::Parser::ParserImpl {
       }
       DO(Consume("]"));
 
-      field = reflection->FindKnownExtensionByName(field_name);
+      field = (finder_ != NULL
+               ? finder_->FindExtension(message, field_name)
+               : reflection->FindKnownExtensionByName(field_name));
 
       if (field == NULL) {
-        ReportError("Extension \"" + field_name + "\" is not defined or "
-                    "is not an extension of \"" +
-                    descriptor->full_name() + "\".");
-        return false;
+        if (!allow_unknown_field_) {
+          ReportError("Extension \"" + field_name + "\" is not defined or "
+                      "is not an extension of \"" +
+                      descriptor->full_name() + "\".");
+          return false;
+        } else {
+          ReportWarning("Extension \"" + field_name + "\" is not defined or "
+                        "is not an extension of \"" +
+                        descriptor->full_name() + "\".");
+        }
       }
     } else {
       DO(ConsumeIdentifier(&field_name));
 
-      field = descriptor->FindFieldByName(field_name);
-      // Group names are expected to be capitalized as they appear in the
-      // .proto file, which actually matches their type names, not their field
-      // names.
-      if (field == NULL) {
-        string lower_field_name = field_name;
-        LowerString(&lower_field_name);
-        field = descriptor->FindFieldByName(lower_field_name);
-        // If the case-insensitive match worked but the field is NOT a group,
-        if (field != NULL && field->type() != FieldDescriptor::TYPE_GROUP) {
+      int32 field_number;
+      if (allow_field_number_ && safe_strto32(field_name, &field_number)) {
+        if (descriptor->IsExtensionNumber(field_number)) {
+          field = reflection->FindKnownExtensionByNumber(field_number);
+        } else {
+          field = descriptor->FindFieldByNumber(field_number);
+        }
+      } else {
+        field = descriptor->FindFieldByName(field_name);
+        // Group names are expected to be capitalized as they appear in the
+        // .proto file, which actually matches their type names, not their
+        // field names.
+        if (field == NULL) {
+          string lower_field_name = field_name;
+          LowerString(&lower_field_name);
+          field = descriptor->FindFieldByName(lower_field_name);
+          // If the case-insensitive match worked but the field is NOT a group,
+          if (field != NULL && field->type() != FieldDescriptor::TYPE_GROUP) {
+            field = NULL;
+          }
+        }
+        // Again, special-case group names as described above.
+        if (field != NULL && field->type() == FieldDescriptor::TYPE_GROUP
+            && field->message_type()->name() != field_name) {
           field = NULL;
         }
-      }
-      // Again, special-case group names as described above.
-      if (field != NULL && field->type() == FieldDescriptor::TYPE_GROUP
-          && field->message_type()->name() != field_name) {
-        field = NULL;
+
+        if (field == NULL && allow_case_insensitive_field_) {
+          string lower_field_name = field_name;
+          LowerString(&lower_field_name);
+          field = descriptor->FindFieldByLowercaseName(lower_field_name);
+        }
       }
 
       if (field == NULL) {
-        ReportError("Message type \"" + descriptor->full_name() +
-                    "\" has no field named \"" + field_name + "\".");
-        return false;
+        if (!allow_unknown_field_) {
+          ReportError("Message type \"" + descriptor->full_name() +
+                      "\" has no field named \"" + field_name + "\".");
+          return false;
+        } else {
+          ReportWarning("Message type \"" + descriptor->full_name() +
+                        "\" has no field named \"" + field_name + "\".");
+        }
       }
     }
 
-    // Fail if the field is not repeated and it has already been specified.
-    if ((singular_overwrite_policy_ == FORBID_SINGULAR_OVERWRITES) &&
-        !field->is_repeated() && reflection->HasField(*message, field)) {
-      ReportError("Non-repeated field \"" + field_name +
-                  "\" is specified multiple times.");
-      return false;
+    // Skips unknown field.
+    if (field == NULL) {
+      GOOGLE_CHECK(allow_unknown_field_);
+      // Try to guess the type of this field.
+      // If this field is not a message, there should be a ":" between the
+      // field name and the field value and also the field value should not
+      // start with "{" or "<" which indicates the begining of a message body.
+      // If there is no ":" or there is a "{" or "<" after ":", this field has
+      // to be a message or the input is ill-formed.
+      if (TryConsume(":") && !LookingAt("{") && !LookingAt("<")) {
+        return SkipFieldValue();
+      } else {
+        return SkipFieldMessage();
+      }
+    }
+
+    if (singular_overwrite_policy_ == FORBID_SINGULAR_OVERWRITES) {
+      // Fail if the field is not repeated and it has already been specified.
+      if (!field->is_repeated() && reflection->HasField(*message, field)) {
+        ReportError("Non-repeated field \"" + field_name +
+                    "\" is specified multiple times.");
+        return false;
+      }
+      // Fail if the field is a member of a oneof and another member has already
+      // been specified.
+      const OneofDescriptor* oneof = field->containing_oneof();
+      if (oneof != NULL && reflection->HasOneof(*message, oneof)) {
+        const FieldDescriptor* other_field =
+            reflection->GetOneofFieldDescriptor(*message, oneof);
+        ReportError("Field \"" + field_name + "\" is specified along with "
+                    "field \"" + other_field->name() + "\", another member "
+                    "of oneof \"" + oneof->name() + "\".");
+        return false;
+      }
     }
 
     // Perform special handling for embedded message types.
     if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
       // ':' is optional here.
       TryConsume(":");
-      DO(ConsumeFieldMessage(message, reflection, field));
     } else {
+      // ':' is required here.
       DO(Consume(":"));
+    }
+
+    if (field->is_repeated() && TryConsume("[")) {
+      // Short repeated format, e.g.  "foo: [1, 2, 3]"
+      while (true) {
+        if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
+          // Perform special handling for embedded message types.
+          DO(ConsumeFieldMessage(message, reflection, field));
+        } else {
+          DO(ConsumeFieldValue(message, reflection, field));
+        }
+        if (TryConsume("]")) {
+          break;
+        }
+        DO(Consume(","));
+      }
+    } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
+      DO(ConsumeFieldMessage(message, reflection, field));
+    } else {
       DO(ConsumeFieldValue(message, reflection, field));
     }
 
+    // For historical reasons, fields may optionally be separated by commas or
+    // semicolons.
+    TryConsume(";") || TryConsume(",");
+
     if (field->options().deprecated()) {
       ReportWarning("text format contains deprecated field \""
                     + field_name + "\"");
     }
 
+    // If a parse info tree exists, add the location for the parsed
+    // field.
+    if (parse_info_tree_ != NULL) {
+      RecordLocation(parse_info_tree_, field,
+                     ParseLocation(start_line, start_column));
+    }
+
+    return true;
+  }
+
+  // Skips the next field including the field's name and value.
+  bool SkipField() {
+    string field_name;
+    if (TryConsume("[")) {
+      // Extension name.
+      DO(ConsumeIdentifier(&field_name));
+      while (TryConsume(".")) {
+        string part;
+        DO(ConsumeIdentifier(&part));
+        field_name += ".";
+        field_name += part;
+      }
+      DO(Consume("]"));
+    } else {
+      DO(ConsumeIdentifier(&field_name));
+    }
+
+    // Try to guess the type of this field.
+    // If this field is not a message, there should be a ":" between the
+    // field name and the field value and also the field value should not
+    // start with "{" or "<" which indicates the begining of a message body.
+    // If there is no ":" or there is a "{" or "<" after ":", this field has
+    // to be a message or the input is ill-formed.
+    if (TryConsume(":") && !LookingAt("{") && !LookingAt("<")) {
+      DO(SkipFieldValue());
+    } else {
+      DO(SkipFieldMessage());
+    }
+    // For historical reasons, fields may optionally be separated by commas or
+    // semicolons.
+    TryConsume(";") || TryConsume(",");
     return true;
   }
 
   bool ConsumeFieldMessage(Message* message,
                            const Reflection* reflection,
                            const FieldDescriptor* field) {
+
+    // If the parse information tree is not NULL, create a nested one
+    // for the nested message.
+    ParseInfoTree* parent = parse_info_tree_;
+    if (parent != NULL) {
+      parse_info_tree_ = CreateNested(parent, field);
+    }
+
     string delimeter;
     if (TryConsume("<")) {
       delimeter = ">";
@@ -329,6 +565,26 @@ class TextFormat::Parser::ParserImpl {
       DO(ConsumeMessage(reflection->MutableMessage(message, field),
                         delimeter));
     }
+
+    // Reset the parse information tree.
+    parse_info_tree_ = parent;
+    return true;
+  }
+
+  // Skips the whole body of a message including the begining delimeter and
+  // the ending delimeter.
+  bool SkipFieldMessage() {
+    string delimeter;
+    if (TryConsume("<")) {
+      delimeter = ">";
+    } else {
+      DO(Consume("{"));
+      delimeter = "}";
+    }
+    while (!LookingAt(">") &&  !LookingAt("}")) {
+      DO(SkipField());
+    }
+    DO(Consume(delimeter));
     return true;
   }
 
@@ -397,34 +653,57 @@ class TextFormat::Parser::ParserImpl {
       }
 
       case FieldDescriptor::CPPTYPE_BOOL: {
-        string value;
-        DO(ConsumeIdentifier(&value));
-
-        if (value == "true") {
-          SET_FIELD(Bool, true);
-        } else if (value == "false") {
-          SET_FIELD(Bool, false);
+        if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
+          uint64 value;
+          DO(ConsumeUnsignedInteger(&value, 1));
+          SET_FIELD(Bool, value);
         } else {
-          ReportError("Invalid value for boolean field \"" + field->name()
-                      + "\". Value: \"" + value  + "\".");
-          return false;
+          string value;
+          DO(ConsumeIdentifier(&value));
+          if (value == "true" || value == "True" || value == "t") {
+            SET_FIELD(Bool, true);
+          } else if (value == "false" || value == "False" || value == "f") {
+            SET_FIELD(Bool, false);
+          } else {
+            ReportError("Invalid value for boolean field \"" + field->name()
+                        + "\". Value: \"" + value  + "\".");
+            return false;
+          }
         }
         break;
       }
 
       case FieldDescriptor::CPPTYPE_ENUM: {
         string value;
-        DO(ConsumeIdentifier(&value));
-
-        // Find the enumeration value.
         const EnumDescriptor* enum_type = field->enum_type();
-        const EnumValueDescriptor* enum_value
-            = enum_type->FindValueByName(value);
+        const EnumValueDescriptor* enum_value = NULL;
+
+        if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
+          DO(ConsumeIdentifier(&value));
+          // Find the enumeration value.
+          enum_value = enum_type->FindValueByName(value);
+
+        } else if (LookingAt("-") ||
+                   LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
+          int64 int_value;
+          DO(ConsumeSignedInteger(&int_value, kint32max));
+          value = SimpleItoa(int_value);        // for error reporting
+          enum_value = enum_type->FindValueByNumber(int_value);
+        } else {
+          ReportError("Expected integer or identifier.");
+          return false;
+        }
 
         if (enum_value == NULL) {
-          ReportError("Unknown enumeration value of \"" + value  + "\" for "
-                      "field \"" + field->name() + "\".");
-          return false;
+          if (!allow_unknown_enum_) {
+            ReportError("Unknown enumeration value of \"" + value  + "\" for "
+                        "field \"" + field->name() + "\".");
+            return false;
+          } else {
+            ReportWarning("Unknown enumeration value of \"" + value  + "\" for "
+                          "field \"" + field->name() + "\".");
+            return true;
+          }
         }
 
         SET_FIELD(Enum, enum_value);
@@ -442,6 +721,60 @@ class TextFormat::Parser::ParserImpl {
     return true;
   }
 
+  bool SkipFieldValue() {
+    if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
+      while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
+        tokenizer_.Next();
+      }
+      return true;
+    }
+    // Possible field values other than string:
+    //   12345        => TYPE_INTEGER
+    //   -12345       => TYPE_SYMBOL + TYPE_INTEGER
+    //   1.2345       => TYPE_FLOAT
+    //   -1.2345      => TYPE_SYMBOL + TYPE_FLOAT
+    //   inf          => TYPE_IDENTIFIER
+    //   -inf         => TYPE_SYMBOL + TYPE_IDENTIFIER
+    //   TYPE_INTEGER => TYPE_IDENTIFIER
+    // Divides them into two group, one with TYPE_SYMBOL
+    // and the other without:
+    //   Group one:
+    //     12345        => TYPE_INTEGER
+    //     1.2345       => TYPE_FLOAT
+    //     inf          => TYPE_IDENTIFIER
+    //     TYPE_INTEGER => TYPE_IDENTIFIER
+    //   Group two:
+    //     -12345       => TYPE_SYMBOL + TYPE_INTEGER
+    //     -1.2345      => TYPE_SYMBOL + TYPE_FLOAT
+    //     -inf         => TYPE_SYMBOL + TYPE_IDENTIFIER
+    // As we can see, the field value consists of an optional '-' and one of
+    // TYPE_INTEGER, TYPE_FLOAT and TYPE_IDENTIFIER.
+    bool has_minus = TryConsume("-");
+    if (!LookingAtType(io::Tokenizer::TYPE_INTEGER) &&
+        !LookingAtType(io::Tokenizer::TYPE_FLOAT) &&
+        !LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
+      return false;
+    }
+    // Combination of '-' and TYPE_IDENTIFIER may result in an invalid field
+    // value while other combinations all generate valid values.
+    // We check if the value of this combination is valid here.
+    // TYPE_IDENTIFIER after a '-' should be one of the float values listed
+    // below:
+    //   inf, inff, infinity, nan
+    if (has_minus && LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
+      string text = tokenizer_.current().text;
+      LowerString(&text);
+      if (text != "inf" &&
+          text != "infinity" &&
+          text != "nan") {
+        ReportError("Invalid float number: " + text);
+        return false;
+      }
+    }
+    tokenizer_.Next();
+    return true;
+  }
+
   // Returns true if the current token's text is equal to that specified.
   bool LookingAt(const string& text) {
     return tokenizer_.current().text == text;
@@ -455,15 +788,23 @@ class TextFormat::Parser::ParserImpl {
   // Consumes an identifier and saves its value in the identifier parameter.
   // Returns false if the token is not of type IDENTFIER.
   bool ConsumeIdentifier(string* identifier) {
-    if (!LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
-      ReportError("Expected identifier.");
-      return false;
+    if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
+      *identifier = tokenizer_.current().text;
+      tokenizer_.Next();
+      return true;
     }
 
-    *identifier = tokenizer_.current().text;
+    // If allow_field_numer_ or allow_unknown_field_ is true, we should able
+    // to parse integer identifiers.
+    if ((allow_field_number_ || allow_unknown_field_)
+        && LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
+      *identifier = tokenizer_.current().text;
+      tokenizer_.Next();
+      return true;
+    }
 
-    tokenizer_.Next();
-    return true;
+    ReportError("Expected identifier.");
+    return false;
   }
 
   // Consumes a string and saves its value in the text parameter.
@@ -530,6 +871,29 @@ class TextFormat::Parser::ParserImpl {
     return true;
   }
 
+  // Consumes a uint64 and saves its value in the value parameter.
+  // Accepts decimal numbers only, rejects hex or oct numbers.
+  bool ConsumeUnsignedDecimalInteger(uint64* value, uint64 max_value) {
+    if (!LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
+      ReportError("Expected integer.");
+      return false;
+    }
+
+    const string& text = tokenizer_.current().text;
+    if (IsHexNumber(text) || IsOctNumber(text)) {
+      ReportError("Expect a decimal number.");
+      return false;
+    }
+
+    if (!io::Tokenizer::ParseInteger(text, max_value, value)) {
+      ReportError("Integer out of range.");
+      return false;
+    }
+
+    tokenizer_.Next();
+    return true;
+  }
+
   // Consumes a double and saves its value in the value parameter.
   // Note that since the tokenizer does not support negative numbers,
   // we actually may consume an additional token (for the minus sign) in this
@@ -547,7 +911,7 @@ class TextFormat::Parser::ParserImpl {
     if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
       // We have found an integer value for the double.
       uint64 integer_value;
-      DO(ConsumeUnsignedInteger(&integer_value, kuint64max));
+      DO(ConsumeUnsignedDecimalInteger(&integer_value, kuint64max));
 
       *value = static_cast<double>(integer_value);
     } else if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) {
@@ -559,7 +923,8 @@ class TextFormat::Parser::ParserImpl {
     } else if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
       string text = tokenizer_.current().text;
       LowerString(&text);
-      if (text == "inf" || text == "infinity") {
+      if (text == "inf" ||
+          text == "infinity") {
         *value = std::numeric_limits<double>::infinity();
         tokenizer_.Next();
       } else if (text == "nan") {
@@ -616,7 +981,7 @@ class TextFormat::Parser::ParserImpl {
     explicit ParserErrorCollector(TextFormat::Parser::ParserImpl* parser) :
         parser_(parser) { }
 
-    virtual ~ParserErrorCollector() { };
+    virtual ~ParserErrorCollector() { }
 
     virtual void AddError(int line, int column, const string& message) {
       parser_->ReportError(line, column, message);
@@ -632,10 +997,16 @@ class TextFormat::Parser::ParserImpl {
   };
 
   io::ErrorCollector* error_collector_;
+  TextFormat::Finder* finder_;
+  ParseInfoTree* parse_info_tree_;
   ParserErrorCollector tokenizer_error_collector_;
   io::Tokenizer tokenizer_;
   const Descriptor* root_message_type_;
   SingularOverwritePolicy singular_overwrite_policy_;
+  const bool allow_case_insensitive_field_;
+  const bool allow_unknown_field_;
+  const bool allow_unknown_enum_;
+  const bool allow_field_number_;
   bool had_errors_;
 };
 
@@ -661,7 +1032,7 @@ class TextFormat::Printer::TextGenerator {
   ~TextGenerator() {
     // Only BackUp() if we're sure we've successfully called Next() at least
     // once.
-    if (buffer_size_ > 0) {
+    if (!failed_ && buffer_size_ > 0) {
       output_->BackUp(buffer_size_);
     }
   }
@@ -765,17 +1136,39 @@ class TextFormat::Printer::TextGenerator {
 
 // ===========================================================================
 
+TextFormat::Finder::~Finder() {
+}
+
 TextFormat::Parser::Parser()
   : error_collector_(NULL),
-    allow_partial_(false) {}
+    finder_(NULL),
+    parse_info_tree_(NULL),
+    allow_partial_(false),
+    allow_case_insensitive_field_(false),
+    allow_unknown_field_(false),
+    allow_unknown_enum_(false),
+    allow_field_number_(false),
+    allow_relaxed_whitespace_(false),
+    allow_singular_overwrites_(false) {
+}
 
 TextFormat::Parser::~Parser() {}
 
 bool TextFormat::Parser::Parse(io::ZeroCopyInputStream* input,
                                Message* output) {
   output->Clear();
+
+  ParserImpl::SingularOverwritePolicy overwrites_policy =
+      allow_singular_overwrites_
+      ? ParserImpl::ALLOW_SINGULAR_OVERWRITES
+      : ParserImpl::FORBID_SINGULAR_OVERWRITES;
+
   ParserImpl parser(output->GetDescriptor(), input, error_collector_,
-                    ParserImpl::FORBID_SINGULAR_OVERWRITES);
+                    finder_, parse_info_tree_,
+                    overwrites_policy,
+                    allow_case_insensitive_field_, allow_unknown_field_,
+                    allow_unknown_enum_, allow_field_number_,
+                    allow_relaxed_whitespace_);
   return MergeUsingImpl(input, output, &parser);
 }
 
@@ -788,7 +1181,11 @@ bool TextFormat::Parser::ParseFromString(const string& input,
 bool TextFormat::Parser::Merge(io::ZeroCopyInputStream* input,
                                Message* output) {
   ParserImpl parser(output->GetDescriptor(), input, error_collector_,
-                    ParserImpl::ALLOW_SINGULAR_OVERWRITES);
+                    finder_, parse_info_tree_,
+                    ParserImpl::ALLOW_SINGULAR_OVERWRITES,
+                    allow_case_insensitive_field_, allow_unknown_field_,
+                    allow_unknown_enum_, allow_field_number_,
+                    allow_relaxed_whitespace_);
   return MergeUsingImpl(input, output, &parser);
 }
 
@@ -798,7 +1195,7 @@ bool TextFormat::Parser::MergeFromString(const string& input,
   return Merge(&input_stream, output);
 }
 
-bool TextFormat::Parser::MergeUsingImpl(io::ZeroCopyInputStream* input,
+bool TextFormat::Parser::MergeUsingImpl(io::ZeroCopyInputStream* /* input */,
                                         Message* output,
                                         ParserImpl* parser_impl) {
   if (!parser_impl->Parse(output)) return false;
@@ -806,7 +1203,7 @@ bool TextFormat::Parser::MergeUsingImpl(io::ZeroCopyInputStream* input,
     vector<string> missing_fields;
     output->FindInitializationErrors(&missing_fields);
     parser_impl->ReportError(-1, 0, "Message missing required fields: " +
-                                    JoinStrings(missing_fields, ", "));
+                                        Join(missing_fields, ", "));
     return false;
   }
   return true;
@@ -818,7 +1215,11 @@ bool TextFormat::Parser::ParseFieldValueFromString(
     Message* output) {
   io::ArrayInputStream input_stream(input.data(), input.size());
   ParserImpl parser(output->GetDescriptor(), &input_stream, error_collector_,
-                    ParserImpl::ALLOW_SINGULAR_OVERWRITES);
+                    finder_, parse_info_tree_,
+                    ParserImpl::ALLOW_SINGULAR_OVERWRITES,
+                    allow_case_insensitive_field_, allow_unknown_field_,
+                    allow_unknown_enum_, allow_field_number_,
+                    allow_relaxed_whitespace_);
   return parser.ParseField(field, output);
 }
 
@@ -844,29 +1245,138 @@ bool TextFormat::Parser::ParseFieldValueFromString(
 
 // ===========================================================================
 
+// The default implementation for FieldValuePrinter. The base class just
+// does simple formatting. That way, deriving classes could decide to fallback
+// to that behavior.
+TextFormat::FieldValuePrinter::FieldValuePrinter() {}
+TextFormat::FieldValuePrinter::~FieldValuePrinter() {}
+string TextFormat::FieldValuePrinter::PrintBool(bool val) const {
+  return val ? "true" : "false";
+}
+string TextFormat::FieldValuePrinter::PrintInt32(int32 val) const {
+  return SimpleItoa(val);
+}
+string TextFormat::FieldValuePrinter::PrintUInt32(uint32 val) const {
+  return SimpleItoa(val);
+}
+string TextFormat::FieldValuePrinter::PrintInt64(int64 val) const {
+  return SimpleItoa(val);
+}
+string TextFormat::FieldValuePrinter::PrintUInt64(uint64 val) const {
+  return SimpleItoa(val);
+}
+string TextFormat::FieldValuePrinter::PrintFloat(float val) const {
+  return SimpleFtoa(val);
+}
+string TextFormat::FieldValuePrinter::PrintDouble(double val) const {
+  return SimpleDtoa(val);
+}
+string TextFormat::FieldValuePrinter::PrintString(const string& val) const {
+  return StrCat("\"", CEscape(val), "\"");
+}
+string TextFormat::FieldValuePrinter::PrintBytes(const string& val) const {
+  return PrintString(val);
+}
+string TextFormat::FieldValuePrinter::PrintEnum(int32 val,
+                                                const string& name) const {
+  return name;
+}
+string TextFormat::FieldValuePrinter::PrintFieldName(
+    const Message& message,
+    const Reflection* reflection,
+    const FieldDescriptor* field) const {
+  if (field->is_extension()) {
+    // We special-case MessageSet elements for compatibility with proto1.
+    if (field->containing_type()->options().message_set_wire_format()
+        && field->type() == FieldDescriptor::TYPE_MESSAGE
+        && field->is_optional()
+        && field->extension_scope() == field->message_type()) {
+      return StrCat("[", field->message_type()->full_name(), "]");
+    } else {
+      return StrCat("[", field->full_name(), "]");
+    }
+  } else if (field->type() == FieldDescriptor::TYPE_GROUP) {
+    // Groups must be serialized with their original capitalization.
+    return field->message_type()->name();
+  } else {
+    return field->name();
+  }
+}
+string TextFormat::FieldValuePrinter::PrintMessageStart(
+    const Message& message,
+    int field_index,
+    int field_count,
+    bool single_line_mode) const {
+  return single_line_mode ? " { " : " {\n";
+}
+string TextFormat::FieldValuePrinter::PrintMessageEnd(
+    const Message& message,
+    int field_index,
+    int field_count,
+    bool single_line_mode) const {
+  return single_line_mode ? "} " : "}\n";
+}
+
+namespace {
+// Our own specialization: for UTF8 escaped strings.
+class FieldValuePrinterUtf8Escaping : public TextFormat::FieldValuePrinter {
+ public:
+  virtual string PrintString(const string& val) const {
+    return StrCat("\"", strings::Utf8SafeCEscape(val), "\"");
+  }
+  virtual string PrintBytes(const string& val) const {
+    return TextFormat::FieldValuePrinter::PrintString(val);
+  }
+};
+
+}  // namespace
+
 TextFormat::Printer::Printer()
   : initial_indent_level_(0),
     single_line_mode_(false),
+    use_field_number_(false),
     use_short_repeated_primitives_(false),
-    utf8_string_escaping_(false) {}
+    hide_unknown_fields_(false),
+    print_message_fields_in_index_order_(false) {
+  SetUseUtf8StringEscaping(false);
+}
 
-TextFormat::Printer::~Printer() {}
+TextFormat::Printer::~Printer() {
+  STLDeleteValues(&custom_printers_);
+}
+
+void TextFormat::Printer::SetUseUtf8StringEscaping(bool as_utf8) {
+  SetDefaultFieldValuePrinter(as_utf8
+                              ? new FieldValuePrinterUtf8Escaping()
+                              : new FieldValuePrinter());
+}
+
+void TextFormat::Printer::SetDefaultFieldValuePrinter(
+    const FieldValuePrinter* printer) {
+  default_field_value_printer_.reset(printer);
+}
+
+bool TextFormat::Printer::RegisterFieldValuePrinter(
+    const FieldDescriptor* field,
+    const FieldValuePrinter* printer) {
+  return field != NULL
+      && printer != NULL
+      && custom_printers_.insert(make_pair(field, printer)).second;
+}
 
 bool TextFormat::Printer::PrintToString(const Message& message,
-                                        string* output) {
+                                        string* output) const {
   GOOGLE_DCHECK(output) << "output specified is NULL";
 
   output->clear();
   io::StringOutputStream output_stream(output);
 
-  bool result = Print(message, &output_stream);
-
-  return result;
+  return Print(message, &output_stream);
 }
 
 bool TextFormat::Printer::PrintUnknownFieldsToString(
     const UnknownFieldSet& unknown_fields,
-    string* output) {
+    string* output) const {
   GOOGLE_DCHECK(output) << "output specified is NULL";
 
   output->clear();
@@ -875,7 +1385,7 @@ bool TextFormat::Printer::PrintUnknownFieldsToString(
 }
 
 bool TextFormat::Printer::Print(const Message& message,
-                                io::ZeroCopyOutputStream* output) {
+                                io::ZeroCopyOutputStream* output) const {
   TextGenerator generator(output, initial_indent_level_);
 
   Print(message, generator);
@@ -886,7 +1396,7 @@ bool TextFormat::Printer::Print(const Message& message,
 
 bool TextFormat::Printer::PrintUnknownFields(
     const UnknownFieldSet& unknown_fields,
-    io::ZeroCopyOutputStream* output) {
+    io::ZeroCopyOutputStream* output) const {
   TextGenerator generator(output, initial_indent_level_);
 
   PrintUnknownFields(unknown_fields, generator);
@@ -895,22 +1405,37 @@ bool TextFormat::Printer::PrintUnknownFields(
   return !generator.failed();
 }
 
+namespace {
+// Comparison functor for sorting FieldDescriptors by field index.
+struct FieldIndexSorter {
+  bool operator()(const FieldDescriptor* left,
+                  const FieldDescriptor* right) const {
+    return left->index() < right->index();
+  }
+};
+}  // namespace
+
 void TextFormat::Printer::Print(const Message& message,
-                                TextGenerator& generator) {
+                                TextGenerator& generator) const {
   const Reflection* reflection = message.GetReflection();
   vector<const FieldDescriptor*> fields;
   reflection->ListFields(message, &fields);
+  if (print_message_fields_in_index_order_) {
+    sort(fields.begin(), fields.end(), FieldIndexSorter());
+  }
   for (int i = 0; i < fields.size(); i++) {
     PrintField(message, reflection, fields[i], generator);
   }
-  PrintUnknownFields(reflection->GetUnknownFields(message), generator);
+  if (!hide_unknown_fields_) {
+    PrintUnknownFields(reflection->GetUnknownFields(message), generator);
+  }
 }
 
 void TextFormat::Printer::PrintFieldValueToString(
     const Message& message,
     const FieldDescriptor* field,
     int index,
-    string* output) {
+    string* output) const {
 
   GOOGLE_DCHECK(output) << "output specified is NULL";
 
@@ -924,7 +1449,7 @@ void TextFormat::Printer::PrintFieldValueToString(
 void TextFormat::Printer::PrintField(const Message& message,
                                      const Reflection* reflection,
                                      const FieldDescriptor* field,
-                                     TextGenerator& generator) {
+                                     TextGenerator& generator) const {
   if (use_short_repeated_primitives_ &&
       field->is_repeated() &&
       field->cpp_type() != FieldDescriptor::CPPTYPE_STRING &&
@@ -942,35 +1467,30 @@ void TextFormat::Printer::PrintField(const Message& message,
   }
 
   for (int j = 0; j < count; ++j) {
+    const int field_index = field->is_repeated() ? j : -1;
+
     PrintFieldName(message, reflection, field, generator);
 
     if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
-      if (single_line_mode_) {
-        generator.Print(" { ");
-      } else {
-        generator.Print(" {\n");
-        generator.Indent();
-      }
+      const FieldValuePrinter* printer = FindWithDefault(
+          custom_printers_, field, default_field_value_printer_.get());
+      const Message& sub_message =
+              field->is_repeated()
+              ? reflection->GetRepeatedMessage(message, field, j)
+              : reflection->GetMessage(message, field);
+      generator.Print(
+          printer->PrintMessageStart(
+              sub_message, field_index, count, single_line_mode_));
+      generator.Indent();
+      Print(sub_message, generator);
+      generator.Outdent();
+      generator.Print(
+          printer->PrintMessageEnd(
+              sub_message, field_index, count, single_line_mode_));
     } else {
       generator.Print(": ");
-    }
-
-    // Write the field value.
-    int field_index = j;
-    if (!field->is_repeated()) {
-      field_index = -1;
-    }
-
-    PrintFieldValue(message, reflection, field, field_index, generator);
-
-    if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
-      if (single_line_mode_) {
-        generator.Print("} ");
-      } else {
-        generator.Outdent();
-        generator.Print("}\n");
-      }
-    } else {
+      // Write the field value.
+      PrintFieldValue(message, reflection, field, field_index, generator);
       if (single_line_mode_) {
         generator.Print(" ");
       } else {
@@ -980,10 +1500,11 @@ void TextFormat::Printer::PrintField(const Message& message,
   }
 }
 
-void TextFormat::Printer::PrintShortRepeatedField(const Message& message,
-                                                  const Reflection* reflection,
-                                                  const FieldDescriptor* field,
-                                                  TextGenerator& generator) {
+void TextFormat::Printer::PrintShortRepeatedField(
+    const Message& message,
+    const Reflection* reflection,
+    const FieldDescriptor* field,
+    TextGenerator& generator) const {
   // Print primitive repeated field in short form.
   PrintFieldName(message, reflection, field, generator);
 
@@ -1003,27 +1524,17 @@ void TextFormat::Printer::PrintShortRepeatedField(const Message& message,
 void TextFormat::Printer::PrintFieldName(const Message& message,
                                          const Reflection* reflection,
                                          const FieldDescriptor* field,
-                                         TextGenerator& generator) {
-  if (field->is_extension()) {
-    generator.Print("[");
-    // We special-case MessageSet elements for compatibility with proto1.
-    if (field->containing_type()->options().message_set_wire_format()
-        && field->type() == FieldDescriptor::TYPE_MESSAGE
-        && field->is_optional()
-        && field->extension_scope() == field->message_type()) {
-      generator.Print(field->message_type()->full_name());
-    } else {
-      generator.Print(field->full_name());
-    }
-    generator.Print("]");
-  } else {
-    if (field->type() == FieldDescriptor::TYPE_GROUP) {
-      // Groups must be serialized with their original capitalization.
-      generator.Print(field->message_type()->name());
-    } else {
-      generator.Print(field->name());
-    }
+                                         TextGenerator& generator) const {
+  // if use_field_number_ is true, prints field number instead
+  // of field name.
+  if (use_field_number_) {
+    generator.Print(SimpleItoa(field->number()));
+    return;
   }
+
+  const FieldValuePrinter* printer = FindWithDefault(
+      custom_printers_, field, default_field_value_printer_.get());
+  generator.Print(printer->PrintFieldName(message, reflection, field));
 }
 
 void TextFormat::Printer::PrintFieldValue(
@@ -1031,66 +1542,60 @@ void TextFormat::Printer::PrintFieldValue(
     const Reflection* reflection,
     const FieldDescriptor* field,
     int index,
-    TextGenerator& generator) {
+    TextGenerator& generator) const {
   GOOGLE_DCHECK(field->is_repeated() || (index == -1))
       << "Index must be -1 for non-repeated fields";
 
+  const FieldValuePrinter* printer
+      = FindWithDefault(custom_printers_, field,
+                        default_field_value_printer_.get());
+
   switch (field->cpp_type()) {
-#define OUTPUT_FIELD(CPPTYPE, METHOD, TO_STRING)                             \
-      case FieldDescriptor::CPPTYPE_##CPPTYPE:                               \
-        generator.Print(TO_STRING(field->is_repeated() ?                     \
-          reflection->GetRepeated##METHOD(message, field, index) :           \
-          reflection->Get##METHOD(message, field)));                         \
-        break;                                                               \
-
-      OUTPUT_FIELD( INT32,  Int32, SimpleItoa);
-      OUTPUT_FIELD( INT64,  Int64, SimpleItoa);
-      OUTPUT_FIELD(UINT32, UInt32, SimpleItoa);
-      OUTPUT_FIELD(UINT64, UInt64, SimpleItoa);
-      OUTPUT_FIELD( FLOAT,  Float, SimpleFtoa);
-      OUTPUT_FIELD(DOUBLE, Double, SimpleDtoa);
+#define OUTPUT_FIELD(CPPTYPE, METHOD)                                   \
+    case FieldDescriptor::CPPTYPE_##CPPTYPE:                            \
+      generator.Print(printer->Print##METHOD(field->is_repeated()       \
+               ? reflection->GetRepeated##METHOD(message, field, index) \
+               : reflection->Get##METHOD(message, field)));             \
+        break
+
+    OUTPUT_FIELD( INT32,  Int32);
+    OUTPUT_FIELD( INT64,  Int64);
+    OUTPUT_FIELD(UINT32, UInt32);
+    OUTPUT_FIELD(UINT64, UInt64);
+    OUTPUT_FIELD( FLOAT,  Float);
+    OUTPUT_FIELD(DOUBLE, Double);
+    OUTPUT_FIELD(  BOOL,   Bool);
 #undef OUTPUT_FIELD
 
-      case FieldDescriptor::CPPTYPE_STRING: {
-        string scratch;
-        const string& value = field->is_repeated() ?
-            reflection->GetRepeatedStringReference(
-              message, field, index, &scratch) :
-            reflection->GetStringReference(message, field, &scratch);
-
-        generator.Print("\"");
-        if (utf8_string_escaping_) {
-          generator.Print(strings::Utf8SafeCEscape(value));
-        } else {
-          generator.Print(CEscape(value));
-        }
-        generator.Print("\"");
-
-        break;
+    case FieldDescriptor::CPPTYPE_STRING: {
+      string scratch;
+      const string& value = field->is_repeated()
+          ? reflection->GetRepeatedStringReference(
+              message, field, index, &scratch)
+          : reflection->GetStringReference(message, field, &scratch);
+      if (field->type() == FieldDescriptor::TYPE_STRING) {
+        generator.Print(printer->PrintString(value));
+      } else {
+        GOOGLE_DCHECK_EQ(field->type(), FieldDescriptor::TYPE_BYTES);
+        generator.Print(printer->PrintBytes(value));
       }
+      break;
+    }
 
-      case FieldDescriptor::CPPTYPE_BOOL:
-        if (field->is_repeated()) {
-          generator.Print(reflection->GetRepeatedBool(message, field, index)
-                          ? "true" : "false");
-        } else {
-          generator.Print(reflection->GetBool(message, field)
-                          ? "true" : "false");
-        }
-        break;
-
-      case FieldDescriptor::CPPTYPE_ENUM:
-        generator.Print(field->is_repeated() ?
-          reflection->GetRepeatedEnum(message, field, index)->name() :
-          reflection->GetEnum(message, field)->name());
-        break;
+    case FieldDescriptor::CPPTYPE_ENUM: {
+      const EnumValueDescriptor *enum_val = field->is_repeated()
+          ? reflection->GetRepeatedEnum(message, field, index)
+          : reflection->GetEnum(message, field);
+      generator.Print(printer->PrintEnum(enum_val->number(), enum_val->name()));
+      break;
+    }
 
-      case FieldDescriptor::CPPTYPE_MESSAGE:
-        Print(field->is_repeated() ?
-                reflection->GetRepeatedMessage(message, field, index) :
-                reflection->GetMessage(message, field),
-              generator);
-        break;
+    case FieldDescriptor::CPPTYPE_MESSAGE:
+      Print(field->is_repeated()
+            ? reflection->GetRepeatedMessage(message, field, index)
+            : reflection->GetMessage(message, field),
+            generator);
+      break;
   }
 }
 
@@ -1143,7 +1648,7 @@ static string PaddedHex(IntType value) {
 }
 
 void TextFormat::Printer::PrintUnknownFields(
-    const UnknownFieldSet& unknown_fields, TextGenerator& generator) {
+    const UnknownFieldSet& unknown_fields, TextGenerator& generator) const {
   for (int i = 0; i < unknown_fields.field_count(); i++) {
     const UnknownField& field = unknown_fields.field(i);
     string field_number = SimpleItoa(field.number());