diff options
Diffstat (limited to 'src/google/protobuf/text_format.cc')
-rw-r--r-- | src/google/protobuf/text_format.cc | 871 |
1 files changed, 688 insertions, 183 deletions
diff --git a/src/google/protobuf/text_format.cc b/src/google/protobuf/text_format.cc index 137cbce..84cdbb5 100644 --- a/src/google/protobuf/text_format.cc +++ b/src/google/protobuf/text_format.cc @@ -1,6 +1,6 @@ // Protocol Buffers - Google's data interchange format // Copyright 2008 Google Inc. All rights reserved. -// http://code.google.com/p/protobuf/ +// https://developers.google.com/protocol-buffers/ // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are @@ -32,15 +32,18 @@ // Based on original Protocol Buffers design by // Sanjay Ghemawat, Jeff Dean, and others. +#include <algorithm> #include <float.h> #include <math.h> #include <stdio.h> #include <stack> #include <limits> +#include <vector> #include <google/protobuf/text_format.h> #include <google/protobuf/descriptor.h> +#include <google/protobuf/wire_format_lite.h> #include <google/protobuf/io/coded_stream.h> #include <google/protobuf/io/zero_copy_stream.h> #include <google/protobuf/io/zero_copy_stream_impl.h> @@ -48,10 +51,26 @@ #include <google/protobuf/descriptor.pb.h> #include <google/protobuf/io/tokenizer.h> #include <google/protobuf/stubs/strutil.h> +#include <google/protobuf/stubs/map_util.h> +#include <google/protobuf/stubs/stl_util.h> namespace google { namespace protobuf { +namespace { + +inline bool IsHexNumber(const string& str) { + return (str.length() >= 2 && str[0] == '0' && + (str[1] == 'x' || str[1] == 'X')); +} + +inline bool IsOctNumber(const string& str) { + return (str.length() >= 2 && str[0] == '0' && + (str[1] >= '0' && str[1] < '8')); +} + +} // namespace + string Message::DebugString() const { string debug_string; @@ -93,6 +112,73 @@ void Message::PrintDebugString() const { // =========================================================================== +// Implementation of the parse information tree class. 
+TextFormat::ParseInfoTree::ParseInfoTree() { } + +TextFormat::ParseInfoTree::~ParseInfoTree() { + // Remove any nested information trees, as they are owned by this tree. + for (NestedMap::iterator it = nested_.begin(); it != nested_.end(); ++it) { + STLDeleteElements(&(it->second)); + } +} + +void TextFormat::ParseInfoTree::RecordLocation( + const FieldDescriptor* field, + TextFormat::ParseLocation location) { + locations_[field].push_back(location); +} + +TextFormat::ParseInfoTree* TextFormat::ParseInfoTree::CreateNested( + const FieldDescriptor* field) { + // Owned by us in the map. + TextFormat::ParseInfoTree* instance = new TextFormat::ParseInfoTree(); + vector<TextFormat::ParseInfoTree*>* trees = &nested_[field]; + GOOGLE_CHECK(trees); + trees->push_back(instance); + return instance; +} + +void CheckFieldIndex(const FieldDescriptor* field, int index) { + if (field == NULL) { return; } + + if (field->is_repeated() && index == -1) { + GOOGLE_LOG(DFATAL) << "Index must be in range of repeated field values. " + << "Field: " << field->name(); + } else if (!field->is_repeated() && index != -1) { + GOOGLE_LOG(DFATAL) << "Index must be -1 for singular fields. " 
+ << "Field: " << field->name(); + } +} + +TextFormat::ParseLocation TextFormat::ParseInfoTree::GetLocation( + const FieldDescriptor* field, int index) const { + CheckFieldIndex(field, index); + if (index == -1) { index = 0; } + + const vector<TextFormat::ParseLocation>* locations = + FindOrNull(locations_, field); + if (locations == NULL || index >= locations->size()) { + return TextFormat::ParseLocation(); + } + + return (*locations)[index]; +} + +TextFormat::ParseInfoTree* TextFormat::ParseInfoTree::GetTreeForNested( + const FieldDescriptor* field, int index) const { + CheckFieldIndex(field, index); + if (index == -1) { index = 0; } + + const vector<TextFormat::ParseInfoTree*>* trees = FindOrNull(nested_, field); + if (trees == NULL || index >= trees->size()) { + return NULL; + } + + return (*trees)[index]; +} + + +// =========================================================================== // Internal class for parsing an ASCII representation of a Protocol Message. // This class makes use of the Protocol Message compiler's tokenizer found // in //google/protobuf/io/tokenizer.h. Note that class's Parse @@ -107,9 +193,10 @@ void Message::PrintDebugString() const { class TextFormat::Parser::ParserImpl { public: - // Determines if repeated values for a non-repeated field are - // permitted, e.g., the string "foo: 1 foo: 2" for a - // required/optional field named "foo". + // Determines if repeated values for non-repeated fields and + // oneofs are permitted, e.g., the string "foo: 1 foo: 2" for a + // required/optional field named "foo", or "baz: 1 qux: 2" + // where "baz" and "qux" are members of the same oneof. 
enum SingularOverwritePolicy { ALLOW_SINGULAR_OVERWRITES = 0, // the last value is retained FORBID_SINGULAR_OVERWRITES = 1, // an error is issued @@ -118,12 +205,25 @@ class TextFormat::Parser::ParserImpl { ParserImpl(const Descriptor* root_message_type, io::ZeroCopyInputStream* input_stream, io::ErrorCollector* error_collector, - SingularOverwritePolicy singular_overwrite_policy) + TextFormat::Finder* finder, + ParseInfoTree* parse_info_tree, + SingularOverwritePolicy singular_overwrite_policy, + bool allow_case_insensitive_field, + bool allow_unknown_field, + bool allow_unknown_enum, + bool allow_field_number, + bool allow_relaxed_whitespace) : error_collector_(error_collector), + finder_(finder), + parse_info_tree_(parse_info_tree), tokenizer_error_collector_(this), tokenizer_(input_stream, &tokenizer_error_collector_), root_message_type_(root_message_type), singular_overwrite_policy_(singular_overwrite_policy), + allow_case_insensitive_field_(allow_case_insensitive_field), + allow_unknown_field_(allow_unknown_field), + allow_unknown_enum_(allow_unknown_enum), + allow_field_number_(allow_field_number), had_errors_(false) { // For backwards-compatibility with proto1, we need to allow the 'f' suffix // for floats. @@ -132,6 +232,11 @@ class TextFormat::Parser::ParserImpl { // '#' starts a comment. tokenizer_.set_comment_style(io::Tokenizer::SH_COMMENT_STYLE); + if (allow_relaxed_whitespace) { + tokenizer_.set_require_space_after_number(false); + tokenizer_.set_allow_multiline_strings(true); + } + // Consume the starting token. tokenizer_.Next(); } @@ -143,7 +248,7 @@ class TextFormat::Parser::ParserImpl { // GOOGLE_LOG(ERROR)). bool Parse(Message* output) { // Consume fields until we cannot do so anymore. 
- while(true) { + while (true) { if (LookingAtType(io::Tokenizer::TYPE_END)) { return !had_errors_; } @@ -228,6 +333,7 @@ class TextFormat::Parser::ParserImpl { return true; } + // Consumes the current field (as returned by the tokenizer) on the // passed in message. bool ConsumeField(Message* message) { @@ -237,6 +343,8 @@ class TextFormat::Parser::ParserImpl { string field_name; const FieldDescriptor* field = NULL; + int start_line = tokenizer_.current().line; + int start_column = tokenizer_.current().column; if (TryConsume("[")) { // Extension. @@ -249,72 +357,200 @@ class TextFormat::Parser::ParserImpl { } DO(Consume("]")); - field = reflection->FindKnownExtensionByName(field_name); + field = (finder_ != NULL + ? finder_->FindExtension(message, field_name) + : reflection->FindKnownExtensionByName(field_name)); if (field == NULL) { - ReportError("Extension \"" + field_name + "\" is not defined or " - "is not an extension of \"" + - descriptor->full_name() + "\"."); - return false; + if (!allow_unknown_field_) { + ReportError("Extension \"" + field_name + "\" is not defined or " + "is not an extension of \"" + + descriptor->full_name() + "\"."); + return false; + } else { + ReportWarning("Extension \"" + field_name + "\" is not defined or " + "is not an extension of \"" + + descriptor->full_name() + "\"."); + } } } else { DO(ConsumeIdentifier(&field_name)); - field = descriptor->FindFieldByName(field_name); - // Group names are expected to be capitalized as they appear in the - // .proto file, which actually matches their type names, not their field - // names. 
- if (field == NULL) { - string lower_field_name = field_name; - LowerString(&lower_field_name); - field = descriptor->FindFieldByName(lower_field_name); - // If the case-insensitive match worked but the field is NOT a group, - if (field != NULL && field->type() != FieldDescriptor::TYPE_GROUP) { + int32 field_number; + if (allow_field_number_ && safe_strto32(field_name, &field_number)) { + if (descriptor->IsExtensionNumber(field_number)) { + field = reflection->FindKnownExtensionByNumber(field_number); + } else { + field = descriptor->FindFieldByNumber(field_number); + } + } else { + field = descriptor->FindFieldByName(field_name); + // Group names are expected to be capitalized as they appear in the + // .proto file, which actually matches their type names, not their + // field names. + if (field == NULL) { + string lower_field_name = field_name; + LowerString(&lower_field_name); + field = descriptor->FindFieldByName(lower_field_name); + // If the case-insensitive match worked but the field is NOT a group, + if (field != NULL && field->type() != FieldDescriptor::TYPE_GROUP) { + field = NULL; + } + } + // Again, special-case group names as described above. + if (field != NULL && field->type() == FieldDescriptor::TYPE_GROUP + && field->message_type()->name() != field_name) { field = NULL; } - } - // Again, special-case group names as described above. 
- if (field != NULL && field->type() == FieldDescriptor::TYPE_GROUP - && field->message_type()->name() != field_name) { - field = NULL; + + if (field == NULL && allow_case_insensitive_field_) { + string lower_field_name = field_name; + LowerString(&lower_field_name); + field = descriptor->FindFieldByLowercaseName(lower_field_name); + } } if (field == NULL) { - ReportError("Message type \"" + descriptor->full_name() + - "\" has no field named \"" + field_name + "\"."); - return false; + if (!allow_unknown_field_) { + ReportError("Message type \"" + descriptor->full_name() + + "\" has no field named \"" + field_name + "\"."); + return false; + } else { + ReportWarning("Message type \"" + descriptor->full_name() + + "\" has no field named \"" + field_name + "\"."); + } } } - // Fail if the field is not repeated and it has already been specified. - if ((singular_overwrite_policy_ == FORBID_SINGULAR_OVERWRITES) && - !field->is_repeated() && reflection->HasField(*message, field)) { - ReportError("Non-repeated field \"" + field_name + - "\" is specified multiple times."); - return false; + // Skips unknown field. + if (field == NULL) { + GOOGLE_CHECK(allow_unknown_field_); + // Try to guess the type of this field. + // If this field is not a message, there should be a ":" between the + // field name and the field value and also the field value should not + // start with "{" or "<" which indicates the beginning of a message body. + // If there is no ":" or there is a "{" or "<" after ":", this field has + // to be a message or the input is ill-formed. + if (TryConsume(":") && !LookingAt("{") && !LookingAt("<")) { + return SkipFieldValue(); + } else { + return SkipFieldMessage(); + } + } + + if (singular_overwrite_policy_ == FORBID_SINGULAR_OVERWRITES) { + // Fail if the field is not repeated and it has already been specified. 
+ if (!field->is_repeated() && reflection->HasField(*message, field)) { + ReportError("Non-repeated field \"" + field_name + + "\" is specified multiple times."); + return false; + } + // Fail if the field is a member of a oneof and another member has already + // been specified. + const OneofDescriptor* oneof = field->containing_oneof(); + if (oneof != NULL && reflection->HasOneof(*message, oneof)) { + const FieldDescriptor* other_field = + reflection->GetOneofFieldDescriptor(*message, oneof); + ReportError("Field \"" + field_name + "\" is specified along with " + "field \"" + other_field->name() + "\", another member " + "of oneof \"" + oneof->name() + "\"."); + return false; + } } // Perform special handling for embedded message types. if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) { // ':' is optional here. TryConsume(":"); - DO(ConsumeFieldMessage(message, reflection, field)); } else { + // ':' is required here. DO(Consume(":")); + } + + if (field->is_repeated() && TryConsume("[")) { + // Short repeated format, e.g. "foo: [1, 2, 3]" + while (true) { + if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) { + // Perform special handling for embedded message types. + DO(ConsumeFieldMessage(message, reflection, field)); + } else { + DO(ConsumeFieldValue(message, reflection, field)); + } + if (TryConsume("]")) { + break; + } + DO(Consume(",")); + } + } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) { + DO(ConsumeFieldMessage(message, reflection, field)); + } else { DO(ConsumeFieldValue(message, reflection, field)); } + // For historical reasons, fields may optionally be separated by commas or + // semicolons. + TryConsume(";") || TryConsume(","); + if (field->options().deprecated()) { ReportWarning("text format contains deprecated field \"" + field_name + "\""); } + // If a parse info tree exists, add the location for the parsed + // field. 
+ if (parse_info_tree_ != NULL) { + RecordLocation(parse_info_tree_, field, + ParseLocation(start_line, start_column)); + } + + return true; + } + + // Skips the next field including the field's name and value. + bool SkipField() { + string field_name; + if (TryConsume("[")) { + // Extension name. + DO(ConsumeIdentifier(&field_name)); + while (TryConsume(".")) { + string part; + DO(ConsumeIdentifier(&part)); + field_name += "."; + field_name += part; + } + DO(Consume("]")); + } else { + DO(ConsumeIdentifier(&field_name)); + } + + // Try to guess the type of this field. + // If this field is not a message, there should be a ":" between the + // field name and the field value and also the field value should not + // start with "{" or "<" which indicates the beginning of a message body. + // If there is no ":" or there is a "{" or "<" after ":", this field has + // to be a message or the input is ill-formed. + if (TryConsume(":") && !LookingAt("{") && !LookingAt("<")) { + DO(SkipFieldValue()); + } else { + DO(SkipFieldMessage()); + } + // For historical reasons, fields may optionally be separated by commas or + // semicolons. + TryConsume(";") || TryConsume(","); return true; } bool ConsumeFieldMessage(Message* message, const Reflection* reflection, const FieldDescriptor* field) { + + // If the parse information tree is not NULL, create a nested one + // for the nested message. + ParseInfoTree* parent = parse_info_tree_; + if (parent != NULL) { + parse_info_tree_ = CreateNested(parent, field); + } + string delimeter; if (TryConsume("<")) { delimeter = ">"; @@ -329,6 +565,26 @@ class TextFormat::Parser::ParserImpl { DO(ConsumeMessage(reflection->MutableMessage(message, field), delimeter)); } + + // Reset the parse information tree. + parse_info_tree_ = parent; + return true; + } + + // Skips the whole body of a message including the beginning delimiter and + // the ending delimiter. 
+ bool SkipFieldMessage() { + string delimeter; + if (TryConsume("<")) { + delimeter = ">"; + } else { + DO(Consume("{")); + delimeter = "}"; + } + while (!LookingAt(">") && !LookingAt("}")) { + DO(SkipField()); + } + DO(Consume(delimeter)); return true; } @@ -397,34 +653,57 @@ class TextFormat::Parser::ParserImpl { } case FieldDescriptor::CPPTYPE_BOOL: { - string value; - DO(ConsumeIdentifier(&value)); - - if (value == "true") { - SET_FIELD(Bool, true); - } else if (value == "false") { - SET_FIELD(Bool, false); + if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) { + uint64 value; + DO(ConsumeUnsignedInteger(&value, 1)); + SET_FIELD(Bool, value); } else { - ReportError("Invalid value for boolean field \"" + field->name() - + "\". Value: \"" + value + "\"."); - return false; + string value; + DO(ConsumeIdentifier(&value)); + if (value == "true" || value == "True" || value == "t") { + SET_FIELD(Bool, true); + } else if (value == "false" || value == "False" || value == "f") { + SET_FIELD(Bool, false); + } else { + ReportError("Invalid value for boolean field \"" + field->name() + + "\". Value: \"" + value + "\"."); + return false; + } } break; } case FieldDescriptor::CPPTYPE_ENUM: { string value; - DO(ConsumeIdentifier(&value)); - - // Find the enumeration value. const EnumDescriptor* enum_type = field->enum_type(); - const EnumValueDescriptor* enum_value - = enum_type->FindValueByName(value); + const EnumValueDescriptor* enum_value = NULL; + + if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) { + DO(ConsumeIdentifier(&value)); + // Find the enumeration value. 
+ enum_value = enum_type->FindValueByName(value); + + } else if (LookingAt("-") || + LookingAtType(io::Tokenizer::TYPE_INTEGER)) { + int64 int_value; + DO(ConsumeSignedInteger(&int_value, kint32max)); + value = SimpleItoa(int_value); // for error reporting + enum_value = enum_type->FindValueByNumber(int_value); + } else { + ReportError("Expected integer or identifier."); + return false; + } if (enum_value == NULL) { - ReportError("Unknown enumeration value of \"" + value + "\" for " - "field \"" + field->name() + "\"."); - return false; + if (!allow_unknown_enum_) { + ReportError("Unknown enumeration value of \"" + value + "\" for " + "field \"" + field->name() + "\"."); + return false; + } else { + ReportWarning("Unknown enumeration value of \"" + value + "\" for " + "field \"" + field->name() + "\"."); + return true; + } } SET_FIELD(Enum, enum_value); @@ -442,6 +721,60 @@ class TextFormat::Parser::ParserImpl { return true; } + bool SkipFieldValue() { + if (LookingAtType(io::Tokenizer::TYPE_STRING)) { + while (LookingAtType(io::Tokenizer::TYPE_STRING)) { + tokenizer_.Next(); + } + return true; + } + // Possible field values other than string: + // 12345 => TYPE_INTEGER + // -12345 => TYPE_SYMBOL + TYPE_INTEGER + // 1.2345 => TYPE_FLOAT + // -1.2345 => TYPE_SYMBOL + TYPE_FLOAT + // inf => TYPE_IDENTIFIER + // -inf => TYPE_SYMBOL + TYPE_IDENTIFIER + // TYPE_INTEGER => TYPE_IDENTIFIER + // Divides them into two group, one with TYPE_SYMBOL + // and the other without: + // Group one: + // 12345 => TYPE_INTEGER + // 1.2345 => TYPE_FLOAT + // inf => TYPE_IDENTIFIER + // TYPE_INTEGER => TYPE_IDENTIFIER + // Group two: + // -12345 => TYPE_SYMBOL + TYPE_INTEGER + // -1.2345 => TYPE_SYMBOL + TYPE_FLOAT + // -inf => TYPE_SYMBOL + TYPE_IDENTIFIER + // As we can see, the field value consists of an optional '-' and one of + // TYPE_INTEGER, TYPE_FLOAT and TYPE_IDENTIFIER. 
+ bool has_minus = TryConsume("-"); + if (!LookingAtType(io::Tokenizer::TYPE_INTEGER) && + !LookingAtType(io::Tokenizer::TYPE_FLOAT) && + !LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) { + return false; + } + // Combination of '-' and TYPE_IDENTIFIER may result in an invalid field + // value while other combinations all generate valid values. + // We check if the value of this combination is valid here. + // TYPE_IDENTIFIER after a '-' should be one of the float values listed + // below: + // inf, infinity, nan + if (has_minus && LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) { + string text = tokenizer_.current().text; + LowerString(&text); + if (text != "inf" && + text != "infinity" && + text != "nan") { + ReportError("Invalid float number: " + text); + return false; + } + } + tokenizer_.Next(); + return true; + } + // Returns true if the current token's text is equal to that specified. bool LookingAt(const string& text) { return tokenizer_.current().text == text; @@ -455,15 +788,23 @@ class TextFormat::Parser::ParserImpl { // Consumes an identifier and saves its value in the identifier parameter. // Returns false if the token is not of type IDENTFIER. bool ConsumeIdentifier(string* identifier) { - if (!LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) { - ReportError("Expected identifier."); - return false; + if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) { + *identifier = tokenizer_.current().text; + tokenizer_.Next(); + return true; } - *identifier = tokenizer_.current().text; + // If allow_field_number_ or allow_unknown_field_ is true, we should be able + // to parse integer identifiers. + if ((allow_field_number_ || allow_unknown_field_) + && LookingAtType(io::Tokenizer::TYPE_INTEGER)) { + *identifier = tokenizer_.current().text; + tokenizer_.Next(); + return true; + } - tokenizer_.Next(); - return true; + ReportError("Expected identifier."); + return false; } // Consumes a string and saves its value in the text parameter. 
@@ -530,6 +871,29 @@ class TextFormat::Parser::ParserImpl { return true; } + // Consumes a uint64 and saves its value in the value parameter. + // Accepts decimal numbers only, rejects hex or oct numbers. + bool ConsumeUnsignedDecimalInteger(uint64* value, uint64 max_value) { + if (!LookingAtType(io::Tokenizer::TYPE_INTEGER)) { + ReportError("Expected integer."); + return false; + } + + const string& text = tokenizer_.current().text; + if (IsHexNumber(text) || IsOctNumber(text)) { + ReportError("Expect a decimal number."); + return false; + } + + if (!io::Tokenizer::ParseInteger(text, max_value, value)) { + ReportError("Integer out of range."); + return false; + } + + tokenizer_.Next(); + return true; + } + // Consumes a double and saves its value in the value parameter. // Note that since the tokenizer does not support negative numbers, // we actually may consume an additional token (for the minus sign) in this @@ -547,7 +911,7 @@ class TextFormat::Parser::ParserImpl { if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) { // We have found an integer value for the double. 
uint64 integer_value; - DO(ConsumeUnsignedInteger(&integer_value, kuint64max)); + DO(ConsumeUnsignedDecimalInteger(&integer_value, kuint64max)); *value = static_cast<double>(integer_value); } else if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) { @@ -559,7 +923,8 @@ class TextFormat::Parser::ParserImpl { } else if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) { string text = tokenizer_.current().text; LowerString(&text); - if (text == "inf" || text == "infinity") { + if (text == "inf" || + text == "infinity") { *value = std::numeric_limits<double>::infinity(); tokenizer_.Next(); } else if (text == "nan") { @@ -616,7 +981,7 @@ class TextFormat::Parser::ParserImpl { explicit ParserErrorCollector(TextFormat::Parser::ParserImpl* parser) : parser_(parser) { } - virtual ~ParserErrorCollector() { }; + virtual ~ParserErrorCollector() { } virtual void AddError(int line, int column, const string& message) { parser_->ReportError(line, column, message); @@ -632,10 +997,16 @@ class TextFormat::Parser::ParserImpl { }; io::ErrorCollector* error_collector_; + TextFormat::Finder* finder_; + ParseInfoTree* parse_info_tree_; ParserErrorCollector tokenizer_error_collector_; io::Tokenizer tokenizer_; const Descriptor* root_message_type_; SingularOverwritePolicy singular_overwrite_policy_; + const bool allow_case_insensitive_field_; + const bool allow_unknown_field_; + const bool allow_unknown_enum_; + const bool allow_field_number_; bool had_errors_; }; @@ -661,7 +1032,7 @@ class TextFormat::Printer::TextGenerator { ~TextGenerator() { // Only BackUp() if we're sure we've successfully called Next() at least // once. 
- if (buffer_size_ > 0) { + if (!failed_ && buffer_size_ > 0) { output_->BackUp(buffer_size_); } } @@ -765,17 +1136,39 @@ class TextFormat::Printer::TextGenerator { // =========================================================================== +TextFormat::Finder::~Finder() { +} + TextFormat::Parser::Parser() : error_collector_(NULL), - allow_partial_(false) {} + finder_(NULL), + parse_info_tree_(NULL), + allow_partial_(false), + allow_case_insensitive_field_(false), + allow_unknown_field_(false), + allow_unknown_enum_(false), + allow_field_number_(false), + allow_relaxed_whitespace_(false), + allow_singular_overwrites_(false) { +} TextFormat::Parser::~Parser() {} bool TextFormat::Parser::Parse(io::ZeroCopyInputStream* input, Message* output) { output->Clear(); + + ParserImpl::SingularOverwritePolicy overwrites_policy = + allow_singular_overwrites_ + ? ParserImpl::ALLOW_SINGULAR_OVERWRITES + : ParserImpl::FORBID_SINGULAR_OVERWRITES; + ParserImpl parser(output->GetDescriptor(), input, error_collector_, - ParserImpl::FORBID_SINGULAR_OVERWRITES); + finder_, parse_info_tree_, + overwrites_policy, + allow_case_insensitive_field_, allow_unknown_field_, + allow_unknown_enum_, allow_field_number_, + allow_relaxed_whitespace_); return MergeUsingImpl(input, output, &parser); } @@ -788,7 +1181,11 @@ bool TextFormat::Parser::ParseFromString(const string& input, bool TextFormat::Parser::Merge(io::ZeroCopyInputStream* input, Message* output) { ParserImpl parser(output->GetDescriptor(), input, error_collector_, - ParserImpl::ALLOW_SINGULAR_OVERWRITES); + finder_, parse_info_tree_, + ParserImpl::ALLOW_SINGULAR_OVERWRITES, + allow_case_insensitive_field_, allow_unknown_field_, + allow_unknown_enum_, allow_field_number_, + allow_relaxed_whitespace_); return MergeUsingImpl(input, output, &parser); } @@ -798,7 +1195,7 @@ bool TextFormat::Parser::MergeFromString(const string& input, return Merge(&input_stream, output); } -bool TextFormat::Parser::MergeUsingImpl(io::ZeroCopyInputStream* 
input, +bool TextFormat::Parser::MergeUsingImpl(io::ZeroCopyInputStream* /* input */, Message* output, ParserImpl* parser_impl) { if (!parser_impl->Parse(output)) return false; @@ -806,7 +1203,7 @@ bool TextFormat::Parser::MergeUsingImpl(io::ZeroCopyInputStream* input, vector<string> missing_fields; output->FindInitializationErrors(&missing_fields); parser_impl->ReportError(-1, 0, "Message missing required fields: " + - JoinStrings(missing_fields, ", ")); + Join(missing_fields, ", ")); return false; } return true; @@ -818,7 +1215,11 @@ bool TextFormat::Parser::ParseFieldValueFromString( Message* output) { io::ArrayInputStream input_stream(input.data(), input.size()); ParserImpl parser(output->GetDescriptor(), &input_stream, error_collector_, - ParserImpl::ALLOW_SINGULAR_OVERWRITES); + finder_, parse_info_tree_, + ParserImpl::ALLOW_SINGULAR_OVERWRITES, + allow_case_insensitive_field_, allow_unknown_field_, + allow_unknown_enum_, allow_field_number_, + allow_relaxed_whitespace_); return parser.ParseField(field, output); } @@ -844,29 +1245,138 @@ bool TextFormat::Parser::ParseFieldValueFromString( // =========================================================================== +// The default implementation for FieldValuePrinter. The base class just +// does simple formatting. That way, deriving classes could decide to fallback +// to that behavior. +TextFormat::FieldValuePrinter::FieldValuePrinter() {} +TextFormat::FieldValuePrinter::~FieldValuePrinter() {} +string TextFormat::FieldValuePrinter::PrintBool(bool val) const { + return val ? 
"true" : "false"; +} +string TextFormat::FieldValuePrinter::PrintInt32(int32 val) const { + return SimpleItoa(val); +} +string TextFormat::FieldValuePrinter::PrintUInt32(uint32 val) const { + return SimpleItoa(val); +} +string TextFormat::FieldValuePrinter::PrintInt64(int64 val) const { + return SimpleItoa(val); +} +string TextFormat::FieldValuePrinter::PrintUInt64(uint64 val) const { + return SimpleItoa(val); +} +string TextFormat::FieldValuePrinter::PrintFloat(float val) const { + return SimpleFtoa(val); +} +string TextFormat::FieldValuePrinter::PrintDouble(double val) const { + return SimpleDtoa(val); +} +string TextFormat::FieldValuePrinter::PrintString(const string& val) const { + return StrCat("\"", CEscape(val), "\""); +} +string TextFormat::FieldValuePrinter::PrintBytes(const string& val) const { + return PrintString(val); +} +string TextFormat::FieldValuePrinter::PrintEnum(int32 val, + const string& name) const { + return name; +} +string TextFormat::FieldValuePrinter::PrintFieldName( + const Message& message, + const Reflection* reflection, + const FieldDescriptor* field) const { + if (field->is_extension()) { + // We special-case MessageSet elements for compatibility with proto1. + if (field->containing_type()->options().message_set_wire_format() + && field->type() == FieldDescriptor::TYPE_MESSAGE + && field->is_optional() + && field->extension_scope() == field->message_type()) { + return StrCat("[", field->message_type()->full_name(), "]"); + } else { + return StrCat("[", field->full_name(), "]"); + } + } else if (field->type() == FieldDescriptor::TYPE_GROUP) { + // Groups must be serialized with their original capitalization. + return field->message_type()->name(); + } else { + return field->name(); + } +} +string TextFormat::FieldValuePrinter::PrintMessageStart( + const Message& message, + int field_index, + int field_count, + bool single_line_mode) const { + return single_line_mode ? 
" { " : " {\n"; +} +string TextFormat::FieldValuePrinter::PrintMessageEnd( + const Message& message, + int field_index, + int field_count, + bool single_line_mode) const { + return single_line_mode ? "} " : "}\n"; +} + +namespace { +// Our own specialization: for UTF8 escaped strings. +class FieldValuePrinterUtf8Escaping : public TextFormat::FieldValuePrinter { + public: + virtual string PrintString(const string& val) const { + return StrCat("\"", strings::Utf8SafeCEscape(val), "\""); + } + virtual string PrintBytes(const string& val) const { + return TextFormat::FieldValuePrinter::PrintString(val); + } +}; + +} // namespace + TextFormat::Printer::Printer() : initial_indent_level_(0), single_line_mode_(false), + use_field_number_(false), use_short_repeated_primitives_(false), - utf8_string_escaping_(false) {} + hide_unknown_fields_(false), + print_message_fields_in_index_order_(false) { + SetUseUtf8StringEscaping(false); +} -TextFormat::Printer::~Printer() {} +TextFormat::Printer::~Printer() { + STLDeleteValues(&custom_printers_); +} + +void TextFormat::Printer::SetUseUtf8StringEscaping(bool as_utf8) { + SetDefaultFieldValuePrinter(as_utf8 + ? 
new FieldValuePrinterUtf8Escaping() + : new FieldValuePrinter()); +} + +void TextFormat::Printer::SetDefaultFieldValuePrinter( + const FieldValuePrinter* printer) { + default_field_value_printer_.reset(printer); +} + +bool TextFormat::Printer::RegisterFieldValuePrinter( + const FieldDescriptor* field, + const FieldValuePrinter* printer) { + return field != NULL + && printer != NULL + && custom_printers_.insert(make_pair(field, printer)).second; +} bool TextFormat::Printer::PrintToString(const Message& message, - string* output) { + string* output) const { GOOGLE_DCHECK(output) << "output specified is NULL"; output->clear(); io::StringOutputStream output_stream(output); - bool result = Print(message, &output_stream); - - return result; + return Print(message, &output_stream); } bool TextFormat::Printer::PrintUnknownFieldsToString( const UnknownFieldSet& unknown_fields, - string* output) { + string* output) const { GOOGLE_DCHECK(output) << "output specified is NULL"; output->clear(); @@ -875,7 +1385,7 @@ bool TextFormat::Printer::PrintUnknownFieldsToString( } bool TextFormat::Printer::Print(const Message& message, - io::ZeroCopyOutputStream* output) { + io::ZeroCopyOutputStream* output) const { TextGenerator generator(output, initial_indent_level_); Print(message, generator); @@ -886,7 +1396,7 @@ bool TextFormat::Printer::Print(const Message& message, bool TextFormat::Printer::PrintUnknownFields( const UnknownFieldSet& unknown_fields, - io::ZeroCopyOutputStream* output) { + io::ZeroCopyOutputStream* output) const { TextGenerator generator(output, initial_indent_level_); PrintUnknownFields(unknown_fields, generator); @@ -895,22 +1405,37 @@ bool TextFormat::Printer::PrintUnknownFields( return !generator.failed(); } +namespace { +// Comparison functor for sorting FieldDescriptors by field index. 
+struct FieldIndexSorter { + bool operator()(const FieldDescriptor* left, + const FieldDescriptor* right) const { + return left->index() < right->index(); + } +}; +} // namespace + void TextFormat::Printer::Print(const Message& message, - TextGenerator& generator) { + TextGenerator& generator) const { const Reflection* reflection = message.GetReflection(); vector<const FieldDescriptor*> fields; reflection->ListFields(message, &fields); + if (print_message_fields_in_index_order_) { + sort(fields.begin(), fields.end(), FieldIndexSorter()); + } for (int i = 0; i < fields.size(); i++) { PrintField(message, reflection, fields[i], generator); } - PrintUnknownFields(reflection->GetUnknownFields(message), generator); + if (!hide_unknown_fields_) { + PrintUnknownFields(reflection->GetUnknownFields(message), generator); + } } void TextFormat::Printer::PrintFieldValueToString( const Message& message, const FieldDescriptor* field, int index, - string* output) { + string* output) const { GOOGLE_DCHECK(output) << "output specified is NULL"; @@ -924,7 +1449,7 @@ void TextFormat::Printer::PrintFieldValueToString( void TextFormat::Printer::PrintField(const Message& message, const Reflection* reflection, const FieldDescriptor* field, - TextGenerator& generator) { + TextGenerator& generator) const { if (use_short_repeated_primitives_ && field->is_repeated() && field->cpp_type() != FieldDescriptor::CPPTYPE_STRING && @@ -942,35 +1467,30 @@ void TextFormat::Printer::PrintField(const Message& message, } for (int j = 0; j < count; ++j) { + const int field_index = field->is_repeated() ? 
j : -1; + PrintFieldName(message, reflection, field, generator); if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) { - if (single_line_mode_) { - generator.Print(" { "); - } else { - generator.Print(" {\n"); - generator.Indent(); - } + const FieldValuePrinter* printer = FindWithDefault( + custom_printers_, field, default_field_value_printer_.get()); + const Message& sub_message = + field->is_repeated() + ? reflection->GetRepeatedMessage(message, field, j) + : reflection->GetMessage(message, field); + generator.Print( + printer->PrintMessageStart( + sub_message, field_index, count, single_line_mode_)); + generator.Indent(); + Print(sub_message, generator); + generator.Outdent(); + generator.Print( + printer->PrintMessageEnd( + sub_message, field_index, count, single_line_mode_)); } else { generator.Print(": "); - } - - // Write the field value. - int field_index = j; - if (!field->is_repeated()) { - field_index = -1; - } - - PrintFieldValue(message, reflection, field, field_index, generator); - - if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) { - if (single_line_mode_) { - generator.Print("} "); - } else { - generator.Outdent(); - generator.Print("}\n"); - } - } else { + // Write the field value. + PrintFieldValue(message, reflection, field, field_index, generator); if (single_line_mode_) { generator.Print(" "); } else { @@ -980,10 +1500,11 @@ void TextFormat::Printer::PrintField(const Message& message, } } -void TextFormat::Printer::PrintShortRepeatedField(const Message& message, - const Reflection* reflection, - const FieldDescriptor* field, - TextGenerator& generator) { +void TextFormat::Printer::PrintShortRepeatedField( + const Message& message, + const Reflection* reflection, + const FieldDescriptor* field, + TextGenerator& generator) const { // Print primitive repeated field in short form. 
PrintFieldName(message, reflection, field, generator); @@ -1003,27 +1524,17 @@ void TextFormat::Printer::PrintShortRepeatedField(const Message& message, void TextFormat::Printer::PrintFieldName(const Message& message, const Reflection* reflection, const FieldDescriptor* field, - TextGenerator& generator) { - if (field->is_extension()) { - generator.Print("["); - // We special-case MessageSet elements for compatibility with proto1. - if (field->containing_type()->options().message_set_wire_format() - && field->type() == FieldDescriptor::TYPE_MESSAGE - && field->is_optional() - && field->extension_scope() == field->message_type()) { - generator.Print(field->message_type()->full_name()); - } else { - generator.Print(field->full_name()); - } - generator.Print("]"); - } else { - if (field->type() == FieldDescriptor::TYPE_GROUP) { - // Groups must be serialized with their original capitalization. - generator.Print(field->message_type()->name()); - } else { - generator.Print(field->name()); - } + TextGenerator& generator) const { + // if use_field_number_ is true, prints field number instead + // of field name. 
+ if (use_field_number_) { + generator.Print(SimpleItoa(field->number())); + return; } + + const FieldValuePrinter* printer = FindWithDefault( + custom_printers_, field, default_field_value_printer_.get()); + generator.Print(printer->PrintFieldName(message, reflection, field)); } void TextFormat::Printer::PrintFieldValue( @@ -1031,66 +1542,60 @@ void TextFormat::Printer::PrintFieldValue( const Reflection* reflection, const FieldDescriptor* field, int index, - TextGenerator& generator) { + TextGenerator& generator) const { GOOGLE_DCHECK(field->is_repeated() || (index == -1)) << "Index must be -1 for non-repeated fields"; + const FieldValuePrinter* printer + = FindWithDefault(custom_printers_, field, + default_field_value_printer_.get()); + switch (field->cpp_type()) { -#define OUTPUT_FIELD(CPPTYPE, METHOD, TO_STRING) \ - case FieldDescriptor::CPPTYPE_##CPPTYPE: \ - generator.Print(TO_STRING(field->is_repeated() ? \ - reflection->GetRepeated##METHOD(message, field, index) : \ - reflection->Get##METHOD(message, field))); \ - break; \ - - OUTPUT_FIELD( INT32, Int32, SimpleItoa); - OUTPUT_FIELD( INT64, Int64, SimpleItoa); - OUTPUT_FIELD(UINT32, UInt32, SimpleItoa); - OUTPUT_FIELD(UINT64, UInt64, SimpleItoa); - OUTPUT_FIELD( FLOAT, Float, SimpleFtoa); - OUTPUT_FIELD(DOUBLE, Double, SimpleDtoa); +#define OUTPUT_FIELD(CPPTYPE, METHOD) \ + case FieldDescriptor::CPPTYPE_##CPPTYPE: \ + generator.Print(printer->Print##METHOD(field->is_repeated() \ + ? reflection->GetRepeated##METHOD(message, field, index) \ + : reflection->Get##METHOD(message, field))); \ + break + + OUTPUT_FIELD( INT32, Int32); + OUTPUT_FIELD( INT64, Int64); + OUTPUT_FIELD(UINT32, UInt32); + OUTPUT_FIELD(UINT64, UInt64); + OUTPUT_FIELD( FLOAT, Float); + OUTPUT_FIELD(DOUBLE, Double); + OUTPUT_FIELD( BOOL, Bool); #undef OUTPUT_FIELD - case FieldDescriptor::CPPTYPE_STRING: { - string scratch; - const string& value = field->is_repeated() ? 
- reflection->GetRepeatedStringReference( - message, field, index, &scratch) : - reflection->GetStringReference(message, field, &scratch); - - generator.Print("\""); - if (utf8_string_escaping_) { - generator.Print(strings::Utf8SafeCEscape(value)); - } else { - generator.Print(CEscape(value)); - } - generator.Print("\""); - - break; + case FieldDescriptor::CPPTYPE_STRING: { + string scratch; + const string& value = field->is_repeated() + ? reflection->GetRepeatedStringReference( + message, field, index, &scratch) + : reflection->GetStringReference(message, field, &scratch); + if (field->type() == FieldDescriptor::TYPE_STRING) { + generator.Print(printer->PrintString(value)); + } else { + GOOGLE_DCHECK_EQ(field->type(), FieldDescriptor::TYPE_BYTES); + generator.Print(printer->PrintBytes(value)); } + break; + } - case FieldDescriptor::CPPTYPE_BOOL: - if (field->is_repeated()) { - generator.Print(reflection->GetRepeatedBool(message, field, index) - ? "true" : "false"); - } else { - generator.Print(reflection->GetBool(message, field) - ? "true" : "false"); - } - break; - - case FieldDescriptor::CPPTYPE_ENUM: - generator.Print(field->is_repeated() ? - reflection->GetRepeatedEnum(message, field, index)->name() : - reflection->GetEnum(message, field)->name()); - break; + case FieldDescriptor::CPPTYPE_ENUM: { + const EnumValueDescriptor *enum_val = field->is_repeated() + ? reflection->GetRepeatedEnum(message, field, index) + : reflection->GetEnum(message, field); + generator.Print(printer->PrintEnum(enum_val->number(), enum_val->name())); + break; + } - case FieldDescriptor::CPPTYPE_MESSAGE: - Print(field->is_repeated() ? - reflection->GetRepeatedMessage(message, field, index) : - reflection->GetMessage(message, field), - generator); - break; + case FieldDescriptor::CPPTYPE_MESSAGE: + Print(field->is_repeated() + ? 
reflection->GetRepeatedMessage(message, field, index) + : reflection->GetMessage(message, field), + generator); + break; } } @@ -1143,7 +1648,7 @@ static string PaddedHex(IntType value) { } void TextFormat::Printer::PrintUnknownFields( - const UnknownFieldSet& unknown_fields, TextGenerator& generator) { + const UnknownFieldSet& unknown_fields, TextGenerator& generator) const { for (int i = 0; i < unknown_fields.field_count(); i++) { const UnknownField& field = unknown_fields.field(i); string field_number = SimpleItoa(field.number()); |