diff options
Diffstat (limited to 'src/google/protobuf/compiler/parser.h')
-rw-r--r-- | src/google/protobuf/compiler/parser.h | 262 |
1 files changed, 223 insertions, 39 deletions
diff --git a/src/google/protobuf/compiler/parser.h b/src/google/protobuf/compiler/parser.h index 72c96d0..d0a2359 100644 --- a/src/google/protobuf/compiler/parser.h +++ b/src/google/protobuf/compiler/parser.h @@ -1,6 +1,6 @@ // Protocol Buffers - Google's data interchange format // Copyright 2008 Google Inc. All rights reserved. -// http://code.google.com/p/protobuf/ +// https://developers.google.com/protocol-buffers/ // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are @@ -40,7 +40,6 @@ #include <map> #include <string> #include <utility> -#include <google/protobuf/stubs/common.h> #include <google/protobuf/descriptor.h> #include <google/protobuf/descriptor.pb.h> #include <google/protobuf/repeated_field.h> @@ -74,6 +73,9 @@ class LIBPROTOBUF_EXPORT Parser { // Optional fetaures: + // DEPRECATED: New code should use the SourceCodeInfo embedded in the + // FileDescriptorProto. + // // Requests that locations of certain definitions be recorded to the given // SourceLocationTable while parsing. This can be used to look up exact line // and column numbers for errors reported by DescriptorPool during validation. @@ -82,7 +84,7 @@ class LIBPROTOBUF_EXPORT Parser { source_location_table_ = location_table; } - // Requsets that errors be recorded to the given ErrorCollector while + // Requests that errors be recorded to the given ErrorCollector while // parsing. Set to NULL (the default) to discard error messages. void RecordErrorsTo(io::ErrorCollector* error_collector) { error_collector_ = error_collector; @@ -113,6 +115,8 @@ class LIBPROTOBUF_EXPORT Parser { } private: + class LocationRecorder; + // ================================================================= // Error recovery helpers @@ -161,6 +165,8 @@ class LIBPROTOBUF_EXPORT Parser { bool ConsumeIdentifier(string* output, const char* error); // Consume an integer and store its value in "output". bool ConsumeInteger(int* output, const char* error); + // Consume a signed integer and store its value in "output". + bool ConsumeSignedInteger(int* output, const char* error); // Consume a 64-bit integer and store its value in "output". If the value // is greater than max_value, an error will be reported. bool ConsumeInteger64(uint64 max_value, uint64* output, const char* error); @@ -170,6 +176,20 @@ class LIBPROTOBUF_EXPORT Parser { // Consume a string literal and store its (unescaped) value in "output". bool ConsumeString(string* output, const char* error); + // Consume a token representing the end of the statement. Comments between + // this token and the next will be harvested for documentation. The given + // LocationRecorder should refer to the declaration that was just parsed; + // it will be populated with these comments. + // + // TODO(kenton): The LocationRecorder is const because historically locations + // have been passed around by const reference, for no particularly good + // reason. We should probably go through and change them all to mutable + // pointer to make this more intuitive. + bool TryConsumeEndOfDeclaration(const char* text, + const LocationRecorder* location); + bool ConsumeEndOfDeclaration(const char* text, + const LocationRecorder* location); + // ----------------------------------------------------------------- // Error logging helpers @@ -180,16 +200,67 @@ class LIBPROTOBUF_EXPORT Parser { // of the current token. void AddError(const string& error); - // Record the given line and column and associate it with this descriptor - // in the SourceLocationTable. - void RecordLocation(const Message* descriptor, - DescriptorPool::ErrorCollector::ErrorLocation location, - int line, int column); - - // Record the current line and column and associate it with this descriptor - // in the SourceLocationTable. - void RecordLocation(const Message* descriptor, - DescriptorPool::ErrorCollector::ErrorLocation location); + // Records a location in the SourceCodeInfo.location table (see + // descriptor.proto). We use RAII to ensure that the start and end locations + // are recorded -- the constructor records the start location and the + // destructor records the end location. Since the parser is + // recursive-descent, this works out beautifully. + class LIBPROTOBUF_EXPORT LocationRecorder { + public: + // Construct the file's "root" location. + LocationRecorder(Parser* parser); + + // Construct a location that represents a declaration nested within the + // given parent. E.g. a field's location is nested within the location + // for a message type. The parent's path will be copied, so you should + // call AddPath() only to add the path components leading from the parent + // to the child (as opposed to leading from the root to the child). + LocationRecorder(const LocationRecorder& parent); + + // Convenience constructors that call AddPath() one or two times. + LocationRecorder(const LocationRecorder& parent, int path1); + LocationRecorder(const LocationRecorder& parent, int path1, int path2); + + ~LocationRecorder(); + + // Add a path component. See SourceCodeInfo.Location.path in + // descriptor.proto. + void AddPath(int path_component); + + // By default the location is considered to start at the current token at + // the time the LocationRecorder is created. StartAt() sets the start + // location to the given token instead. + void StartAt(const io::Tokenizer::Token& token); + + // Start at the same location as some other LocationRecorder. + void StartAt(const LocationRecorder& other); + + // By default the location is considered to end at the previous token at + // the time the LocationRecorder is destroyed. EndAt() sets the end + // location to the given token instead. + void EndAt(const io::Tokenizer::Token& token); + + // Records the start point of this location to the SourceLocationTable that + // was passed to RecordSourceLocationsTo(), if any. SourceLocationTable + // is an older way of keeping track of source locations which is still + // used in some places. + void RecordLegacyLocation(const Message* descriptor, + DescriptorPool::ErrorCollector::ErrorLocation location); + + // Attaches leading and trailing comments to the location. The two strings + // will be swapped into place, so after this is called *leading and + // *trailing will be empty. + // + // TODO(kenton): See comment on TryConsumeEndOfDeclaration(), above, for + // why this is const. + void AttachComments(string* leading, string* trailing) const; + + private: + Parser* parser_; + SourceCodeInfo::Location* location_; + + void Init(const LocationRecorder& parent); + }; // ================================================================= // Parsers for various language constructs @@ -210,54 +281,131 @@ class LIBPROTOBUF_EXPORT Parser { // makes logic much simpler for the caller. // Parse a top-level message, enum, service, etc. - bool ParseTopLevelStatement(FileDescriptorProto* file); + bool ParseTopLevelStatement(FileDescriptorProto* file, + const LocationRecorder& root_location); // Parse various language high-level language construrcts. - bool ParseMessageDefinition(DescriptorProto* message); - bool ParseEnumDefinition(EnumDescriptorProto* enum_type); - bool ParseServiceDefinition(ServiceDescriptorProto* service); - bool ParsePackage(FileDescriptorProto* file); - bool ParseImport(string* import_filename); - bool ParseOption(Message* options); + bool ParseMessageDefinition(DescriptorProto* message, + const LocationRecorder& message_location, + const FileDescriptorProto* containing_file); + bool ParseEnumDefinition(EnumDescriptorProto* enum_type, + const LocationRecorder& enum_location, + const FileDescriptorProto* containing_file); + bool ParseServiceDefinition(ServiceDescriptorProto* service, + const LocationRecorder& service_location, + const FileDescriptorProto* containing_file); + bool ParsePackage(FileDescriptorProto* file, + const LocationRecorder& root_location, + const FileDescriptorProto* containing_file); + bool ParseImport(RepeatedPtrField<string>* dependency, + RepeatedField<int32>* public_dependency, + RepeatedField<int32>* weak_dependency, + const LocationRecorder& root_location, + const FileDescriptorProto* containing_file); + bool ParseOption(Message* options, + const LocationRecorder& options_location, + const FileDescriptorProto* containing_file); // These methods parse the contents of a message, enum, or service type and // add them to the given object. They consume the entire block including // the beginning and ending brace. - bool ParseMessageBlock(DescriptorProto* message); - bool ParseEnumBlock(EnumDescriptorProto* enum_type); - bool ParseServiceBlock(ServiceDescriptorProto* service); + bool ParseMessageBlock(DescriptorProto* message, + const LocationRecorder& message_location, + const FileDescriptorProto* containing_file); + bool ParseEnumBlock(EnumDescriptorProto* enum_type, + const LocationRecorder& enum_location, + const FileDescriptorProto* containing_file); + bool ParseServiceBlock(ServiceDescriptorProto* service, + const LocationRecorder& service_location, + const FileDescriptorProto* containing_file); // Parse one statement within a message, enum, or service block, inclunding // final semicolon. - bool ParseMessageStatement(DescriptorProto* message); - bool ParseEnumStatement(EnumDescriptorProto* message); - bool ParseServiceStatement(ServiceDescriptorProto* message); + bool ParseMessageStatement(DescriptorProto* message, + const LocationRecorder& message_location, + const FileDescriptorProto* containing_file); + bool ParseEnumStatement(EnumDescriptorProto* message, + const LocationRecorder& enum_location, + const FileDescriptorProto* containing_file); + bool ParseServiceStatement(ServiceDescriptorProto* message, + const LocationRecorder& service_location, + const FileDescriptorProto* containing_file); // Parse a field of a message. If the field is a group, its type will be // added to "messages". + // + // parent_location and location_field_number_for_nested_type are needed when + // parsing groups -- we need to generate a nested message type within the + // parent and record its location accordingly. Since the parent could be + // either a FileDescriptorProto or a DescriptorProto, we must pass in the + // correct field number to use. bool ParseMessageField(FieldDescriptorProto* field, - RepeatedPtrField<DescriptorProto>* messages); + RepeatedPtrField<DescriptorProto>* messages, + const LocationRecorder& parent_location, + int location_field_number_for_nested_type, + const LocationRecorder& field_location, + const FileDescriptorProto* containing_file); + + // Like ParseMessageField() but expects the label has already been filled in + // by the caller. + bool ParseMessageFieldNoLabel(FieldDescriptorProto* field, + RepeatedPtrField<DescriptorProto>* messages, + const LocationRecorder& parent_location, + int location_field_number_for_nested_type, + const LocationRecorder& field_location, + const FileDescriptorProto* containing_file); // Parse an "extensions" declaration. - bool ParseExtensions(DescriptorProto* message); + bool ParseExtensions(DescriptorProto* message, + const LocationRecorder& extensions_location, + const FileDescriptorProto* containing_file); - // Parse an "extend" declaration. + // Parse an "extend" declaration. (See also comments for + // ParseMessageField().) bool ParseExtend(RepeatedPtrField<FieldDescriptorProto>* extensions, - RepeatedPtrField<DescriptorProto>* messages); + RepeatedPtrField<DescriptorProto>* messages, + const LocationRecorder& parent_location, + int location_field_number_for_nested_type, + const LocationRecorder& extend_location, + const FileDescriptorProto* containing_file); + + // Parse a "oneof" declaration. The caller is responsible for setting + // oneof_decl->label() since it will have had to parse the label before it + // knew it was parsing a oneof. + bool ParseOneof(OneofDescriptorProto* oneof_decl, + DescriptorProto* containing_type, + int oneof_index, + const LocationRecorder& oneof_location, + const LocationRecorder& containing_type_location, + const FileDescriptorProto* containing_file); // Parse a single enum value within an enum block. - bool ParseEnumConstant(EnumValueDescriptorProto* enum_value); + bool ParseEnumConstant(EnumValueDescriptorProto* enum_value, + const LocationRecorder& enum_value_location, + const FileDescriptorProto* containing_file); // Parse enum constant options, i.e. the list in square brackets at the end // of the enum constant value definition. - bool ParseEnumConstantOptions(EnumValueDescriptorProto* value); + bool ParseEnumConstantOptions(EnumValueDescriptorProto* value, + const LocationRecorder& enum_value_location, + const FileDescriptorProto* containing_file); // Parse a single method within a service definition. - bool ParseServiceMethod(MethodDescriptorProto* method); + bool ParseServiceMethod(MethodDescriptorProto* method, + const LocationRecorder& method_location, + const FileDescriptorProto* containing_file); + + + // Parse options of a single method or stream. + bool ParseOptions(const LocationRecorder& parent_location, + const FileDescriptorProto* containing_file, + const int optionsFieldNumber, + Message* mutable_options); // Parse "required", "optional", or "repeated" and fill in "label" // with the value. - bool ParseLabel(FieldDescriptorProto::Label* label); + bool ParseLabel(FieldDescriptorProto::Label* label, + const FileDescriptorProto* containing_file); // Parse a type name and fill in "type" (if it is a primitive) or // "type_name" (if it is not) with the type parsed. @@ -269,39 +417,75 @@ class LIBPROTOBUF_EXPORT Parser { // Parses field options, i.e. the stuff in square brackets at the end // of a field definition. Also parses default value. - bool ParseFieldOptions(FieldDescriptorProto* field); + bool ParseFieldOptions(FieldDescriptorProto* field, + const LocationRecorder& field_location, + const FileDescriptorProto* containing_file); // Parse the "default" option. This needs special handling because its // type is the field's type. - bool ParseDefaultAssignment(FieldDescriptorProto* field); + bool ParseDefaultAssignment(FieldDescriptorProto* field, + const LocationRecorder& field_location, + const FileDescriptorProto* containing_file); + + enum OptionStyle { + OPTION_ASSIGNMENT, // just "name = value" + OPTION_STATEMENT // "option name = value;" + }; // Parse a single option name/value pair, e.g. "ctype = CORD". The name // identifies a field of the given Message, and the value of that field // is set to the parsed value. - bool ParseOptionAssignment(Message* options); + bool ParseOption(Message* options, + const LocationRecorder& options_location, + const FileDescriptorProto* containing_file, + OptionStyle style); // Parses a single part of a multipart option name. A multipart name consists // of names separated by dots. Each name is either an identifier or a series // of identifiers separated by dots and enclosed in parentheses. E.g., // "foo.(bar.baz).qux". - bool ParseOptionNamePart(UninterpretedOption* uninterpreted_option); + bool ParseOptionNamePart(UninterpretedOption* uninterpreted_option, + const LocationRecorder& part_location, + const FileDescriptorProto* containing_file); + + // Parses a string surrounded by balanced braces. Strips off the outer + // braces and stores the enclosed string in *value. + // E.g., + // { foo } *value gets 'foo' + // { foo { bar: box } } *value gets 'foo { bar: box }' + // {} *value gets '' + // + // REQUIRES: LookingAt("{") + // When finished successfully, we are looking at the first token past + // the ending brace. + bool ParseUninterpretedBlock(string* value); // ================================================================= io::Tokenizer* input_; io::ErrorCollector* error_collector_; - SourceLocationTable* source_location_table_; + SourceCodeInfo* source_code_info_; + SourceLocationTable* source_location_table_; // legacy bool had_errors_; bool require_syntax_identifier_; bool stop_after_syntax_identifier_; string syntax_identifier_; + // Leading doc comments for the next declaration. These are not complete + // yet; use ConsumeEndOfDeclaration() to get the complete comments. + string upcoming_doc_comments_; + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(Parser); }; // A table mapping (descriptor, ErrorLocation) pairs -- as reported by // DescriptorPool when validating descriptors -- to line and column numbers // within the original source code. +// +// This is semi-obsolete: FileDescriptorProto.source_code_info now contains +// far more complete information about source locations. However, as of this +// writing you still need to use SourceLocationTable when integrating with +// DescriptorPool. class LIBPROTOBUF_EXPORT SourceLocationTable { public: SourceLocationTable(); |