aboutsummaryrefslogtreecommitdiffstats
path: root/src/google/protobuf/compiler/parser.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/google/protobuf/compiler/parser.h')
-rw-r--r--src/google/protobuf/compiler/parser.h260
1 files changed, 222 insertions, 38 deletions
diff --git a/src/google/protobuf/compiler/parser.h b/src/google/protobuf/compiler/parser.h
index 72c96d0..6bca7fa 100644
--- a/src/google/protobuf/compiler/parser.h
+++ b/src/google/protobuf/compiler/parser.h
@@ -40,7 +40,6 @@
#include <map>
#include <string>
#include <utility>
-#include <google/protobuf/stubs/common.h>
#include <google/protobuf/descriptor.h>
#include <google/protobuf/descriptor.pb.h>
#include <google/protobuf/repeated_field.h>
@@ -74,6 +73,9 @@ class LIBPROTOBUF_EXPORT Parser {
// Optional fetaures:
+ // DEPRECATED: New code should use the SourceCodeInfo embedded in the
+ // FileDescriptorProto.
+ //
// Requests that locations of certain definitions be recorded to the given
// SourceLocationTable while parsing. This can be used to look up exact line
// and column numbers for errors reported by DescriptorPool during validation.
@@ -82,7 +84,7 @@ class LIBPROTOBUF_EXPORT Parser {
source_location_table_ = location_table;
}
- // Requsets that errors be recorded to the given ErrorCollector while
+ // Requests that errors be recorded to the given ErrorCollector while
// parsing. Set to NULL (the default) to discard error messages.
void RecordErrorsTo(io::ErrorCollector* error_collector) {
error_collector_ = error_collector;
@@ -113,6 +115,8 @@ class LIBPROTOBUF_EXPORT Parser {
}
private:
+ class LocationRecorder;
+
// =================================================================
// Error recovery helpers
@@ -161,6 +165,8 @@ class LIBPROTOBUF_EXPORT Parser {
bool ConsumeIdentifier(string* output, const char* error);
// Consume an integer and store its value in "output".
bool ConsumeInteger(int* output, const char* error);
+ // Consume a signed integer and store its value in "output".
+ bool ConsumeSignedInteger(int* output, const char* error);
// Consume a 64-bit integer and store its value in "output". If the value
// is greater than max_value, an error will be reported.
bool ConsumeInteger64(uint64 max_value, uint64* output, const char* error);
@@ -170,6 +176,20 @@ class LIBPROTOBUF_EXPORT Parser {
// Consume a string literal and store its (unescaped) value in "output".
bool ConsumeString(string* output, const char* error);
+ // Consume a token representing the end of the statement. Comments between
+ // this token and the next will be harvested for documentation. The given
+ // LocationRecorder should refer to the declaration that was just parsed;
+ // it will be populated with these comments.
+ //
+ // TODO(kenton): The LocationRecorder is const because historically locations
+ // have been passed around by const reference, for no particularly good
+ // reason. We should probably go through and change them all to mutable
+ // pointer to make this more intuitive.
+ bool TryConsumeEndOfDeclaration(const char* text,
+ const LocationRecorder* location);
+ bool ConsumeEndOfDeclaration(const char* text,
+ const LocationRecorder* location);
+
// -----------------------------------------------------------------
// Error logging helpers
@@ -180,16 +200,67 @@ class LIBPROTOBUF_EXPORT Parser {
// of the current token.
void AddError(const string& error);
- // Record the given line and column and associate it with this descriptor
- // in the SourceLocationTable.
- void RecordLocation(const Message* descriptor,
- DescriptorPool::ErrorCollector::ErrorLocation location,
- int line, int column);
-
- // Record the current line and column and associate it with this descriptor
- // in the SourceLocationTable.
- void RecordLocation(const Message* descriptor,
- DescriptorPool::ErrorCollector::ErrorLocation location);
+ // Records a location in the SourceCodeInfo.location table (see
+ // descriptor.proto). We use RAII to ensure that the start and end locations
+ // are recorded -- the constructor records the start location and the
+ // destructor records the end location. Since the parser is
+ // recursive-descent, this works out beautifully.
+ class LIBPROTOBUF_EXPORT LocationRecorder {
+ public:
+ // Construct the file's "root" location.
+ LocationRecorder(Parser* parser);
+
+ // Construct a location that represents a declaration nested within the
+ // given parent. E.g. a field's location is nested within the location
+ // for a message type. The parent's path will be copied, so you should
+ // call AddPath() only to add the path components leading from the parent
+ // to the child (as opposed to leading from the root to the child).
+ LocationRecorder(const LocationRecorder& parent);
+
+ // Convenience constructors that call AddPath() one or two times.
+ LocationRecorder(const LocationRecorder& parent, int path1);
+ LocationRecorder(const LocationRecorder& parent, int path1, int path2);
+
+ ~LocationRecorder();
+
+ // Add a path component. See SourceCodeInfo.Location.path in
+ // descriptor.proto.
+ void AddPath(int path_component);
+
+ // By default the location is considered to start at the current token at
+ // the time the LocationRecorder is created. StartAt() sets the start
+ // location to the given token instead.
+ void StartAt(const io::Tokenizer::Token& token);
+
+ // Start at the same location as some other LocationRecorder.
+ void StartAt(const LocationRecorder& other);
+
+ // By default the location is considered to end at the previous token at
+ // the time the LocationRecorder is destroyed. EndAt() sets the end
+ // location to the given token instead.
+ void EndAt(const io::Tokenizer::Token& token);
+
+ // Records the start point of this location to the SourceLocationTable that
+ // was passed to RecordSourceLocationsTo(), if any. SourceLocationTable
+ // is an older way of keeping track of source locations which is still
+ // used in some places.
+ void RecordLegacyLocation(const Message* descriptor,
+ DescriptorPool::ErrorCollector::ErrorLocation location);
+
+ // Attaches leading and trailing comments to the location. The two strings
+ // will be swapped into place, so after this is called *leading and
+ // *trailing will be empty.
+ //
+ // TODO(kenton): See comment on TryConsumeEndOfDeclaration(), above, for
+ // why this is const.
+ void AttachComments(string* leading, string* trailing) const;
+
+ private:
+ Parser* parser_;
+ SourceCodeInfo::Location* location_;
+
+ void Init(const LocationRecorder& parent);
+ };
// =================================================================
// Parsers for various language constructs
@@ -210,54 +281,131 @@ class LIBPROTOBUF_EXPORT Parser {
// makes logic much simpler for the caller.
// Parse a top-level message, enum, service, etc.
- bool ParseTopLevelStatement(FileDescriptorProto* file);
+ bool ParseTopLevelStatement(FileDescriptorProto* file,
+ const LocationRecorder& root_location);
// Parse various language high-level language construrcts.
- bool ParseMessageDefinition(DescriptorProto* message);
- bool ParseEnumDefinition(EnumDescriptorProto* enum_type);
- bool ParseServiceDefinition(ServiceDescriptorProto* service);
- bool ParsePackage(FileDescriptorProto* file);
- bool ParseImport(string* import_filename);
- bool ParseOption(Message* options);
+ bool ParseMessageDefinition(DescriptorProto* message,
+ const LocationRecorder& message_location,
+ const FileDescriptorProto* containing_file);
+ bool ParseEnumDefinition(EnumDescriptorProto* enum_type,
+ const LocationRecorder& enum_location,
+ const FileDescriptorProto* containing_file);
+ bool ParseServiceDefinition(ServiceDescriptorProto* service,
+ const LocationRecorder& service_location,
+ const FileDescriptorProto* containing_file);
+ bool ParsePackage(FileDescriptorProto* file,
+ const LocationRecorder& root_location,
+ const FileDescriptorProto* containing_file);
+ bool ParseImport(RepeatedPtrField<string>* dependency,
+ RepeatedField<int32>* public_dependency,
+ RepeatedField<int32>* weak_dependency,
+ const LocationRecorder& root_location,
+ const FileDescriptorProto* containing_file);
+ bool ParseOption(Message* options,
+ const LocationRecorder& options_location,
+ const FileDescriptorProto* containing_file);
// These methods parse the contents of a message, enum, or service type and
// add them to the given object. They consume the entire block including
// the beginning and ending brace.
- bool ParseMessageBlock(DescriptorProto* message);
- bool ParseEnumBlock(EnumDescriptorProto* enum_type);
- bool ParseServiceBlock(ServiceDescriptorProto* service);
+ bool ParseMessageBlock(DescriptorProto* message,
+ const LocationRecorder& message_location,
+ const FileDescriptorProto* containing_file);
+ bool ParseEnumBlock(EnumDescriptorProto* enum_type,
+ const LocationRecorder& enum_location,
+ const FileDescriptorProto* containing_file);
+ bool ParseServiceBlock(ServiceDescriptorProto* service,
+ const LocationRecorder& service_location,
+ const FileDescriptorProto* containing_file);
// Parse one statement within a message, enum, or service block, inclunding
// final semicolon.
- bool ParseMessageStatement(DescriptorProto* message);
- bool ParseEnumStatement(EnumDescriptorProto* message);
- bool ParseServiceStatement(ServiceDescriptorProto* message);
+ bool ParseMessageStatement(DescriptorProto* message,
+ const LocationRecorder& message_location,
+ const FileDescriptorProto* containing_file);
+ bool ParseEnumStatement(EnumDescriptorProto* message,
+ const LocationRecorder& enum_location,
+ const FileDescriptorProto* containing_file);
+ bool ParseServiceStatement(ServiceDescriptorProto* message,
+ const LocationRecorder& service_location,
+ const FileDescriptorProto* containing_file);
// Parse a field of a message. If the field is a group, its type will be
// added to "messages".
+ //
+ // parent_location and location_field_number_for_nested_type are needed when
+ // parsing groups -- we need to generate a nested message type within the
+ // parent and record its location accordingly. Since the parent could be
+ // either a FileDescriptorProto or a DescriptorProto, we must pass in the
+ // correct field number to use.
bool ParseMessageField(FieldDescriptorProto* field,
- RepeatedPtrField<DescriptorProto>* messages);
+ RepeatedPtrField<DescriptorProto>* messages,
+ const LocationRecorder& parent_location,
+ int location_field_number_for_nested_type,
+ const LocationRecorder& field_location,
+ const FileDescriptorProto* containing_file);
+
+ // Like ParseMessageField() but expects the label has already been filled in
+ // by the caller.
+ bool ParseMessageFieldNoLabel(FieldDescriptorProto* field,
+ RepeatedPtrField<DescriptorProto>* messages,
+ const LocationRecorder& parent_location,
+ int location_field_number_for_nested_type,
+ const LocationRecorder& field_location,
+ const FileDescriptorProto* containing_file);
// Parse an "extensions" declaration.
- bool ParseExtensions(DescriptorProto* message);
+ bool ParseExtensions(DescriptorProto* message,
+ const LocationRecorder& extensions_location,
+ const FileDescriptorProto* containing_file);
- // Parse an "extend" declaration.
+ // Parse an "extend" declaration. (See also comments for
+ // ParseMessageField().)
bool ParseExtend(RepeatedPtrField<FieldDescriptorProto>* extensions,
- RepeatedPtrField<DescriptorProto>* messages);
+ RepeatedPtrField<DescriptorProto>* messages,
+ const LocationRecorder& parent_location,
+ int location_field_number_for_nested_type,
+ const LocationRecorder& extend_location,
+ const FileDescriptorProto* containing_file);
+
+ // Parse a "oneof" declaration. The caller is responsible for setting
+ // oneof_decl->label() since it will have had to parse the label before it
+ // knew it was parsing a oneof.
+ bool ParseOneof(OneofDescriptorProto* oneof_decl,
+ DescriptorProto* containing_type,
+ int oneof_index,
+ const LocationRecorder& oneof_location,
+ const LocationRecorder& containing_type_location,
+ const FileDescriptorProto* containing_file);
// Parse a single enum value within an enum block.
- bool ParseEnumConstant(EnumValueDescriptorProto* enum_value);
+ bool ParseEnumConstant(EnumValueDescriptorProto* enum_value,
+ const LocationRecorder& enum_value_location,
+ const FileDescriptorProto* containing_file);
// Parse enum constant options, i.e. the list in square brackets at the end
// of the enum constant value definition.
- bool ParseEnumConstantOptions(EnumValueDescriptorProto* value);
+ bool ParseEnumConstantOptions(EnumValueDescriptorProto* value,
+ const LocationRecorder& enum_value_location,
+ const FileDescriptorProto* containing_file);
// Parse a single method within a service definition.
- bool ParseServiceMethod(MethodDescriptorProto* method);
+ bool ParseServiceMethod(MethodDescriptorProto* method,
+ const LocationRecorder& method_location,
+ const FileDescriptorProto* containing_file);
+
+
+ // Parse options of a single method or stream.
+ bool ParseOptions(const LocationRecorder& parent_location,
+ const FileDescriptorProto* containing_file,
+ const int optionsFieldNumber,
+ Message* mutable_options);
// Parse "required", "optional", or "repeated" and fill in "label"
// with the value.
- bool ParseLabel(FieldDescriptorProto::Label* label);
+ bool ParseLabel(FieldDescriptorProto::Label* label,
+ const FileDescriptorProto* containing_file);
// Parse a type name and fill in "type" (if it is a primitive) or
// "type_name" (if it is not) with the type parsed.
@@ -269,39 +417,75 @@ class LIBPROTOBUF_EXPORT Parser {
// Parses field options, i.e. the stuff in square brackets at the end
// of a field definition. Also parses default value.
- bool ParseFieldOptions(FieldDescriptorProto* field);
+ bool ParseFieldOptions(FieldDescriptorProto* field,
+ const LocationRecorder& field_location,
+ const FileDescriptorProto* containing_file);
// Parse the "default" option. This needs special handling because its
// type is the field's type.
- bool ParseDefaultAssignment(FieldDescriptorProto* field);
+ bool ParseDefaultAssignment(FieldDescriptorProto* field,
+ const LocationRecorder& field_location,
+ const FileDescriptorProto* containing_file);
+
+ enum OptionStyle {
+ OPTION_ASSIGNMENT, // just "name = value"
+ OPTION_STATEMENT // "option name = value;"
+ };
// Parse a single option name/value pair, e.g. "ctype = CORD". The name
// identifies a field of the given Message, and the value of that field
// is set to the parsed value.
- bool ParseOptionAssignment(Message* options);
+ bool ParseOption(Message* options,
+ const LocationRecorder& options_location,
+ const FileDescriptorProto* containing_file,
+ OptionStyle style);
// Parses a single part of a multipart option name. A multipart name consists
// of names separated by dots. Each name is either an identifier or a series
// of identifiers separated by dots and enclosed in parentheses. E.g.,
// "foo.(bar.baz).qux".
- bool ParseOptionNamePart(UninterpretedOption* uninterpreted_option);
+ bool ParseOptionNamePart(UninterpretedOption* uninterpreted_option,
+ const LocationRecorder& part_location,
+ const FileDescriptorProto* containing_file);
+
+ // Parses a string surrounded by balanced braces. Strips off the outer
+ // braces and stores the enclosed string in *value.
+ // E.g.,
+ // { foo } *value gets 'foo'
+ // { foo { bar: box } } *value gets 'foo { bar: box }'
+ // {} *value gets ''
+ //
+ // REQUIRES: LookingAt("{")
+ // When finished successfully, we are looking at the first token past
+ // the ending brace.
+ bool ParseUninterpretedBlock(string* value);
// =================================================================
io::Tokenizer* input_;
io::ErrorCollector* error_collector_;
- SourceLocationTable* source_location_table_;
+ SourceCodeInfo* source_code_info_;
+ SourceLocationTable* source_location_table_; // legacy
bool had_errors_;
bool require_syntax_identifier_;
bool stop_after_syntax_identifier_;
string syntax_identifier_;
+ // Leading doc comments for the next declaration. These are not complete
+ // yet; use ConsumeEndOfDeclaration() to get the complete comments.
+ string upcoming_doc_comments_;
+
GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(Parser);
};
// A table mapping (descriptor, ErrorLocation) pairs -- as reported by
// DescriptorPool when validating descriptors -- to line and column numbers
// within the original source code.
+//
+// This is semi-obsolete: FileDescriptorProto.source_code_info now contains
+// far more complete information about source locations. However, as of this
+// writing you still need to use SourceLocationTable when integrating with
+// DescriptorPool.
class LIBPROTOBUF_EXPORT SourceLocationTable {
public:
SourceLocationTable();