about summary refs log tree commit diff stats
path: root/src/google/protobuf/io/tokenizer.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/google/protobuf/io/tokenizer.h')
-rw-r--r-- src/google/protobuf/io/tokenizer.h 117
1 files changed, 9 insertions, 108 deletions
diff --git a/src/google/protobuf/io/tokenizer.h b/src/google/protobuf/io/tokenizer.h
index 8c6220a..d115161 100644
--- a/src/google/protobuf/io/tokenizer.h
+++ b/src/google/protobuf/io/tokenizer.h
@@ -1,6 +1,6 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
-// https://developers.google.com/protocol-buffers/
+// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -38,7 +38,6 @@
#define GOOGLE_PROTOBUF_IO_TOKENIZER_H__
#include <string>
-#include <vector>
#include <google/protobuf/stubs/common.h>
namespace google {
@@ -67,8 +66,7 @@ class LIBPROTOBUF_EXPORT ErrorCollector {
// Indicates that there was a warning in the input at the given line and
// column numbers. The numbers are zero-based, so you may want to add
// 1 to each before printing them.
- virtual void AddWarning(int /* line */, int /* column */,
- const string& /* message */) { }
+ virtual void AddWarning(int line, int column, const string& message) { }
private:
GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ErrorCollector);
@@ -124,68 +122,16 @@ class LIBPROTOBUF_EXPORT Tokenizer {
// the token within the input stream. They are zero-based.
int line;
int column;
- int end_column;
};
// Get the current token. This is updated when Next() is called. Before
// the first call to Next(), current() has type TYPE_START and no contents.
const Token& current();
- // Return the previous token -- i.e. what current() returned before the
- // previous call to Next().
- const Token& previous();
-
// Advance to the next token. Returns false if the end of the input is
// reached.
bool Next();
- // Like Next(), but also collects comments which appear between the previous
- // and next tokens.
- //
- // Comments which appear to be attached to the previous token are stored
- // in *prev_trailing_comments. Comments which appear to be attached to the
- // next token are stored in *next_leading_comments. Comments appearing in
- // between which do not appear to be attached to either will be added to
- // *detached_comments. Any of these parameters can be NULL to simply discard
- // the comments.
- //
- // A series of line comments appearing on consecutive lines, with no other
- // tokens appearing on those lines, will be treated as a single comment.
- //
- // Only the comment content is returned; comment markers (e.g. //) are
- // stripped out. For block comments, leading whitespace and an asterisk will
- // be stripped from the beginning of each line other than the first. Newlines
- // are included in the output.
- //
- // Examples:
- //
- // optional int32 foo = 1; // Comment attached to foo.
- // // Comment attached to bar.
- // optional int32 bar = 2;
- //
- // optional string baz = 3;
- // // Comment attached to baz.
- // // Another line attached to baz.
- //
- // // Comment attached to qux.
- // //
- // // Another line attached to qux.
- // optional double qux = 4;
- //
- // // Detached comment. This is not attached to qux or corge
- // // because there are blank lines separating it from both.
- //
- // optional string corge = 5;
- // /* Block comment attached
- // * to corge. Leading asterisks
- // * will be removed. */
- // /* Block comment attached to
- // * grault. */
- // optional int32 grault = 6;
- bool NextWithComments(string* prev_trailing_comments,
- vector<string>* detached_comments,
- string* next_leading_comments);
-
// Parse helpers ---------------------------------------------------
// Parses a TYPE_FLOAT token. This never fails, so long as the text actually
@@ -229,27 +175,11 @@ class LIBPROTOBUF_EXPORT Tokenizer {
// Sets the comment style.
void set_comment_style(CommentStyle style) { comment_style_ = style; }
- // Whether to require whitespace between a number and a field name.
- // Default is true. Do not use this; for Google-internal cleanup only.
- void set_require_space_after_number(bool require) {
- require_space_after_number_ = require;
- }
-
- // Whether to allow string literals to span multiple lines. Default is false.
- // Do not use this; for Google-internal cleanup only.
- void set_allow_multiline_strings(bool allow) {
- allow_multiline_strings_ = allow;
- }
-
- // External helper: validate an identifier.
- static bool IsIdentifier(const string& text);
-
// -----------------------------------------------------------------
private:
GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(Tokenizer);
Token current_; // Returned by current().
- Token previous_; // Returned by previous().
ZeroCopyInputStream* input_;
ErrorCollector* error_collector_;
@@ -264,18 +194,15 @@ class LIBPROTOBUF_EXPORT Tokenizer {
int line_;
int column_;
- // String to which text should be appended as we advance through it.
- // Call RecordTo(&str) to start recording and StopRecording() to stop.
- // E.g. StartToken() calls RecordTo(&current_.text). record_start_ is the
- // position within the current buffer where recording started.
- string* record_target_;
- int record_start_;
+ // Position in buffer_ where StartToken() was called. If the token
+ // started in the previous buffer, this is zero, and current_.text already
+ // contains the part of the token from the previous buffer. If not
+ // currently parsing a token, this is -1.
+ int token_start_;
// Options.
bool allow_f_after_float_;
CommentStyle comment_style_;
- bool require_space_after_number_;
- bool allow_multiline_strings_;
// Since we count columns we need to interpret tabs somehow. We'll take
// the standard 8-character definition for lack of any way to do better.
@@ -290,9 +217,6 @@ class LIBPROTOBUF_EXPORT Tokenizer {
// Read a new buffer from the input.
void Refresh();
- inline void RecordTo(string* target);
- inline void StopRecording();
-
// Called when the current character is the first character of a new
// token (not including whitespace or comments).
inline void StartToken();
@@ -325,28 +249,9 @@ class LIBPROTOBUF_EXPORT Tokenizer {
TokenType ConsumeNumber(bool started_with_zero, bool started_with_dot);
// Consume the rest of a line.
- void ConsumeLineComment(string* content);
+ void ConsumeLineComment();
// Consume until "*/".
- void ConsumeBlockComment(string* content);
-
- enum NextCommentStatus {
- // Started a line comment.
- LINE_COMMENT,
-
- // Started a block comment.
- BLOCK_COMMENT,
-
- // Consumed a slash, then realized it wasn't a comment. current_ has
- // been filled in with a slash token. The caller should return it.
- SLASH_NOT_COMMENT,
-
- // We do not appear to be starting a comment here.
- NO_COMMENT
- };
-
- // If we're at the start of a new comment, consume it and return what kind
- // of comment it is.
- NextCommentStatus TryConsumeCommentStart();
+ void ConsumeBlockComment();
// -----------------------------------------------------------------
// These helper methods make the parsing code more readable. The
@@ -386,10 +291,6 @@ inline const Tokenizer::Token& Tokenizer::current() {
return current_;
}
-inline const Tokenizer::Token& Tokenizer::previous() {
- return previous_;
-}
-
inline void Tokenizer::ParseString(const string& text, string* output) {
output->clear();
ParseStringAppend(text, output);