aboutsummaryrefslogtreecommitdiffstats
path: root/src/google/protobuf/io/tokenizer.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/google/protobuf/io/tokenizer.cc')
-rw-r--r--src/google/protobuf/io/tokenizer.cc544
1 files changed, 490 insertions, 54 deletions
diff --git a/src/google/protobuf/io/tokenizer.cc b/src/google/protobuf/io/tokenizer.cc
index 38fa351..d149305 100644
--- a/src/google/protobuf/io/tokenizer.cc
+++ b/src/google/protobuf/io/tokenizer.cc
@@ -89,8 +89,12 @@
// exactly pretty.
#include <google/protobuf/io/tokenizer.h>
+#include <google/protobuf/stubs/common.h>
+#include <google/protobuf/stubs/stringprintf.h>
+#include <google/protobuf/io/strtod.h>
#include <google/protobuf/io/zero_copy_stream.h>
#include <google/protobuf/stubs/strutil.h>
+#include <google/protobuf/stubs/stl_util.h>
namespace google {
namespace protobuf {
@@ -118,6 +122,8 @@ namespace {
CHARACTER_CLASS(Whitespace, c == ' ' || c == '\n' || c == '\t' ||
c == '\r' || c == '\v' || c == '\f');
+CHARACTER_CLASS(WhitespaceNoNewline, c == ' ' || c == '\t' ||
+ c == '\r' || c == '\v' || c == '\f');
CHARACTER_CLASS(Unprintable, c < ' ' && c > '\0');
@@ -187,12 +193,16 @@ Tokenizer::Tokenizer(ZeroCopyInputStream* input,
read_error_(false),
line_(0),
column_(0),
- token_start_(-1),
+ record_target_(NULL),
+ record_start_(-1),
allow_f_after_float_(false),
- comment_style_(CPP_COMMENT_STYLE) {
+ comment_style_(CPP_COMMENT_STYLE),
+ require_space_after_number_(true),
+ allow_multiline_strings_(false) {
current_.line = 0;
current_.column = 0;
+ current_.end_column = 0;
current_.type = TYPE_START;
Refresh();
@@ -237,9 +247,9 @@ void Tokenizer::Refresh() {
}
// If we're in a token, append the rest of the buffer to it.
- if (token_start_ >= 0 && token_start_ < buffer_size_) {
- current_.text.append(buffer_ + token_start_, buffer_size_ - token_start_);
- token_start_ = 0;
+ if (record_target_ != NULL && record_start_ < buffer_size_) {
+ record_target_->append(buffer_ + record_start_, buffer_size_ - record_start_);
+ record_start_ = 0;
}
const void* data = NULL;
@@ -260,23 +270,34 @@ void Tokenizer::Refresh() {
current_char_ = buffer_[0];
}
+inline void Tokenizer::RecordTo(string* target) {
+ record_target_ = target;
+ record_start_ = buffer_pos_;
+}
+
+inline void Tokenizer::StopRecording() {
+ // Note: The if() is necessary because some STL implementations crash when
+ // you call string::append(NULL, 0), presumably because they are trying to
+ // be helpful by detecting the NULL pointer, even though there's nothing
+ // wrong with reading zero bytes from NULL.
+ if (buffer_pos_ != record_start_) {
+ record_target_->append(buffer_ + record_start_, buffer_pos_ - record_start_);
+ }
+ record_target_ = NULL;
+ record_start_ = -1;
+}
+
inline void Tokenizer::StartToken() {
- token_start_ = buffer_pos_;
current_.type = TYPE_START; // Just for the sake of initializing it.
current_.text.clear();
current_.line = line_;
current_.column = column_;
+ RecordTo(&current_.text);
}
inline void Tokenizer::EndToken() {
- // Note: The if() is necessary because some STL implementations crash when
- // you call string::append(NULL, 0), presumably because they are trying to
- // be helpful by detecting the NULL pointer, even though there's nothing
- // wrong with reading zero bytes from NULL.
- if (buffer_pos_ != token_start_) {
- current_.text.append(buffer_ + token_start_, buffer_pos_ - token_start_);
- }
- token_start_ = -1;
+ StopRecording();
+ current_.end_column = column_;
}
// -------------------------------------------------------------------
@@ -332,9 +353,16 @@ void Tokenizer::ConsumeString(char delimiter) {
while (true) {
switch (current_char_) {
case '\0':
- case '\n': {
- AddError("String literals cannot cross line boundaries.");
+ AddError("Unexpected end of string.");
return;
+
+ case '\n': {
+ if (!allow_multiline_strings_) {
+ AddError("String literals cannot cross line boundaries.");
+ return;
+ }
+ NextChar();
+ break;
}
case '\\': {
@@ -351,6 +379,27 @@ void Tokenizer::ConsumeString(char delimiter) {
AddError("Expected hex digits for escape sequence.");
}
// Possibly followed by another hex digit, but again we don't care.
+ } else if (TryConsume('u')) {
+ if (!TryConsumeOne<HexDigit>() ||
+ !TryConsumeOne<HexDigit>() ||
+ !TryConsumeOne<HexDigit>() ||
+ !TryConsumeOne<HexDigit>()) {
+ AddError("Expected four hex digits for \\u escape sequence.");
+ }
+ } else if (TryConsume('U')) {
+ // We expect 8 hex digits; but only the range up to 0x10ffff is
+ // legal.
+ if (!TryConsume('0') ||
+ !TryConsume('0') ||
+ !(TryConsume('0') || TryConsume('1')) ||
+ !TryConsumeOne<HexDigit>() ||
+ !TryConsumeOne<HexDigit>() ||
+ !TryConsumeOne<HexDigit>() ||
+ !TryConsumeOne<HexDigit>() ||
+ !TryConsumeOne<HexDigit>()) {
+ AddError("Expected eight hex digits up to 10ffff for \\U escape "
+ "sequence");
+ }
} else {
AddError("Invalid escape sequence in string literal.");
}
@@ -410,7 +459,7 @@ Tokenizer::TokenType Tokenizer::ConsumeNumber(bool started_with_zero,
}
}
- if (LookingAt<Letter>()) {
+ if (LookingAt<Letter>() && require_space_after_number_) {
AddError("Need space between number and identifier.");
} else if (current_char_ == '.') {
if (is_float) {
@@ -424,26 +473,51 @@ Tokenizer::TokenType Tokenizer::ConsumeNumber(bool started_with_zero,
return is_float ? TYPE_FLOAT : TYPE_INTEGER;
}
-void Tokenizer::ConsumeLineComment() {
+void Tokenizer::ConsumeLineComment(string* content) {
+ if (content != NULL) RecordTo(content);
+
while (current_char_ != '\0' && current_char_ != '\n') {
NextChar();
}
TryConsume('\n');
+
+ if (content != NULL) StopRecording();
}
-void Tokenizer::ConsumeBlockComment() {
+void Tokenizer::ConsumeBlockComment(string* content) {
int start_line = line_;
int start_column = column_ - 2;
+ if (content != NULL) RecordTo(content);
+
while (true) {
while (current_char_ != '\0' &&
current_char_ != '*' &&
- current_char_ != '/') {
+ current_char_ != '/' &&
+ current_char_ != '\n') {
NextChar();
}
- if (TryConsume('*') && TryConsume('/')) {
+ if (TryConsume('\n')) {
+ if (content != NULL) StopRecording();
+
+ // Consume leading whitespace and asterisk;
+ ConsumeZeroOrMore<WhitespaceNoNewline>();
+ if (TryConsume('*')) {
+ if (TryConsume('/')) {
+ // End of comment.
+ break;
+ }
+ }
+
+ if (content != NULL) RecordTo(content);
+ } else if (TryConsume('*') && TryConsume('/')) {
// End of comment.
+ if (content != NULL) {
+ StopRecording();
+ // Strip trailing "*/".
+ content->erase(content->size() - 2);
+ }
break;
} else if (TryConsume('/') && current_char_ == '*') {
// Note: We didn't consume the '*' because if there is a '/' after it
@@ -454,42 +528,59 @@ void Tokenizer::ConsumeBlockComment() {
AddError("End-of-file inside block comment.");
error_collector_->AddError(
start_line, start_column, " Comment started here.");
+ if (content != NULL) StopRecording();
break;
}
}
}
+Tokenizer::NextCommentStatus Tokenizer::TryConsumeCommentStart() {
+ if (comment_style_ == CPP_COMMENT_STYLE && TryConsume('/')) {
+ if (TryConsume('/')) {
+ return LINE_COMMENT;
+ } else if (TryConsume('*')) {
+ return BLOCK_COMMENT;
+ } else {
+ // Oops, it was just a slash. Return it.
+ current_.type = TYPE_SYMBOL;
+ current_.text = "/";
+ current_.line = line_;
+ current_.column = column_ - 1;
+ current_.end_column = column_;
+ return SLASH_NOT_COMMENT;
+ }
+ } else if (comment_style_ == SH_COMMENT_STYLE && TryConsume('#')) {
+ return LINE_COMMENT;
+ } else {
+ return NO_COMMENT;
+ }
+}
+
// -------------------------------------------------------------------
bool Tokenizer::Next() {
- TokenType last_token_type = current_.type;
-
- // Did we skip any characters after the last token?
- bool skipped_stuff = false;
+ previous_ = current_;
while (!read_error_) {
- if (TryConsumeOne<Whitespace>()) {
- ConsumeZeroOrMore<Whitespace>();
-
- } else if (comment_style_ == CPP_COMMENT_STYLE && TryConsume('/')) {
- // Starting a comment?
- if (TryConsume('/')) {
- ConsumeLineComment();
- } else if (TryConsume('*')) {
- ConsumeBlockComment();
- } else {
- // Oops, it was just a slash. Return it.
- current_.type = TYPE_SYMBOL;
- current_.text = "/";
- current_.line = line_;
- current_.column = column_ - 1;
+ ConsumeZeroOrMore<Whitespace>();
+
+ switch (TryConsumeCommentStart()) {
+ case LINE_COMMENT:
+ ConsumeLineComment(NULL);
+ continue;
+ case BLOCK_COMMENT:
+ ConsumeBlockComment(NULL);
+ continue;
+ case SLASH_NOT_COMMENT:
return true;
- }
+ case NO_COMMENT:
+ break;
+ }
- } else if (comment_style_ == SH_COMMENT_STYLE && TryConsume('#')) {
- ConsumeLineComment();
+ // Check for EOF before continuing.
+ if (read_error_) break;
- } else if (LookingAt<Unprintable>() || current_char_ == '\0') {
+ if (LookingAt<Unprintable>() || current_char_ == '\0') {
AddError("Invalid control characters encountered in text.");
NextChar();
// Skip more unprintable characters, too. But, remember that '\0' is
@@ -517,7 +608,9 @@ bool Tokenizer::Next() {
if (TryConsumeOne<Digit>()) {
// It's a floating-point number.
- if (last_token_type == TYPE_IDENTIFIER && !skipped_stuff) {
+ if (previous_.type == TYPE_IDENTIFIER &&
+ current_.line == previous_.line &&
+ current_.column == previous_.end_column) {
// We don't accept syntax like "blah.123".
error_collector_->AddError(line_, column_ - 2,
"Need space between identifier and decimal point.");
@@ -535,6 +628,12 @@ bool Tokenizer::Next() {
ConsumeString('\'');
current_.type = TYPE_STRING;
} else {
+ // Check if the high order bit is set.
+ if (current_char_ & 0x80) {
+ error_collector_->AddError(line_, column_,
+ StringPrintf("Interpreting non ascii codepoint %d.",
+ static_cast<unsigned char>(current_char_)));
+ }
NextChar();
current_.type = TYPE_SYMBOL;
}
@@ -542,8 +641,6 @@ bool Tokenizer::Next() {
EndToken();
return true;
}
-
- skipped_stuff = true;
}
// EOF
@@ -551,9 +648,199 @@ bool Tokenizer::Next() {
current_.text.clear();
current_.line = line_;
current_.column = column_;
+ current_.end_column = column_;
return false;
}
+namespace {
+
+// Helper class for collecting comments and putting them in the right places.
+//
+// This basically just buffers the most recent comment until it can be decided
+// exactly where that comment should be placed. When Flush() is called, the
+// current comment goes into either prev_trailing_comments or detached_comments.
+// When the CommentCollector is destroyed, the last buffered comment goes into
+// next_leading_comments.
+class CommentCollector {
+ public:
+ CommentCollector(string* prev_trailing_comments,
+ vector<string>* detached_comments,
+ string* next_leading_comments)
+ : prev_trailing_comments_(prev_trailing_comments),
+ detached_comments_(detached_comments),
+ next_leading_comments_(next_leading_comments),
+ has_comment_(false),
+ is_line_comment_(false),
+ can_attach_to_prev_(true) {
+ if (prev_trailing_comments != NULL) prev_trailing_comments->clear();
+ if (detached_comments != NULL) detached_comments->clear();
+ if (next_leading_comments != NULL) next_leading_comments->clear();
+ }
+
+ ~CommentCollector() {
+ // Whatever is in the buffer is a leading comment.
+ if (next_leading_comments_ != NULL && has_comment_) {
+ comment_buffer_.swap(*next_leading_comments_);
+ }
+ }
+
+ // About to read a line comment. Get the comment buffer pointer in order to
+ // read into it.
+ string* GetBufferForLineComment() {
+ // We want to combine with previous line comments, but not block comments.
+ if (has_comment_ && !is_line_comment_) {
+ Flush();
+ }
+ has_comment_ = true;
+ is_line_comment_ = true;
+ return &comment_buffer_;
+ }
+
+ // About to read a block comment. Get the comment buffer pointer in order to
+ // read into it.
+ string* GetBufferForBlockComment() {
+ if (has_comment_) {
+ Flush();
+ }
+ has_comment_ = true;
+ is_line_comment_ = false;
+ return &comment_buffer_;
+ }
+
+ void ClearBuffer() {
+ comment_buffer_.clear();
+ has_comment_ = false;
+ }
+
+ // Called once we know that the comment buffer is complete and is *not*
+ // connected to the next token.
+ void Flush() {
+ if (has_comment_) {
+ if (can_attach_to_prev_) {
+ if (prev_trailing_comments_ != NULL) {
+ prev_trailing_comments_->append(comment_buffer_);
+ }
+ can_attach_to_prev_ = false;
+ } else {
+ if (detached_comments_ != NULL) {
+ detached_comments_->push_back(comment_buffer_);
+ }
+ }
+ ClearBuffer();
+ }
+ }
+
+ void DetachFromPrev() {
+ can_attach_to_prev_ = false;
+ }
+
+ private:
+ string* prev_trailing_comments_;
+ vector<string>* detached_comments_;
+ string* next_leading_comments_;
+
+ string comment_buffer_;
+
+ // True if any comments were read into comment_buffer_. This can be true even
+ // if comment_buffer_ is empty, namely if the comment was "/**/".
+ bool has_comment_;
+
+ // Is the comment in the comment buffer a line comment?
+ bool is_line_comment_;
+
+ // Is it still possible that we could be reading a comment attached to the
+ // previous token?
+ bool can_attach_to_prev_;
+};
+
+} // namespace
+
+bool Tokenizer::NextWithComments(string* prev_trailing_comments,
+ vector<string>* detached_comments,
+ string* next_leading_comments) {
+ CommentCollector collector(prev_trailing_comments, detached_comments,
+ next_leading_comments);
+
+ if (current_.type == TYPE_START) {
+ collector.DetachFromPrev();
+ } else {
+ // A comment appearing on the same line must be attached to the previous
+ // declaration.
+ ConsumeZeroOrMore<WhitespaceNoNewline>();
+ switch (TryConsumeCommentStart()) {
+ case LINE_COMMENT:
+ ConsumeLineComment(collector.GetBufferForLineComment());
+
+ // Don't allow comments on subsequent lines to be attached to a trailing
+ // comment.
+ collector.Flush();
+ break;
+ case BLOCK_COMMENT:
+ ConsumeBlockComment(collector.GetBufferForBlockComment());
+
+ ConsumeZeroOrMore<WhitespaceNoNewline>();
+ if (!TryConsume('\n')) {
+ // Oops, the next token is on the same line. If we recorded a comment
+ // we really have no idea which token it should be attached to.
+ collector.ClearBuffer();
+ return Next();
+ }
+
+ // Don't allow comments on subsequent lines to be attached to a trailing
+ // comment.
+ collector.Flush();
+ break;
+ case SLASH_NOT_COMMENT:
+ return true;
+ case NO_COMMENT:
+ if (!TryConsume('\n')) {
+ // The next token is on the same line. There are no comments.
+ return Next();
+ }
+ break;
+ }
+ }
+
+ // OK, we are now on the line *after* the previous token.
+ while (true) {
+ ConsumeZeroOrMore<WhitespaceNoNewline>();
+
+ switch (TryConsumeCommentStart()) {
+ case LINE_COMMENT:
+ ConsumeLineComment(collector.GetBufferForLineComment());
+ break;
+ case BLOCK_COMMENT:
+ ConsumeBlockComment(collector.GetBufferForBlockComment());
+
+ // Consume the rest of the line so that we don't interpret it as a
+ // blank line the next time around the loop.
+ ConsumeZeroOrMore<WhitespaceNoNewline>();
+ TryConsume('\n');
+ break;
+ case SLASH_NOT_COMMENT:
+ return true;
+ case NO_COMMENT:
+ if (TryConsume('\n')) {
+ // Completely blank line.
+ collector.Flush();
+ collector.DetachFromPrev();
+ } else {
+ bool result = Next();
+ if (!result ||
+ current_.text == "}" ||
+ current_.text == "]" ||
+ current_.text == ")") {
+ // It looks like we're at the end of a scope. In this case it
+ // makes no sense to attach a comment to the following token.
+ collector.Flush();
+ }
+ return result;
+ }
+ break;
+ }
+ }
+}
+
// -------------------------------------------------------------------
// Token-parsing helpers. Remember that these don't need to report
// errors since any errors should already have been reported while
@@ -623,17 +910,138 @@ double Tokenizer::ParseFloat(const string& text) {
return result;
}
+// Helper to append a Unicode code point to a string as UTF8, without bringing
+// in any external dependencies.
+static void AppendUTF8(uint32 code_point, string* output) {
+ uint32 tmp = 0;
+ int len = 0;
+ if (code_point <= 0x7f) {
+ tmp = code_point;
+ len = 1;
+ } else if (code_point <= 0x07ff) {
+ tmp = 0x0000c080 |
+ ((code_point & 0x07c0) << 2) |
+ (code_point & 0x003f);
+ len = 2;
+ } else if (code_point <= 0xffff) {
+ tmp = 0x00e08080 |
+ ((code_point & 0xf000) << 4) |
+ ((code_point & 0x0fc0) << 2) |
+ (code_point & 0x003f);
+ len = 3;
+ } else if (code_point <= 0x1fffff) {
+ tmp = 0xf0808080 |
+ ((code_point & 0x1c0000) << 6) |
+ ((code_point & 0x03f000) << 4) |
+ ((code_point & 0x000fc0) << 2) |
+ (code_point & 0x003f);
+ len = 4;
+ } else {
+ // UTF-16 is only defined for code points up to 0x10FFFF, and UTF-8 is
+ // normally only defined up to there as well.
+ StringAppendF(output, "\\U%08x", code_point);
+ return;
+ }
+ tmp = ghtonl(tmp);
+ output->append(reinterpret_cast<const char*>(&tmp) + sizeof(tmp) - len, len);
+}
+
+// Try to read <len> hex digits from ptr, and stuff the numeric result into
+// *result. Returns true if that many digits were successfully consumed.
+static bool ReadHexDigits(const char* ptr, int len, uint32* result) {
+ *result = 0;
+ if (len == 0) return false;
+ for (const char* end = ptr + len; ptr < end; ++ptr) {
+ if (*ptr == '\0') return false;
+ *result = (*result << 4) + DigitValue(*ptr);
+ }
+ return true;
+}
+
+// Handling UTF-16 surrogate pairs. UTF-16 encodes code points in the range
+// 0x10000...0x10ffff as a pair of numbers, a head surrogate followed by a trail
+// surrogate. These numbers are in a reserved range of Unicode code points, so
+// if we encounter such a pair we know how to parse it and convert it into a
+// single code point.
+static const uint32 kMinHeadSurrogate = 0xd800;
+static const uint32 kMaxHeadSurrogate = 0xdc00;
+static const uint32 kMinTrailSurrogate = 0xdc00;
+static const uint32 kMaxTrailSurrogate = 0xe000;
+
+static inline bool IsHeadSurrogate(uint32 code_point) {
+ return (code_point >= kMinHeadSurrogate) && (code_point < kMaxHeadSurrogate);
+}
+
+static inline bool IsTrailSurrogate(uint32 code_point) {
+ return (code_point >= kMinTrailSurrogate) &&
+ (code_point < kMaxTrailSurrogate);
+}
+
+// Combine a head and trail surrogate into a single Unicode code point.
+static uint32 AssembleUTF16(uint32 head_surrogate, uint32 trail_surrogate) {
+ GOOGLE_DCHECK(IsHeadSurrogate(head_surrogate));
+ GOOGLE_DCHECK(IsTrailSurrogate(trail_surrogate));
+ return 0x10000 + (((head_surrogate - kMinHeadSurrogate) << 10) |
+ (trail_surrogate - kMinTrailSurrogate));
+}
+
+// Convert the escape sequence parameter to a number of expected hex digits.
+static inline int UnicodeLength(char key) {
+ if (key == 'u') return 4;
+ if (key == 'U') return 8;
+ return 0;
+}
+
+// Given a pointer to the 'u' or 'U' starting a Unicode escape sequence, attempt
+// to parse that sequence. On success, returns a pointer to the first char
+// beyond that sequence, and fills in *code_point. On failure, returns ptr
+// itself.
+static const char* FetchUnicodePoint(const char* ptr, uint32* code_point) {
+ const char* p = ptr;
+ // Fetch the code point.
+ const int len = UnicodeLength(*p++);
+ if (!ReadHexDigits(p, len, code_point))
+ return ptr;
+ p += len;
+
+ // Check if the code point we read is a "head surrogate." If so, then we
+ // expect it to be immediately followed by another code point which is a valid
+ // "trail surrogate," and together they form a UTF-16 pair which decodes into
+ // a single Unicode point. Trail surrogates may only use \u, not \U.
+ if (IsHeadSurrogate(*code_point) && *p == '\\' && *(p + 1) == 'u') {
+ uint32 trail_surrogate;
+ if (ReadHexDigits(p + 2, 4, &trail_surrogate) &&
+ IsTrailSurrogate(trail_surrogate)) {
+ *code_point = AssembleUTF16(*code_point, trail_surrogate);
+ p += 6;
+ }
+ // If this failed, then we just emit the head surrogate as a code point.
+ // It's bogus, but so is the string.
+ }
+
+ return p;
+}
+
+// The text string must begin and end with single or double quote
+// characters.
void Tokenizer::ParseStringAppend(const string& text, string* output) {
- // Reminder: text[0] is always the quote character. (If text is
- // empty, it's invalid, so we'll just return.)
- if (text.empty()) {
+ // Reminder: text[0] is always a quote character. (If text is
+ // empty, it's invalid, so we'll just return).
+ const size_t text_size = text.size();
+ if (text_size == 0) {
GOOGLE_LOG(DFATAL)
<< " Tokenizer::ParseStringAppend() passed text that could not"
" have been tokenized as a string: " << CEscape(text);
return;
}
- output->reserve(output->size() + text.size());
+ // Reserve room for new string. The branch is necessary because if
+ // there is already space available the reserve() call might
+ // downsize the output.
+ const size_t new_len = text_size + output->size();
+ if (new_len > output->capacity()) {
+ output->reserve(new_len);
+ }
// Loop through the string copying characters to "output" and
// interpreting escape sequences. Note that any invalid escape
@@ -671,19 +1079,47 @@ void Tokenizer::ParseStringAppend(const string& text, string* output) {
}
output->push_back(static_cast<char>(code));
+ } else if (*ptr == 'u' || *ptr == 'U') {
+ uint32 unicode;
+ const char* end = FetchUnicodePoint(ptr, &unicode);
+ if (end == ptr) {
+ // Failure: Just dump out what we saw, don't try to parse it.
+ output->push_back(*ptr);
+ } else {
+ AppendUTF8(unicode, output);
+ ptr = end - 1; // Because we're about to ++ptr.
+ }
} else {
// Some other escape code.
output->push_back(TranslateEscape(*ptr));
}
- } else if (*ptr == text[0]) {
- // Ignore quote matching the starting quote.
+ } else if (*ptr == text[0] && ptr[1] == '\0') {
+ // Ignore final quote matching the starting quote.
} else {
output->push_back(*ptr);
}
}
+}
- return;
+template<typename CharacterClass>
+static bool AllInClass(const string& s) {
+ for (int i = 0; i < s.size(); ++i) {
+ if (!CharacterClass::InClass(s[i]))
+ return false;
+ }
+ return true;
+}
+
+bool Tokenizer::IsIdentifier(const string& text) {
+ // Mirrors IDENTIFIER definition in Tokenizer::Next() above.
+ if (text.size() == 0)
+ return false;
+ if (!Letter::InClass(text.at(0)))
+ return false;
+ if (!AllInClass<Alphanumeric>(text.substr(1)))
+ return false;
+ return true;
}
} // namespace io