aboutsummaryrefslogtreecommitdiffstats
path: root/java/src/main/java/com/google/protobuf/TextFormat.java
diff options
context:
space:
mode:
Diffstat (limited to 'java/src/main/java/com/google/protobuf/TextFormat.java')
-rw-r--r--java/src/main/java/com/google/protobuf/TextFormat.java1381
1 files changed, 1013 insertions, 368 deletions
diff --git a/java/src/main/java/com/google/protobuf/TextFormat.java b/java/src/main/java/com/google/protobuf/TextFormat.java
index cb23f0c..57d0ca6 100644
--- a/java/src/main/java/com/google/protobuf/TextFormat.java
+++ b/java/src/main/java/com/google/protobuf/TextFormat.java
@@ -1,6 +1,6 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
-// http://code.google.com/p/protobuf/
+// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -31,63 +31,119 @@
package com.google.protobuf;
import com.google.protobuf.Descriptors.Descriptor;
-import com.google.protobuf.Descriptors.FieldDescriptor;
import com.google.protobuf.Descriptors.EnumDescriptor;
import com.google.protobuf.Descriptors.EnumValueDescriptor;
+import com.google.protobuf.Descriptors.FieldDescriptor;
import java.io.IOException;
-import java.nio.CharBuffer;
import java.math.BigInteger;
+import java.nio.CharBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
+import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
- * Provide ascii text parsing and formatting support for proto2 instances.
+ * Provide text parsing and formatting support for proto2 instances.
* The implementation largely follows google/protobuf/text_format.cc.
*
* @author wenboz@google.com Wenbo Zhu
* @author kenton@google.com Kenton Varda
*/
public final class TextFormat {
- private TextFormat() {
- }
+ private TextFormat() {}
+
+ private static final Logger logger =
+ Logger.getLogger(TextFormat.class.getName());
+
+ private static final Printer DEFAULT_PRINTER = new Printer();
+ private static final Printer SINGLE_LINE_PRINTER =
+ (new Printer()).setSingleLineMode(true);
+ private static final Printer UNICODE_PRINTER =
+ (new Printer()).setEscapeNonAscii(false);
/**
* Outputs a textual representation of the Protocol Message supplied into
* the parameter output. (This representation is the new version of the
* classic "ProtocolPrinter" output from the original Protocol Buffer system)
*/
- public static void print(final Message message, final Appendable output)
- throws IOException {
- final TextGenerator generator = new TextGenerator(output);
- print(message, generator);
+ public static void print(
+ final MessageOrBuilder message, final Appendable output)
+ throws IOException {
+ DEFAULT_PRINTER.print(message, new TextGenerator(output));
}
/** Outputs a textual representation of {@code fields} to {@code output}. */
public static void print(final UnknownFieldSet fields,
final Appendable output)
throws IOException {
- final TextGenerator generator = new TextGenerator(output);
- printUnknownFields(fields, generator);
+ DEFAULT_PRINTER.printUnknownFields(fields, new TextGenerator(output));
+ }
+
+ /**
+ * Same as {@code print()}, except that non-ASCII characters are not
+ * escaped.
+ */
+ public static void printUnicode(
+ final MessageOrBuilder message, final Appendable output)
+ throws IOException {
+ UNICODE_PRINTER.print(message, new TextGenerator(output));
+ }
+
+ /**
+ * Same as {@code print()}, except that non-ASCII characters are not
+ * escaped.
+ */
+ public static void printUnicode(final UnknownFieldSet fields,
+ final Appendable output)
+ throws IOException {
+ UNICODE_PRINTER.printUnknownFields(fields, new TextGenerator(output));
+ }
+
+ /**
+ * Generates a human readable form of this message, useful for debugging and
+ * other purposes, with no newline characters.
+ */
+ public static String shortDebugString(final MessageOrBuilder message) {
+ try {
+ final StringBuilder sb = new StringBuilder();
+ SINGLE_LINE_PRINTER.print(message, new TextGenerator(sb));
+ // Single line mode currently might have an extra space at the end.
+ return sb.toString().trim();
+ } catch (IOException e) {
+ throw new IllegalStateException(e);
+ }
+ }
+
+ /**
+ * Generates a human readable form of the unknown fields, useful for debugging
+ * and other purposes, with no newline characters.
+ */
+ public static String shortDebugString(final UnknownFieldSet fields) {
+ try {
+ final StringBuilder sb = new StringBuilder();
+ SINGLE_LINE_PRINTER.printUnknownFields(fields, new TextGenerator(sb));
+ // Single line mode currently might have an extra space at the end.
+ return sb.toString().trim();
+ } catch (IOException e) {
+ throw new IllegalStateException(e);
+ }
}
/**
* Like {@code print()}, but writes directly to a {@code String} and
* returns it.
*/
- public static String printToString(final Message message) {
+ public static String printToString(final MessageOrBuilder message) {
try {
final StringBuilder text = new StringBuilder();
print(message, text);
return text.toString();
} catch (IOException e) {
- throw new RuntimeException(
- "Writing to a StringBuilder threw an IOException (should never " +
- "happen).", e);
+ throw new IllegalStateException(e);
}
}
@@ -101,28 +157,43 @@ public final class TextFormat {
print(fields, text);
return text.toString();
} catch (IOException e) {
- throw new RuntimeException(
- "Writing to a StringBuilder threw an IOException (should never " +
- "happen).", e);
+ throw new IllegalStateException(e);
}
}
- private static void print(final Message message,
- final TextGenerator generator)
- throws IOException {
- for (final Map.Entry<FieldDescriptor, Object> field :
- message.getAllFields().entrySet()) {
- printField(field.getKey(), field.getValue(), generator);
+ /**
+ * Same as {@code printToString()}, except that non-ASCII characters
+ * in string type fields are not escaped as backslash-octal sequences.
+ */
+ public static String printToUnicodeString(final MessageOrBuilder message) {
+ try {
+ final StringBuilder text = new StringBuilder();
+ UNICODE_PRINTER.print(message, new TextGenerator(text));
+ return text.toString();
+ } catch (IOException e) {
+ throw new IllegalStateException(e);
+ }
+ }
+
+ /**
+ * Same as {@code printToString()}, except that it prints unknown fields
+ * using the printer that is configured not to escape non-ASCII characters.
+ */
+ public static String printToUnicodeString(final UnknownFieldSet fields) {
+ try {
+ final StringBuilder text = new StringBuilder();
+ UNICODE_PRINTER.printUnknownFields(fields, new TextGenerator(text));
+ return text.toString();
+ } catch (IOException e) {
+ throw new IllegalStateException(e);
}
- printUnknownFields(message.getUnknownFields(), generator);
}
public static void printField(final FieldDescriptor field,
final Object value,
final Appendable output)
throws IOException {
- final TextGenerator generator = new TextGenerator(output);
- printField(field, value, generator);
+ DEFAULT_PRINTER.printField(field, value, new TextGenerator(output));
}
public static String printFieldToString(final FieldDescriptor field,
@@ -132,173 +203,298 @@ public final class TextFormat {
printField(field, value, text);
return text.toString();
} catch (IOException e) {
- throw new RuntimeException(
- "Writing to a StringBuilder threw an IOException (should never " +
- "happen).", e);
+ throw new IllegalStateException(e);
}
}
- private static void printField(final FieldDescriptor field,
- final Object value,
- final TextGenerator generator)
- throws IOException {
- if (field.isRepeated()) {
- // Repeated field. Print each element.
- for (final Object element : (List) value) {
- printSingleField(field, element, generator);
- }
- } else {
- printSingleField(field, value, generator);
+ /**
+ * Outputs a textual representation of the value of the given field.
+ *
+ * @param field the descriptor of the field
+ * @param value the value of the field
+ * @param output the output to which to append the formatted value
+ * @throws ClassCastException if the value is not appropriate for the
+ * given field descriptor
+ * @throws IOException if there is an exception writing to the output
+ */
+ public static void printFieldValue(final FieldDescriptor field,
+ final Object value,
+ final Appendable output)
+ throws IOException {
+ DEFAULT_PRINTER.printFieldValue(field, value, new TextGenerator(output));
+ }
+
+ /**
+ * Outputs a textual representation of the value of an unknown field.
+ *
+ * @param tag the field's tag number
+ * @param value the value of the field
+ * @param output the output to which to append the formatted value
+ * @throws ClassCastException if the value is not appropriate for the
+ * wire type encoded in the given tag
+ * @throws IOException if there is an exception writing to the output
+ */
+ public static void printUnknownFieldValue(final int tag,
+ final Object value,
+ final Appendable output)
+ throws IOException {
+ printUnknownFieldValue(tag, value, new TextGenerator(output));
+ }
+
+ private static void printUnknownFieldValue(final int tag,
+ final Object value,
+ final TextGenerator generator)
+ throws IOException {
+ switch (WireFormat.getTagWireType(tag)) {
+ case WireFormat.WIRETYPE_VARINT:
+ generator.print(unsignedToString((Long) value));
+ break;
+ case WireFormat.WIRETYPE_FIXED32:
+ generator.print(
+ String.format((Locale) null, "0x%08x", (Integer) value));
+ break;
+ case WireFormat.WIRETYPE_FIXED64:
+ generator.print(String.format((Locale) null, "0x%016x", (Long) value));
+ break;
+ case WireFormat.WIRETYPE_LENGTH_DELIMITED:
+ generator.print("\"");
+ generator.print(escapeBytes((ByteString) value));
+ generator.print("\"");
+ break;
+ case WireFormat.WIRETYPE_START_GROUP:
+ DEFAULT_PRINTER.printUnknownFields((UnknownFieldSet) value, generator);
+ break;
+ default:
+ throw new IllegalArgumentException("Bad tag: " + tag);
}
}
- private static void printSingleField(final FieldDescriptor field,
- final Object value,
- final TextGenerator generator)
- throws IOException {
- if (field.isExtension()) {
- generator.print("[");
- // We special-case MessageSet elements for compatibility with proto1.
- if (field.getContainingType().getOptions().getMessageSetWireFormat()
- && (field.getType() == FieldDescriptor.Type.MESSAGE)
- && (field.isOptional())
- // object equality
- && (field.getExtensionScope() == field.getMessageType())) {
- generator.print(field.getMessageType().getFullName());
- } else {
- generator.print(field.getFullName());
+ /** Helper class for converting protobufs to text. */
+ private static final class Printer {
+ /** Whether to omit newlines from the output. */
+ boolean singleLineMode = false;
+
+ /** Whether to escape non ASCII characters with backslash and octal. */
+ boolean escapeNonAscii = true;
+
+ private Printer() {}
+
+ /** Setter of singleLineMode */
+ private Printer setSingleLineMode(boolean singleLineMode) {
+ this.singleLineMode = singleLineMode;
+ return this;
+ }
+
+ /** Setter of escapeNonAscii */
+ private Printer setEscapeNonAscii(boolean escapeNonAscii) {
+ this.escapeNonAscii = escapeNonAscii;
+ return this;
+ }
+
+ private void print(
+ final MessageOrBuilder message, final TextGenerator generator)
+ throws IOException {
+ for (Map.Entry<FieldDescriptor, Object> field
+ : message.getAllFields().entrySet()) {
+ printField(field.getKey(), field.getValue(), generator);
}
- generator.print("]");
- } else {
- if (field.getType() == FieldDescriptor.Type.GROUP) {
- // Groups must be serialized with their original capitalization.
- generator.print(field.getMessageType().getName());
+ printUnknownFields(message.getUnknownFields(), generator);
+ }
+
+ private void printField(final FieldDescriptor field, final Object value,
+ final TextGenerator generator) throws IOException {
+ if (field.isRepeated()) {
+ // Repeated field. Print each element.
+ for (Object element : (List<?>) value) {
+ printSingleField(field, element, generator);
+ }
} else {
- generator.print(field.getName());
+ printSingleField(field, value, generator);
}
}
- if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
- generator.print(" {\n");
- generator.indent();
- } else {
- generator.print(": ");
- }
+ private void printSingleField(final FieldDescriptor field,
+ final Object value,
+ final TextGenerator generator)
+ throws IOException {
+ if (field.isExtension()) {
+ generator.print("[");
+ // We special-case MessageSet elements for compatibility with proto1.
+ if (field.getContainingType().getOptions().getMessageSetWireFormat()
+ && (field.getType() == FieldDescriptor.Type.MESSAGE)
+ && (field.isOptional())
+ // object equality
+ && (field.getExtensionScope() == field.getMessageType())) {
+ generator.print(field.getMessageType().getFullName());
+ } else {
+ generator.print(field.getFullName());
+ }
+ generator.print("]");
+ } else {
+ if (field.getType() == FieldDescriptor.Type.GROUP) {
+ // Groups must be serialized with their original capitalization.
+ generator.print(field.getMessageType().getName());
+ } else {
+ generator.print(field.getName());
+ }
+ }
- printFieldValue(field, value, generator);
+ if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
+ if (singleLineMode) {
+ generator.print(" { ");
+ } else {
+ generator.print(" {\n");
+ generator.indent();
+ }
+ } else {
+ generator.print(": ");
+ }
+
+ printFieldValue(field, value, generator);
- if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
- generator.outdent();
- generator.print("}");
+ if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
+ if (singleLineMode) {
+ generator.print("} ");
+ } else {
+ generator.outdent();
+ generator.print("}\n");
+ }
+ } else {
+ if (singleLineMode) {
+ generator.print(" ");
+ } else {
+ generator.print("\n");
+ }
+ }
}
- generator.print("\n");
- }
- private static void printFieldValue(final FieldDescriptor field,
- final Object value,
- final TextGenerator generator)
- throws IOException {
- switch (field.getType()) {
- case INT32:
- case INT64:
- case SINT32:
- case SINT64:
- case SFIXED32:
- case SFIXED64:
- case FLOAT:
- case DOUBLE:
- case BOOL:
- // Good old toString() does what we want for these types.
- generator.print(value.toString());
- break;
+ private void printFieldValue(final FieldDescriptor field,
+ final Object value,
+ final TextGenerator generator)
+ throws IOException {
+ switch (field.getType()) {
+ case INT32:
+ case SINT32:
+ case SFIXED32:
+ generator.print(((Integer) value).toString());
+ break;
- case UINT32:
- case FIXED32:
- generator.print(unsignedToString((Integer) value));
- break;
+ case INT64:
+ case SINT64:
+ case SFIXED64:
+ generator.print(((Long) value).toString());
+ break;
- case UINT64:
- case FIXED64:
- generator.print(unsignedToString((Long) value));
- break;
+ case BOOL:
+ generator.print(((Boolean) value).toString());
+ break;
- case STRING:
- generator.print("\"");
- generator.print(escapeText((String) value));
- generator.print("\"");
- break;
+ case FLOAT:
+ generator.print(((Float) value).toString());
+ break;
- case BYTES:
- generator.print("\"");
- generator.print(escapeBytes((ByteString) value));
- generator.print("\"");
- break;
+ case DOUBLE:
+ generator.print(((Double) value).toString());
+ break;
- case ENUM:
- generator.print(((EnumValueDescriptor) value).getName());
- break;
+ case UINT32:
+ case FIXED32:
+ generator.print(unsignedToString((Integer) value));
+ break;
- case MESSAGE:
- case GROUP:
- print((Message) value, generator);
- break;
- }
- }
+ case UINT64:
+ case FIXED64:
+ generator.print(unsignedToString((Long) value));
+ break;
- private static void printUnknownFields(final UnknownFieldSet unknownFields,
- final TextGenerator generator)
- throws IOException {
- for (final Map.Entry<Integer, UnknownFieldSet.Field> entry :
- unknownFields.asMap().entrySet()) {
- final String prefix = entry.getKey().toString() + ": ";
- final UnknownFieldSet.Field field = entry.getValue();
+ case STRING:
+ generator.print("\"");
+ generator.print(escapeNonAscii ?
+ escapeText((String) value) :
+ escapeDoubleQuotesAndBackslashes((String) value));
+ generator.print("\"");
+ break;
- for (final long value : field.getVarintList()) {
- generator.print(entry.getKey().toString());
- generator.print(": ");
- generator.print(unsignedToString(value));
- generator.print("\n");
+ case BYTES:
+ generator.print("\"");
+ if (value instanceof ByteString) {
+ generator.print(escapeBytes((ByteString) value));
+ } else {
+ generator.print(escapeBytes((byte[]) value));
+ }
+ generator.print("\"");
+ break;
+
+ case ENUM:
+ generator.print(((EnumValueDescriptor) value).getName());
+ break;
+
+ case MESSAGE:
+ case GROUP:
+ print((Message) value, generator);
+ break;
}
- for (final int value : field.getFixed32List()) {
- generator.print(entry.getKey().toString());
- generator.print(": ");
- generator.print(String.format((Locale) null, "0x%08x", value));
- generator.print("\n");
+ }
+
+ private void printUnknownFields(final UnknownFieldSet unknownFields,
+ final TextGenerator generator)
+ throws IOException {
+ for (Map.Entry<Integer, UnknownFieldSet.Field> entry :
+ unknownFields.asMap().entrySet()) {
+ final int number = entry.getKey();
+ final UnknownFieldSet.Field field = entry.getValue();
+ printUnknownField(number, WireFormat.WIRETYPE_VARINT,
+ field.getVarintList(), generator);
+ printUnknownField(number, WireFormat.WIRETYPE_FIXED32,
+ field.getFixed32List(), generator);
+ printUnknownField(number, WireFormat.WIRETYPE_FIXED64,
+ field.getFixed64List(), generator);
+ printUnknownField(number, WireFormat.WIRETYPE_LENGTH_DELIMITED,
+ field.getLengthDelimitedList(), generator);
+ for (final UnknownFieldSet value : field.getGroupList()) {
+ generator.print(entry.getKey().toString());
+ if (singleLineMode) {
+ generator.print(" { ");
+ } else {
+ generator.print(" {\n");
+ generator.indent();
+ }
+ printUnknownFields(value, generator);
+ if (singleLineMode) {
+ generator.print("} ");
+ } else {
+ generator.outdent();
+ generator.print("}\n");
+ }
+ }
}
- for (final long value : field.getFixed64List()) {
- generator.print(entry.getKey().toString());
+ }
+
+ private void printUnknownField(final int number,
+ final int wireType,
+ final List<?> values,
+ final TextGenerator generator)
+ throws IOException {
+ for (final Object value : values) {
+ generator.print(String.valueOf(number));
generator.print(": ");
- generator.print(String.format((Locale) null, "0x%016x", value));
- generator.print("\n");
- }
- for (final ByteString value : field.getLengthDelimitedList()) {
- generator.print(entry.getKey().toString());
- generator.print(": \"");
- generator.print(escapeBytes(value));
- generator.print("\"\n");
- }
- for (final UnknownFieldSet value : field.getGroupList()) {
- generator.print(entry.getKey().toString());
- generator.print(" {\n");
- generator.indent();
- printUnknownFields(value, generator);
- generator.outdent();
- generator.print("}\n");
+ printUnknownFieldValue(wireType, value, generator);
+ generator.print(singleLineMode ? " " : "\n");
}
}
}
/** Convert an unsigned 32-bit integer to a string. */
- private static String unsignedToString(final int value) {
+ public static String unsignedToString(final int value) {
if (value >= 0) {
return Integer.toString(value);
} else {
- return Long.toString(((long) value) & 0x00000000FFFFFFFFL);
+ return Long.toString(value & 0x00000000FFFFFFFFL);
}
}
/** Convert an unsigned 64-bit integer to a string. */
- private static String unsignedToString(final long value) {
+ public static String unsignedToString(final long value) {
if (value >= 0) {
return Long.toString(value);
} else {
@@ -313,9 +509,9 @@ public final class TextFormat {
* An inner class for writing text to the output stream.
*/
private static final class TextGenerator {
- private Appendable output;
- private boolean atStartOfLine = true;
+ private final Appendable output;
private final StringBuilder indent = new StringBuilder();
+ private boolean atStartOfLine = true;
private TextGenerator(final Appendable output) {
this.output = output;
@@ -352,17 +548,16 @@ public final class TextFormat {
for (int i = 0; i < size; i++) {
if (text.charAt(i) == '\n') {
- write(text.subSequence(pos, size), i - pos + 1);
+ write(text.subSequence(pos, i + 1));
pos = i + 1;
atStartOfLine = true;
}
}
- write(text.subSequence(pos, size), size - pos);
+ write(text.subSequence(pos, size));
}
- private void write(final CharSequence data, final int size)
- throws IOException {
- if (size == 0) {
+ private void write(final CharSequence data) throws IOException {
+ if (data.length() == 0) {
return;
}
if (atStartOfLine) {
@@ -421,7 +616,7 @@ public final class TextFormat {
private int previousLine = 0;
private int previousColumn = 0;
- // We use possesive quantifiers (*+ and ++) because otherwise the Java
+ // We use possessive quantifiers (*+ and ++) because otherwise the Java
// regex matcher has stack overflows on large inputs.
private static final Pattern WHITESPACE =
Pattern.compile("(\\s|(#.*$))++", Pattern.MULTILINE);
@@ -539,6 +734,14 @@ public final class TextFormat {
}
/**
+ * Returns {@code true} if the current token's text is equal to that
+ * specified.
+ */
+ public boolean lookingAt(String text) {
+ return currentToken.equals(text);
+ }
+
+ /**
* If the next token is an identifier, consume it and return its value.
* Otherwise, throw a {@link ParseException}.
*/
@@ -551,7 +754,8 @@ public final class TextFormat {
(c == '_') || (c == '.')) {
// OK
} else {
- throw parseException("Expected identifier.");
+ throw parseException(
+ "Expected identifier. Found '" + currentToken + "'");
}
}
@@ -561,6 +765,19 @@ public final class TextFormat {
}
/**
+ * If the next token is an identifier, consume it and return {@code true}.
+ * Otherwise, return {@code false} without doing anything.
+ */
+ public boolean tryConsumeIdentifier() {
+ try {
+ consumeIdentifier();
+ return true;
+ } catch (ParseException e) {
+ return false;
+ }
+ }
+
+ /**
* If the next token is a 32-bit signed integer, consume it and return its
* value. Otherwise, throw a {@link ParseException}.
*/
@@ -603,6 +820,19 @@ public final class TextFormat {
}
/**
+ * If the next token is a 64-bit signed integer, consume it and return
+ * {@code true}. Otherwise, return {@code false} without doing anything.
+ */
+ public boolean tryConsumeInt64() {
+ try {
+ consumeInt64();
+ return true;
+ } catch (ParseException e) {
+ return false;
+ }
+ }
+
+ /**
* If the next token is a 64-bit unsigned integer, consume it and return its
* value. Otherwise, throw a {@link ParseException}.
*/
@@ -617,6 +847,19 @@ public final class TextFormat {
}
/**
+ * If the next token is a 64-bit unsigned integer, consume it and return
+ * {@code true}. Otherwise, return {@code false} without doing anything.
+ */
+ public boolean tryConsumeUInt64() {
+ try {
+ consumeUInt64();
+ return true;
+ } catch (ParseException e) {
+ return false;
+ }
+ }
+
+ /**
* If the next token is a double, consume it and return its value.
* Otherwise, throw a {@link ParseException}.
*/
@@ -642,6 +885,19 @@ public final class TextFormat {
}
/**
+ * If the next token is a double, consume it and return {@code true}.
+ * Otherwise, return {@code false} without doing anything.
+ */
+ public boolean tryConsumeDouble() {
+ try {
+ consumeDouble();
+ return true;
+ } catch (ParseException e) {
+ return false;
+ }
+ }
+
+ /**
* If the next token is a float, consume it and return its value.
* Otherwise, throw a {@link ParseException}.
*/
@@ -667,14 +923,31 @@ public final class TextFormat {
}
/**
+ * If the next token is a float, consume it and return {@code true}.
+ * Otherwise, return {@code false} without doing anything.
+ */
+ public boolean tryConsumeFloat() {
+ try {
+ consumeFloat();
+ return true;
+ } catch (ParseException e) {
+ return false;
+ }
+ }
+
+ /**
* If the next token is a boolean, consume it and return its value.
* Otherwise, throw a {@link ParseException}.
*/
public boolean consumeBoolean() throws ParseException {
- if (currentToken.equals("true")) {
+ if (currentToken.equals("true") ||
+ currentToken.equals("t") ||
+ currentToken.equals("1")) {
nextToken();
return true;
- } else if (currentToken.equals("false")) {
+ } else if (currentToken.equals("false") ||
+ currentToken.equals("f") ||
+ currentToken.equals("0")) {
nextToken();
return false;
} else {
@@ -691,6 +964,19 @@ public final class TextFormat {
}
/**
+ * If the next token is a string, consume it and return true. Otherwise,
+ * return false.
+ */
+ public boolean tryConsumeString() {
+ try {
+ consumeString();
+ return true;
+ } catch (ParseException e) {
+ return false;
+ }
+ }
+
+ /**
* If the next token is a string, consume it, unescape it as a
* {@link ByteString}, and return it. Otherwise, throw a
* {@link ParseException}.
@@ -710,7 +996,8 @@ public final class TextFormat {
* multiple adjacent tokens which are automatically concatenated, like in
* C or Python.
*/
- private void consumeByteString(List<ByteString> list) throws ParseException {
+ private void consumeByteString(List<ByteString> list)
+ throws ParseException {
final char quote = currentToken.length() > 0 ? currentToken.charAt(0)
: '\0';
if (quote != '\"' && quote != '\'') {
@@ -740,7 +1027,7 @@ public final class TextFormat {
public ParseException parseException(final String description) {
// Note: People generally prefer one-based line and column numbers.
return new ParseException(
- (line + 1) + ":" + (column + 1) + ": " + description);
+ line + 1, column + 1, description);
}
/**
@@ -751,7 +1038,7 @@ public final class TextFormat {
final String description) {
// Note: People generally prefer one-based line and column numbers.
return new ParseException(
- (previousLine + 1) + ":" + (previousColumn + 1) + ": " + description);
+ previousLine + 1, previousColumn + 1, description);
}
/**
@@ -776,11 +1063,58 @@ public final class TextFormat {
public static class ParseException extends IOException {
private static final long serialVersionUID = 3196188060225107702L;
+ private final int line;
+ private final int column;
+
+ /** Create a new instance, with -1 as the line and column numbers. */
public ParseException(final String message) {
- super(message);
+ this(-1, -1, message);
+ }
+
+ /**
+ * Create a new instance
+ *
+ * @param line the line number where the parse error occurred,
+ * using 1-offset.
+ * @param column the column number where the parser error occurred,
+ * using 1-offset.
+ */
+ public ParseException(final int line, final int column,
+ final String message) {
+ super(Integer.toString(line) + ":" + column + ": " + message);
+ this.line = line;
+ this.column = column;
+ }
+
+ /**
+ * Return the line where the parse exception occurred, or -1 when
+ * none is provided. The value is specified as 1-offset, so the first
+ * line is line 1.
+ */
+ public int getLine() {
+ return line;
+ }
+
+ /**
+ * Return the column where the parse exception occurred, or -1 when
+ * none is provided. The value is specified as 1-offset, so the first
+ * column is column 1.
+ */
+ public int getColumn() {
+ return column;
}
}
+ private static final Parser PARSER = Parser.newBuilder().build();
+
+ /**
+ * Return a {@link Parser} instance which can parse text-format
+ * messages. The returned instance is thread-safe.
+ */
+ public static Parser getParser() {
+ return PARSER;
+ }
+
/**
* Parse a text-format message from {@code input} and merge the contents
* into {@code builder}.
@@ -788,7 +1122,7 @@ public final class TextFormat {
public static void merge(final Readable input,
final Message.Builder builder)
throws IOException {
- merge(input, ExtensionRegistry.getEmptyRegistry(), builder);
+ PARSER.merge(input, builder);
}
/**
@@ -798,7 +1132,7 @@ public final class TextFormat {
public static void merge(final CharSequence input,
final Message.Builder builder)
throws ParseException {
- merge(input, ExtensionRegistry.getEmptyRegistry(), builder);
+ PARSER.merge(input, builder);
}
/**
@@ -810,35 +1144,9 @@ public final class TextFormat {
final ExtensionRegistry extensionRegistry,
final Message.Builder builder)
throws IOException {
- // Read the entire input to a String then parse that.
-
- // If StreamTokenizer were not quite so crippled, or if there were a kind
- // of Reader that could read in chunks that match some particular regex,
- // or if we wanted to write a custom Reader to tokenize our stream, then
- // we would not have to read to one big String. Alas, none of these is
- // the case. Oh well.
-
- merge(toStringBuilder(input), extensionRegistry, builder);
+ PARSER.merge(input, extensionRegistry, builder);
}
- private static final int BUFFER_SIZE = 4096;
-
- // TODO(chrisn): See if working around java.io.Reader#read(CharBuffer)
- // overhead is worthwhile
- private static StringBuilder toStringBuilder(final Readable input)
- throws IOException {
- final StringBuilder text = new StringBuilder();
- final CharBuffer buffer = CharBuffer.allocate(BUFFER_SIZE);
- while (true) {
- final int n = input.read(buffer);
- if (n == -1) {
- break;
- }
- buffer.flip();
- text.append(buffer, 0, n);
- }
- return text;
- }
/**
* Parse a text-format message from {@code input} and merge the contents
@@ -849,187 +1157,466 @@ public final class TextFormat {
final ExtensionRegistry extensionRegistry,
final Message.Builder builder)
throws ParseException {
- final Tokenizer tokenizer = new Tokenizer(input);
-
- while (!tokenizer.atEnd()) {
- mergeField(tokenizer, extensionRegistry, builder);
- }
+ PARSER.merge(input, extensionRegistry, builder);
}
+
/**
- * Parse a single field from {@code tokenizer} and merge it into
- * {@code builder}.
+ * Parser for text-format proto2 instances. This class is thread-safe.
+ * The implementation largely follows google/protobuf/text_format.cc.
+ *
+ * <p>Use {@link TextFormat#getParser()} to obtain the default parser, or
+ * {@link Builder} to control the parser behavior.
*/
- private static void mergeField(final Tokenizer tokenizer,
- final ExtensionRegistry extensionRegistry,
- final Message.Builder builder)
- throws ParseException {
- FieldDescriptor field;
- final Descriptor type = builder.getDescriptorForType();
- ExtensionRegistry.ExtensionInfo extension = null;
+ public static class Parser {
+ /**
+ * Determines if repeated values for non-repeated fields and
+ * oneofs are permitted. For example, given required/optional field "foo"
+ * and a oneof containing "baz" and "qux":
+ * <ul>
+ * <li>"foo: 1 foo: 2"
+ * <li>"baz: 1 qux: 2"
+ * <li>merging "foo: 2" into a proto in which foo is already set, or
+ * <li>merging "qux: 2" into a proto in which baz is already set.
+ * </ul>
+ */
+ public enum SingularOverwritePolicy {
+ /** The last value is retained. */
+ ALLOW_SINGULAR_OVERWRITES,
+ /** An error is issued. */
+ FORBID_SINGULAR_OVERWRITES
+ }
- if (tokenizer.tryConsume("[")) {
- // An extension.
- final StringBuilder name =
- new StringBuilder(tokenizer.consumeIdentifier());
- while (tokenizer.tryConsume(".")) {
- name.append('.');
- name.append(tokenizer.consumeIdentifier());
- }
+ private final boolean allowUnknownFields;
+ private final SingularOverwritePolicy singularOverwritePolicy;
+
+ private Parser(boolean allowUnknownFields,
+ SingularOverwritePolicy singularOverwritePolicy) {
+ this.allowUnknownFields = allowUnknownFields;
+ this.singularOverwritePolicy = singularOverwritePolicy;
+ }
+
+ /**
+ * Returns a new instance of {@link Builder}.
+ */
+ public static Builder newBuilder() {
+ return new Builder();
+ }
- extension = extensionRegistry.findExtensionByName(name.toString());
+ /**
+ * Builder that can be used to obtain new instances of {@link Parser}.
+ */
+ public static class Builder {
+ private boolean allowUnknownFields = false;
+ private SingularOverwritePolicy singularOverwritePolicy =
+ SingularOverwritePolicy.ALLOW_SINGULAR_OVERWRITES;
+
+ /**
+ * Sets parser behavior when a non-repeated field appears more than once.
+ */
+ public Builder setSingularOverwritePolicy(SingularOverwritePolicy p) {
+ this.singularOverwritePolicy = p;
+ return this;
+ }
- if (extension == null) {
- throw tokenizer.parseExceptionPreviousToken(
- "Extension \"" + name + "\" not found in the ExtensionRegistry.");
- } else if (extension.descriptor.getContainingType() != type) {
- throw tokenizer.parseExceptionPreviousToken(
- "Extension \"" + name + "\" does not extend message type \"" +
- type.getFullName() + "\".");
+ public Parser build() {
+ return new Parser(allowUnknownFields, singularOverwritePolicy);
}
+ }
- tokenizer.consume("]");
+ /**
+ * Parse a text-format message from {@code input} and merge the contents
+ * into {@code builder}.
+ */
+ public void merge(final Readable input,
+ final Message.Builder builder)
+ throws IOException {
+ merge(input, ExtensionRegistry.getEmptyRegistry(), builder);
+ }
- field = extension.descriptor;
- } else {
- final String name = tokenizer.consumeIdentifier();
- field = type.findFieldByName(name);
+ /**
+ * Parse a text-format message from {@code input} and merge the contents
+ * into {@code builder}.
+ */
+ public void merge(final CharSequence input,
+ final Message.Builder builder)
+ throws ParseException {
+ merge(input, ExtensionRegistry.getEmptyRegistry(), builder);
+ }
- // Group names are expected to be capitalized as they appear in the
- // .proto file, which actually matches their type names, not their field
- // names.
- if (field == null) {
- // Explicitly specify US locale so that this code does not break when
- // executing in Turkey.
- final String lowerName = name.toLowerCase(Locale.US);
- field = type.findFieldByName(lowerName);
- // If the case-insensitive match worked but the field is NOT a group,
- if (field != null && field.getType() != FieldDescriptor.Type.GROUP) {
- field = null;
+ /**
+ * Parse a text-format message from {@code input} and merge the contents
+ * into {@code builder}. Extensions will be recognized if they are
+ * registered in {@code extensionRegistry}.
+ */
+ public void merge(final Readable input,
+ final ExtensionRegistry extensionRegistry,
+ final Message.Builder builder)
+ throws IOException {
+ // Read the entire input to a String then parse that.
+
+ // If StreamTokenizer were not quite so crippled, or if there were a kind
+ // of Reader that could read in chunks that match some particular regex,
+ // or if we wanted to write a custom Reader to tokenize our stream, then
+ // we would not have to read to one big String. Alas, none of these is
+ // the case. Oh well.
+
+ merge(toStringBuilder(input), extensionRegistry, builder);
+ }
+
+
+ private static final int BUFFER_SIZE = 4096;
+
+ // TODO(chrisn): See if working around java.io.Reader#read(CharBuffer)
+ // overhead is worthwhile
+ private static StringBuilder toStringBuilder(final Readable input)
+ throws IOException {
+ final StringBuilder text = new StringBuilder();
+ final CharBuffer buffer = CharBuffer.allocate(BUFFER_SIZE);
+ while (true) {
+ final int n = input.read(buffer);
+ if (n == -1) {
+ break;
}
+ buffer.flip();
+ text.append(buffer, 0, n);
}
- // Again, special-case group names as described above.
- if (field != null && field.getType() == FieldDescriptor.Type.GROUP &&
- !field.getMessageType().getName().equals(name)) {
- field = null;
- }
+ return text;
+ }
- if (field == null) {
- throw tokenizer.parseExceptionPreviousToken(
- "Message type \"" + type.getFullName() +
- "\" has no field named \"" + name + "\".");
+ /**
+ * Parse a text-format message from {@code input} and merge the contents
+ * into {@code builder}. Extensions will be recognized if they are
+ * registered in {@code extensionRegistry}.
+ */
+ public void merge(final CharSequence input,
+ final ExtensionRegistry extensionRegistry,
+ final Message.Builder builder)
+ throws ParseException {
+ final Tokenizer tokenizer = new Tokenizer(input);
+ MessageReflection.BuilderAdapter target =
+ new MessageReflection.BuilderAdapter(builder);
+
+ while (!tokenizer.atEnd()) {
+ mergeField(tokenizer, extensionRegistry, target);
}
}
- Object value = null;
- if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
- tokenizer.tryConsume(":"); // optional
+ /**
+ * Parse a single field from {@code tokenizer} and merge it into
+ * {@code target}.
+ */
+ private void mergeField(final Tokenizer tokenizer,
+ final ExtensionRegistry extensionRegistry,
+ final MessageReflection.MergeTarget target)
+ throws ParseException {
+ FieldDescriptor field = null;
+ final Descriptor type = target.getDescriptorForType();
+ ExtensionRegistry.ExtensionInfo extension = null;
+
+ if (tokenizer.tryConsume("[")) {
+ // An extension.
+ final StringBuilder name =
+ new StringBuilder(tokenizer.consumeIdentifier());
+ while (tokenizer.tryConsume(".")) {
+ name.append('.');
+ name.append(tokenizer.consumeIdentifier());
+ }
- final String endToken;
- if (tokenizer.tryConsume("<")) {
- endToken = ">";
- } else {
- tokenizer.consume("{");
- endToken = "}";
- }
+ extension = target.findExtensionByName(
+ extensionRegistry, name.toString());
+
+ if (extension == null) {
+ if (!allowUnknownFields) {
+ throw tokenizer.parseExceptionPreviousToken(
+ "Extension \"" + name + "\" not found in the ExtensionRegistry.");
+ } else {
+ logger.warning(
+ "Extension \"" + name + "\" not found in the ExtensionRegistry.");
+ }
+ } else {
+ if (extension.descriptor.getContainingType() != type) {
+ throw tokenizer.parseExceptionPreviousToken(
+ "Extension \"" + name + "\" does not extend message type \"" +
+ type.getFullName() + "\".");
+ }
+ field = extension.descriptor;
+ }
- final Message.Builder subBuilder;
- if (extension == null) {
- subBuilder = builder.newBuilderForField(field);
+ tokenizer.consume("]");
} else {
- subBuilder = extension.defaultInstance.newBuilderForType();
- }
+ final String name = tokenizer.consumeIdentifier();
+ field = type.findFieldByName(name);
+
+ // Group names are expected to be capitalized as they appear in the
+ // .proto file, which actually matches their type names, not their field
+ // names.
+ if (field == null) {
+ // Explicitly specify US locale so that this code does not break when
+ // executing in Turkey.
+ final String lowerName = name.toLowerCase(Locale.US);
+ field = type.findFieldByName(lowerName);
+ // If the case-insensitive match worked but the field is NOT a group, reject it.
+ if (field != null && field.getType() != FieldDescriptor.Type.GROUP) {
+ field = null;
+ }
+ }
+ // Again, special-case group names as described above.
+ if (field != null && field.getType() == FieldDescriptor.Type.GROUP &&
+ !field.getMessageType().getName().equals(name)) {
+ field = null;
+ }
- while (!tokenizer.tryConsume(endToken)) {
- if (tokenizer.atEnd()) {
- throw tokenizer.parseException(
- "Expected \"" + endToken + "\".");
+ if (field == null) {
+ if (!allowUnknownFields) {
+ throw tokenizer.parseExceptionPreviousToken(
+ "Message type \"" + type.getFullName() +
+ "\" has no field named \"" + name + "\".");
+ } else {
+ logger.warning(
+ "Message type \"" + type.getFullName() +
+ "\" has no field named \"" + name + "\".");
+ }
}
- mergeField(tokenizer, extensionRegistry, subBuilder);
}
- value = subBuilder.build();
-
- } else {
- tokenizer.consume(":");
+ // Skips unknown fields.
+ if (field == null) {
+ // Try to guess the type of this field.
+ // If this field is not a message, there should be a ":" between the
+ // field name and the field value and also the field value should not
+ // start with "{" or "<" which indicates the beginning of a message body.
+ // If there is no ":" or there is a "{" or "<" after ":", this field has
+ // to be a message or the input is ill-formed.
+ if (tokenizer.tryConsume(":") && !tokenizer.lookingAt("{") &&
+ !tokenizer.lookingAt("<")) {
+ skipFieldValue(tokenizer);
+ } else {
+ skipFieldMessage(tokenizer);
+ }
+ return;
+ }
- switch (field.getType()) {
- case INT32:
- case SINT32:
- case SFIXED32:
- value = tokenizer.consumeInt32();
- break;
+ // Handle potential ':'.
+ if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
+ tokenizer.tryConsume(":"); // optional
+ } else {
+ tokenizer.consume(":"); // required
+ }
+ // Support specifying repeated field values as a comma-separated list.
+ // Ex. "foo: [1, 2, 3]"
+ if (field.isRepeated() && tokenizer.tryConsume("[")) {
+ while (true) {
+ consumeFieldValue(tokenizer, extensionRegistry, target, field, extension);
+ if (tokenizer.tryConsume("]")) {
+ // End of list.
+ break;
+ }
+ tokenizer.consume(",");
+ }
+ } else {
+ consumeFieldValue(tokenizer, extensionRegistry, target, field, extension);
+ }
+ }
- case INT64:
- case SINT64:
- case SFIXED64:
- value = tokenizer.consumeInt64();
- break;
+ /**
+ * Parse a single field value from {@code tokenizer} and merge it into
+ * {@code target}.
+ */
+ private void consumeFieldValue(
+ final Tokenizer tokenizer,
+ final ExtensionRegistry extensionRegistry,
+ final MessageReflection.MergeTarget target,
+ final FieldDescriptor field,
+ final ExtensionRegistry.ExtensionInfo extension)
+ throws ParseException {
+ Object value = null;
+
+ if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
+ final String endToken;
+ if (tokenizer.tryConsume("<")) {
+ endToken = ">";
+ } else {
+ tokenizer.consume("{");
+ endToken = "}";
+ }
- case UINT32:
- case FIXED32:
- value = tokenizer.consumeUInt32();
- break;
+ final MessageReflection.MergeTarget subField;
+ subField = target.newMergeTargetForField(field,
+ (extension == null) ? null : extension.defaultInstance);
- case UINT64:
- case FIXED64:
- value = tokenizer.consumeUInt64();
- break;
+ while (!tokenizer.tryConsume(endToken)) {
+ if (tokenizer.atEnd()) {
+ throw tokenizer.parseException(
+ "Expected \"" + endToken + "\".");
+ }
+ mergeField(tokenizer, extensionRegistry, subField);
+ }
- case FLOAT:
- value = tokenizer.consumeFloat();
- break;
+ value = subField.finish();
- case DOUBLE:
- value = tokenizer.consumeDouble();
- break;
+ } else {
+ switch (field.getType()) {
+ case INT32:
+ case SINT32:
+ case SFIXED32:
+ value = tokenizer.consumeInt32();
+ break;
+
+ case INT64:
+ case SINT64:
+ case SFIXED64:
+ value = tokenizer.consumeInt64();
+ break;
+
+ case UINT32:
+ case FIXED32:
+ value = tokenizer.consumeUInt32();
+ break;
+
+ case UINT64:
+ case FIXED64:
+ value = tokenizer.consumeUInt64();
+ break;
+
+ case FLOAT:
+ value = tokenizer.consumeFloat();
+ break;
+
+ case DOUBLE:
+ value = tokenizer.consumeDouble();
+ break;
+
+ case BOOL:
+ value = tokenizer.consumeBoolean();
+ break;
+
+ case STRING:
+ value = tokenizer.consumeString();
+ break;
+
+ case BYTES:
+ value = tokenizer.consumeByteString();
+ break;
+
+ case ENUM:
+ final EnumDescriptor enumType = field.getEnumType();
+
+ if (tokenizer.lookingAtInteger()) {
+ final int number = tokenizer.consumeInt32();
+ value = enumType.findValueByNumber(number);
+ if (value == null) {
+ throw tokenizer.parseExceptionPreviousToken(
+ "Enum type \"" + enumType.getFullName() +
+ "\" has no value with number " + number + '.');
+ }
+ } else {
+ final String id = tokenizer.consumeIdentifier();
+ value = enumType.findValueByName(id);
+ if (value == null) {
+ throw tokenizer.parseExceptionPreviousToken(
+ "Enum type \"" + enumType.getFullName() +
+ "\" has no value named \"" + id + "\".");
+ }
+ }
- case BOOL:
- value = tokenizer.consumeBoolean();
- break;
+ break;
- case STRING:
- value = tokenizer.consumeString();
- break;
+ case MESSAGE:
+ case GROUP:
+ throw new RuntimeException("Can't get here.");
+ }
+ }
- case BYTES:
- value = tokenizer.consumeByteString();
- break;
+ if (field.isRepeated()) {
+ target.addRepeatedField(field, value);
+ } else if ((singularOverwritePolicy
+ == SingularOverwritePolicy.FORBID_SINGULAR_OVERWRITES)
+ && target.hasField(field)) {
+ throw tokenizer.parseExceptionPreviousToken("Non-repeated field \""
+ + field.getFullName() + "\" cannot be overwritten.");
+ } else if ((singularOverwritePolicy
+ == SingularOverwritePolicy.FORBID_SINGULAR_OVERWRITES)
+ && field.getContainingOneof() != null
+ && target.hasOneof(field.getContainingOneof())) {
+ Descriptors.OneofDescriptor oneof = field.getContainingOneof();
+ throw tokenizer.parseExceptionPreviousToken("Field \""
+ + field.getFullName() + "\" is specified along with field \""
+ + target.getOneofFieldDescriptor(oneof).getFullName()
+ + "\", another member of oneof \"" + oneof.getName() + "\".");
+ } else {
+ target.setField(field, value);
+ }
+ }
- case ENUM:
- final EnumDescriptor enumType = field.getEnumType();
-
- if (tokenizer.lookingAtInteger()) {
- final int number = tokenizer.consumeInt32();
- value = enumType.findValueByNumber(number);
- if (value == null) {
- throw tokenizer.parseExceptionPreviousToken(
- "Enum type \"" + enumType.getFullName() +
- "\" has no value with number " + number + '.');
- }
- } else {
- final String id = tokenizer.consumeIdentifier();
- value = enumType.findValueByName(id);
- if (value == null) {
- throw tokenizer.parseExceptionPreviousToken(
- "Enum type \"" + enumType.getFullName() +
- "\" has no value named \"" + id + "\".");
- }
- }
+ /**
+ * Skips the next field including the field's name and value.
+ */
+ private void skipField(Tokenizer tokenizer) throws ParseException {
+ if (tokenizer.tryConsume("[")) {
+ // Extension name.
+ do {
+ tokenizer.consumeIdentifier();
+ } while (tokenizer.tryConsume("."));
+ tokenizer.consume("]");
+ } else {
+ tokenizer.consumeIdentifier();
+ }
- break;
+ // Try to guess the type of this field.
+ // If this field is not a message, there should be a ":" between the
+ // field name and the field value and also the field value should not
+ // start with "{" or "<" which indicates the beginning of a message body.
+ // If there is no ":" or there is a "{" or "<" after ":", this field has
+ // to be a message or the input is ill-formed.
+ if (tokenizer.tryConsume(":") && !tokenizer.lookingAt("<") &&
+ !tokenizer.lookingAt("{")) {
+ skipFieldValue(tokenizer);
+ } else {
+ skipFieldMessage(tokenizer);
+ }
+ // For historical reasons, fields may optionally be separated by commas or
+ // semicolons.
+ if (!tokenizer.tryConsume(";")) {
+ tokenizer.tryConsume(",");
+ }
+ }
- case MESSAGE:
- case GROUP:
- throw new RuntimeException("Can't get here.");
+ /**
+ * Skips the whole body of a message including the beginning delimiter and
+ * the ending delimiter.
+ */
+ private void skipFieldMessage(Tokenizer tokenizer) throws ParseException {
+ final String delimiter;
+ if (tokenizer.tryConsume("<")) {
+ delimiter = ">";
+ } else {
+ tokenizer.consume("{");
+ delimiter = "}";
}
+ while (!tokenizer.lookingAt(">") && !tokenizer.lookingAt("}")) {
+ skipField(tokenizer);
+ }
+ tokenizer.consume(delimiter);
}
- if (field.isRepeated()) {
- builder.addRepeatedField(field, value);
- } else {
- builder.setField(field, value);
+ /**
+ * Skips a field value.
+ */
+ private void skipFieldValue(Tokenizer tokenizer) throws ParseException {
+ if (tokenizer.tryConsumeString()) {
+ while (tokenizer.tryConsumeString()) {}
+ return;
+ }
+ if (!tokenizer.tryConsumeIdentifier() && // includes enum & boolean
+ !tokenizer.tryConsumeInt64() && // includes int32
+ !tokenizer.tryConsumeUInt64() && // includes uint32
+ !tokenizer.tryConsumeDouble() &&
+ !tokenizer.tryConsumeFloat()) {
+ throw tokenizer.parseException(
+ "Invalid field value: " + tokenizer.currentToken);
+ }
}
}
@@ -1039,6 +1626,11 @@ public final class TextFormat {
// Some of these methods are package-private because Descriptors.java uses
// them.
+ private interface ByteSequence {
+ int size();
+ byte byteAt(int offset);
+ }
+
/**
* Escapes bytes in the format used in protocol buffer text format, which
* is the same as the format used for C string literals. All bytes
@@ -1047,7 +1639,7 @@ public final class TextFormat {
* which no defined short-hand escape sequence is defined will be escaped
* using 3-digit octal sequences.
*/
- static String escapeBytes(final ByteString input) {
+ private static String escapeBytes(final ByteSequence input) {
final StringBuilder builder = new StringBuilder(input.size());
for (int i = 0; i < input.size(); i++) {
final byte b = input.byteAt(i);
@@ -1064,6 +1656,9 @@ public final class TextFormat {
case '\'': builder.append("\\\'"); break;
case '"' : builder.append("\\\""); break;
default:
+ // Note: Bytes with the high-order bit set should be escaped. Since
+ // bytes are signed, such bytes will compare less than 0x20, hence
+ // the following line is correct.
if (b >= 0x20) {
builder.append((char) b);
} else {
@@ -1079,31 +1674,74 @@ public final class TextFormat {
}
/**
+ * Escapes bytes in the format used in protocol buffer text format, which
+ * is the same as the format used for C string literals. All bytes
+ * that are not printable 7-bit ASCII characters are escaped, as well as
+ * backslash, single-quote, and double-quote characters. Characters for
+ * which no defined short-hand escape sequence is defined will be escaped
+ * using 3-digit octal sequences.
+ */
+ static String escapeBytes(final ByteString input) {
+ return escapeBytes(new ByteSequence() {
+ public int size() {
+ return input.size();
+ }
+ public byte byteAt(int offset) {
+ return input.byteAt(offset);
+ }
+ });
+ }
+
+ /**
+ * Like {@link #escapeBytes(ByteString)}, but used for byte array.
+ */
+ static String escapeBytes(final byte[] input) {
+ return escapeBytes(new ByteSequence() {
+ public int size() {
+ return input.length;
+ }
+ public byte byteAt(int offset) {
+ return input[offset];
+ }
+ });
+ }
+
+ /**
* Un-escape a byte sequence as escaped using
* {@link #escapeBytes(ByteString)}. Two-digit hex escapes (starting with
* "\x") are also recognized.
*/
- static ByteString unescapeBytes(final CharSequence input)
+ static ByteString unescapeBytes(final CharSequence charString)
throws InvalidEscapeSequenceException {
- final byte[] result = new byte[input.length()];
+ // First convert the Java character sequence to UTF-8 bytes.
+ ByteString input = ByteString.copyFromUtf8(charString.toString());
+ // Then unescape certain byte sequences introduced by ASCII '\\'. The valid
+ // escapes can all be expressed with ASCII characters, so it is safe to
+ // operate on bytes here.
+ //
+ // Unescaping the input byte array will result in a byte sequence that's no
+ // longer than the input. That's because each escape sequence is between
+ // two and four bytes long and stands for a single byte.
+ final byte[] result = new byte[input.size()];
int pos = 0;
- for (int i = 0; i < input.length(); i++) {
- char c = input.charAt(i);
+ for (int i = 0; i < input.size(); i++) {
+ byte c = input.byteAt(i);
if (c == '\\') {
- if (i + 1 < input.length()) {
+ if (i + 1 < input.size()) {
++i;
- c = input.charAt(i);
+ c = input.byteAt(i);
if (isOctal(c)) {
// Octal escape.
int code = digitValue(c);
- if (i + 1 < input.length() && isOctal(input.charAt(i + 1))) {
+ if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) {
++i;
- code = code * 8 + digitValue(input.charAt(i));
+ code = code * 8 + digitValue(input.byteAt(i));
}
- if (i + 1 < input.length() && isOctal(input.charAt(i + 1))) {
+ if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) {
++i;
- code = code * 8 + digitValue(input.charAt(i));
+ code = code * 8 + digitValue(input.byteAt(i));
}
+ // TODO: Check that 0 <= code && code <= 0xFF.
result[pos++] = (byte)code;
} else {
switch (c) {
@@ -1121,31 +1759,31 @@ public final class TextFormat {
case 'x':
// hex escape
int code = 0;
- if (i + 1 < input.length() && isHex(input.charAt(i + 1))) {
+ if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) {
++i;
- code = digitValue(input.charAt(i));
+ code = digitValue(input.byteAt(i));
} else {
throw new InvalidEscapeSequenceException(
- "Invalid escape sequence: '\\x' with no digits");
+ "Invalid escape sequence: '\\x' with no digits");
}
- if (i + 1 < input.length() && isHex(input.charAt(i + 1))) {
+ if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) {
++i;
- code = code * 16 + digitValue(input.charAt(i));
+ code = code * 16 + digitValue(input.byteAt(i));
}
result[pos++] = (byte)code;
break;
default:
throw new InvalidEscapeSequenceException(
- "Invalid escape sequence: '\\" + c + '\'');
+ "Invalid escape sequence: '\\" + (char)c + '\'');
}
}
} else {
throw new InvalidEscapeSequenceException(
- "Invalid escape sequence: '\\' at end of string.");
+ "Invalid escape sequence: '\\' at end of string.");
}
} else {
- result[pos++] = (byte)c;
+ result[pos++] = c;
}
}
@@ -1174,6 +1812,13 @@ public final class TextFormat {
}
/**
+ * Escape double quotes and backslashes in a String for unicode output of a message.
+ */
+ public static String escapeDoubleQuotesAndBackslashes(final String input) {
+ return input.replace("\\", "\\\\").replace("\"", "\\\"");
+ }
+
+ /**
* Un-escape a text string as escaped using {@link #escapeText(String)}.
* Two-digit hex escapes (starting with "\x") are also recognized.
*/
@@ -1183,12 +1828,12 @@ public final class TextFormat {
}
/** Is this an octal digit? */
- private static boolean isOctal(final char c) {
+ private static boolean isOctal(final byte c) {
return '0' <= c && c <= '7';
}
/** Is this a hex digit? */
- private static boolean isHex(final char c) {
+ private static boolean isHex(final byte c) {
return ('0' <= c && c <= '9') ||
('a' <= c && c <= 'f') ||
('A' <= c && c <= 'F');
@@ -1199,7 +1844,7 @@ public final class TextFormat {
* numeric value. This is like {@code Character.digit()} but we don't accept
* non-ASCII digits.
*/
- private static int digitValue(final char c) {
+ private static int digitValue(final byte c) {
if ('0' <= c && c <= '9') {
return c - '0';
} else if ('a' <= c && c <= 'z') {
@@ -1212,7 +1857,7 @@ public final class TextFormat {
/**
* Parse a 32-bit signed integer from the text. Unlike the Java standard
* {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
- * and "0" to signify hexidecimal and octal numbers, respectively.
+ * and "0" to signify hexadecimal and octal numbers, respectively.
*/
static int parseInt32(final String text) throws NumberFormatException {
return (int) parseInteger(text, true, false);
@@ -1221,7 +1866,7 @@ public final class TextFormat {
/**
* Parse a 32-bit unsigned integer from the text. Unlike the Java standard
* {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
- * and "0" to signify hexidecimal and octal numbers, respectively. The
+ * and "0" to signify hexadecimal and octal numbers, respectively. The
* result is coerced to a (signed) {@code int} when returned since Java has
* no unsigned integer type.
*/
@@ -1232,7 +1877,7 @@ public final class TextFormat {
/**
* Parse a 64-bit signed integer from the text. Unlike the Java standard
* {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
- * and "0" to signify hexidecimal and octal numbers, respectively.
+ * and "0" to signify hexadecimal and octal numbers, respectively.
*/
static long parseInt64(final String text) throws NumberFormatException {
return parseInteger(text, true, true);
@@ -1241,7 +1886,7 @@ public final class TextFormat {
/**
* Parse a 64-bit unsigned integer from the text. Unlike the Java standard
* {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
- * and "0" to signify hexidecimal and octal numbers, respectively. The
+ * and "0" to signify hexadecimal and octal numbers, respectively. The
* result is coerced to a (signed) {@code long} when returned since Java has
* no unsigned long type.
*/