diff options
Diffstat (limited to 'java/src/main/java/com/google/protobuf/TextFormat.java')
-rw-r--r-- | java/src/main/java/com/google/protobuf/TextFormat.java | 1381 |
1 files changed, 1013 insertions, 368 deletions
diff --git a/java/src/main/java/com/google/protobuf/TextFormat.java b/java/src/main/java/com/google/protobuf/TextFormat.java index cb23f0c..57d0ca6 100644 --- a/java/src/main/java/com/google/protobuf/TextFormat.java +++ b/java/src/main/java/com/google/protobuf/TextFormat.java @@ -1,6 +1,6 @@ // Protocol Buffers - Google's data interchange format // Copyright 2008 Google Inc. All rights reserved. -// http://code.google.com/p/protobuf/ +// https://developers.google.com/protocol-buffers/ // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are @@ -31,63 +31,119 @@ package com.google.protobuf; import com.google.protobuf.Descriptors.Descriptor; -import com.google.protobuf.Descriptors.FieldDescriptor; import com.google.protobuf.Descriptors.EnumDescriptor; import com.google.protobuf.Descriptors.EnumValueDescriptor; +import com.google.protobuf.Descriptors.FieldDescriptor; import java.io.IOException; -import java.nio.CharBuffer; import java.math.BigInteger; +import java.nio.CharBuffer; import java.util.ArrayList; import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.logging.Logger; import java.util.regex.Matcher; import java.util.regex.Pattern; /** - * Provide ascii text parsing and formatting support for proto2 instances. + * Provide text parsing and formatting support for proto2 instances. * The implementation largely follows google/protobuf/text_format.cc. 
* * @author wenboz@google.com Wenbo Zhu * @author kenton@google.com Kenton Varda */ public final class TextFormat { - private TextFormat() { - } + private TextFormat() {} + + private static final Logger logger = + Logger.getLogger(TextFormat.class.getName()); + + private static final Printer DEFAULT_PRINTER = new Printer(); + private static final Printer SINGLE_LINE_PRINTER = + (new Printer()).setSingleLineMode(true); + private static final Printer UNICODE_PRINTER = + (new Printer()).setEscapeNonAscii(false); /** * Outputs a textual representation of the Protocol Message supplied into * the parameter output. (This representation is the new version of the * classic "ProtocolPrinter" output from the original Protocol Buffer system) */ - public static void print(final Message message, final Appendable output) - throws IOException { - final TextGenerator generator = new TextGenerator(output); - print(message, generator); + public static void print( + final MessageOrBuilder message, final Appendable output) + throws IOException { + DEFAULT_PRINTER.print(message, new TextGenerator(output)); } /** Outputs a textual representation of {@code fields} to {@code output}. */ public static void print(final UnknownFieldSet fields, final Appendable output) throws IOException { - final TextGenerator generator = new TextGenerator(output); - printUnknownFields(fields, generator); + DEFAULT_PRINTER.printUnknownFields(fields, new TextGenerator(output)); + } + + /** + * Same as {@code print()}, except that non-ASCII characters are not + * escaped. + */ + public static void printUnicode( + final MessageOrBuilder message, final Appendable output) + throws IOException { + UNICODE_PRINTER.print(message, new TextGenerator(output)); + } + + /** + * Same as {@code print()}, except that non-ASCII characters are not + * escaped. 
+ */ + public static void printUnicode(final UnknownFieldSet fields, + final Appendable output) + throws IOException { + UNICODE_PRINTER.printUnknownFields(fields, new TextGenerator(output)); + } + + /** + * Generates a human readable form of this message, useful for debugging and + * other purposes, with no newline characters. + */ + public static String shortDebugString(final MessageOrBuilder message) { + try { + final StringBuilder sb = new StringBuilder(); + SINGLE_LINE_PRINTER.print(message, new TextGenerator(sb)); + // Single line mode currently might have an extra space at the end. + return sb.toString().trim(); + } catch (IOException e) { + throw new IllegalStateException(e); + } + } + + /** + * Generates a human readable form of the unknown fields, useful for debugging + * and other purposes, with no newline characters. + */ + public static String shortDebugString(final UnknownFieldSet fields) { + try { + final StringBuilder sb = new StringBuilder(); + SINGLE_LINE_PRINTER.printUnknownFields(fields, new TextGenerator(sb)); + // Single line mode currently might have an extra space at the end. + return sb.toString().trim(); + } catch (IOException e) { + throw new IllegalStateException(e); + } } /** * Like {@code print()}, but writes directly to a {@code String} and * returns it. 
*/ - public static String printToString(final Message message) { + public static String printToString(final MessageOrBuilder message) { try { final StringBuilder text = new StringBuilder(); print(message, text); return text.toString(); } catch (IOException e) { - throw new RuntimeException( - "Writing to a StringBuilder threw an IOException (should never " + - "happen).", e); + throw new IllegalStateException(e); } } @@ -101,28 +157,43 @@ public final class TextFormat { print(fields, text); return text.toString(); } catch (IOException e) { - throw new RuntimeException( - "Writing to a StringBuilder threw an IOException (should never " + - "happen).", e); + throw new IllegalStateException(e); } } - private static void print(final Message message, - final TextGenerator generator) - throws IOException { - for (final Map.Entry<FieldDescriptor, Object> field : - message.getAllFields().entrySet()) { - printField(field.getKey(), field.getValue(), generator); + /** + * Same as {@code printToString()}, except that non-ASCII characters + * in string type fields are not escaped in backslash+octals. + */ + public static String printToUnicodeString(final MessageOrBuilder message) { + try { + final StringBuilder text = new StringBuilder(); + UNICODE_PRINTER.print(message, new TextGenerator(text)); + return text.toString(); + } catch (IOException e) { + throw new IllegalStateException(e); + } + } + + /** + * Same as {@code printToString()}, except that non-ASCII characters + * in string type fields are not escaped in backslash+octals. 
+ */ + public static String printToUnicodeString(final UnknownFieldSet fields) { + try { + final StringBuilder text = new StringBuilder(); + UNICODE_PRINTER.printUnknownFields(fields, new TextGenerator(text)); + return text.toString(); + } catch (IOException e) { + throw new IllegalStateException(e); } - printUnknownFields(message.getUnknownFields(), generator); } public static void printField(final FieldDescriptor field, final Object value, final Appendable output) throws IOException { - final TextGenerator generator = new TextGenerator(output); - printField(field, value, generator); + DEFAULT_PRINTER.printField(field, value, new TextGenerator(output)); } public static String printFieldToString(final FieldDescriptor field, @@ -132,173 +203,298 @@ public final class TextFormat { printField(field, value, text); return text.toString(); } catch (IOException e) { - throw new RuntimeException( - "Writing to a StringBuilder threw an IOException (should never " + - "happen).", e); + throw new IllegalStateException(e); } } - private static void printField(final FieldDescriptor field, - final Object value, - final TextGenerator generator) - throws IOException { - if (field.isRepeated()) { - // Repeated field. Print each element. - for (final Object element : (List) value) { - printSingleField(field, element, generator); - } - } else { - printSingleField(field, value, generator); + /** + * Outputs a textual representation of the value of given field value. 
+ * + * @param field the descriptor of the field + * @param value the value of the field + * @param output the output to which to append the formatted value + * @throws ClassCastException if the value is not appropriate for the + * given field descriptor + * @throws IOException if there is an exception writing to the output + */ + public static void printFieldValue(final FieldDescriptor field, + final Object value, + final Appendable output) + throws IOException { + DEFAULT_PRINTER.printFieldValue(field, value, new TextGenerator(output)); + } + + /** + * Outputs a textual representation of the value of an unknown field. + * + * @param tag the field's tag number + * @param value the value of the field + * @param output the output to which to append the formatted value + * @throws ClassCastException if the value is not appropriate for the + * given field descriptor + * @throws IOException if there is an exception writing to the output + */ + public static void printUnknownFieldValue(final int tag, + final Object value, + final Appendable output) + throws IOException { + printUnknownFieldValue(tag, value, new TextGenerator(output)); + } + + private static void printUnknownFieldValue(final int tag, + final Object value, + final TextGenerator generator) + throws IOException { + switch (WireFormat.getTagWireType(tag)) { + case WireFormat.WIRETYPE_VARINT: + generator.print(unsignedToString((Long) value)); + break; + case WireFormat.WIRETYPE_FIXED32: + generator.print( + String.format((Locale) null, "0x%08x", (Integer) value)); + break; + case WireFormat.WIRETYPE_FIXED64: + generator.print(String.format((Locale) null, "0x%016x", (Long) value)); + break; + case WireFormat.WIRETYPE_LENGTH_DELIMITED: + generator.print("\""); + generator.print(escapeBytes((ByteString) value)); + generator.print("\""); + break; + case WireFormat.WIRETYPE_START_GROUP: + DEFAULT_PRINTER.printUnknownFields((UnknownFieldSet) value, generator); + break; + default: + throw new 
IllegalArgumentException("Bad tag: " + tag); } } - private static void printSingleField(final FieldDescriptor field, - final Object value, - final TextGenerator generator) - throws IOException { - if (field.isExtension()) { - generator.print("["); - // We special-case MessageSet elements for compatibility with proto1. - if (field.getContainingType().getOptions().getMessageSetWireFormat() - && (field.getType() == FieldDescriptor.Type.MESSAGE) - && (field.isOptional()) - // object equality - && (field.getExtensionScope() == field.getMessageType())) { - generator.print(field.getMessageType().getFullName()); - } else { - generator.print(field.getFullName()); + /** Helper class for converting protobufs to text. */ + private static final class Printer { + /** Whether to omit newlines from the output. */ + boolean singleLineMode = false; + + /** Whether to escape non ASCII characters with backslash and octal. */ + boolean escapeNonAscii = true; + + private Printer() {} + + /** Setter of singleLineMode */ + private Printer setSingleLineMode(boolean singleLineMode) { + this.singleLineMode = singleLineMode; + return this; + } + + /** Setter of escapeNonAscii */ + private Printer setEscapeNonAscii(boolean escapeNonAscii) { + this.escapeNonAscii = escapeNonAscii; + return this; + } + + private void print( + final MessageOrBuilder message, final TextGenerator generator) + throws IOException { + for (Map.Entry<FieldDescriptor, Object> field + : message.getAllFields().entrySet()) { + printField(field.getKey(), field.getValue(), generator); } - generator.print("]"); - } else { - if (field.getType() == FieldDescriptor.Type.GROUP) { - // Groups must be serialized with their original capitalization. 
- generator.print(field.getMessageType().getName()); + printUnknownFields(message.getUnknownFields(), generator); + } + + private void printField(final FieldDescriptor field, final Object value, + final TextGenerator generator) throws IOException { + if (field.isRepeated()) { + // Repeated field. Print each element. + for (Object element : (List<?>) value) { + printSingleField(field, element, generator); + } } else { - generator.print(field.getName()); + printSingleField(field, value, generator); } } - if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { - generator.print(" {\n"); - generator.indent(); - } else { - generator.print(": "); - } + private void printSingleField(final FieldDescriptor field, + final Object value, + final TextGenerator generator) + throws IOException { + if (field.isExtension()) { + generator.print("["); + // We special-case MessageSet elements for compatibility with proto1. + if (field.getContainingType().getOptions().getMessageSetWireFormat() + && (field.getType() == FieldDescriptor.Type.MESSAGE) + && (field.isOptional()) + // object equality + && (field.getExtensionScope() == field.getMessageType())) { + generator.print(field.getMessageType().getFullName()); + } else { + generator.print(field.getFullName()); + } + generator.print("]"); + } else { + if (field.getType() == FieldDescriptor.Type.GROUP) { + // Groups must be serialized with their original capitalization. 
+ generator.print(field.getMessageType().getName()); + } else { + generator.print(field.getName()); + } + } - printFieldValue(field, value, generator); + if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { + if (singleLineMode) { + generator.print(" { "); + } else { + generator.print(" {\n"); + generator.indent(); + } + } else { + generator.print(": "); + } + + printFieldValue(field, value, generator); - if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { - generator.outdent(); - generator.print("}"); + if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { + if (singleLineMode) { + generator.print("} "); + } else { + generator.outdent(); + generator.print("}\n"); + } + } else { + if (singleLineMode) { + generator.print(" "); + } else { + generator.print("\n"); + } + } } - generator.print("\n"); - } - private static void printFieldValue(final FieldDescriptor field, - final Object value, - final TextGenerator generator) - throws IOException { - switch (field.getType()) { - case INT32: - case INT64: - case SINT32: - case SINT64: - case SFIXED32: - case SFIXED64: - case FLOAT: - case DOUBLE: - case BOOL: - // Good old toString() does what we want for these types. 
- generator.print(value.toString()); - break; + private void printFieldValue(final FieldDescriptor field, + final Object value, + final TextGenerator generator) + throws IOException { + switch (field.getType()) { + case INT32: + case SINT32: + case SFIXED32: + generator.print(((Integer) value).toString()); + break; - case UINT32: - case FIXED32: - generator.print(unsignedToString((Integer) value)); - break; + case INT64: + case SINT64: + case SFIXED64: + generator.print(((Long) value).toString()); + break; - case UINT64: - case FIXED64: - generator.print(unsignedToString((Long) value)); - break; + case BOOL: + generator.print(((Boolean) value).toString()); + break; - case STRING: - generator.print("\""); - generator.print(escapeText((String) value)); - generator.print("\""); - break; + case FLOAT: + generator.print(((Float) value).toString()); + break; - case BYTES: - generator.print("\""); - generator.print(escapeBytes((ByteString) value)); - generator.print("\""); - break; + case DOUBLE: + generator.print(((Double) value).toString()); + break; - case ENUM: - generator.print(((EnumValueDescriptor) value).getName()); - break; + case UINT32: + case FIXED32: + generator.print(unsignedToString((Integer) value)); + break; - case MESSAGE: - case GROUP: - print((Message) value, generator); - break; - } - } + case UINT64: + case FIXED64: + generator.print(unsignedToString((Long) value)); + break; - private static void printUnknownFields(final UnknownFieldSet unknownFields, - final TextGenerator generator) - throws IOException { - for (final Map.Entry<Integer, UnknownFieldSet.Field> entry : - unknownFields.asMap().entrySet()) { - final String prefix = entry.getKey().toString() + ": "; - final UnknownFieldSet.Field field = entry.getValue(); + case STRING: + generator.print("\""); + generator.print(escapeNonAscii ? 
+ escapeText((String) value) : + escapeDoubleQuotesAndBackslashes((String) value)); + generator.print("\""); + break; - for (final long value : field.getVarintList()) { - generator.print(entry.getKey().toString()); - generator.print(": "); - generator.print(unsignedToString(value)); - generator.print("\n"); + case BYTES: + generator.print("\""); + if (value instanceof ByteString) { + generator.print(escapeBytes((ByteString) value)); + } else { + generator.print(escapeBytes((byte[]) value)); + } + generator.print("\""); + break; + + case ENUM: + generator.print(((EnumValueDescriptor) value).getName()); + break; + + case MESSAGE: + case GROUP: + print((Message) value, generator); + break; } - for (final int value : field.getFixed32List()) { - generator.print(entry.getKey().toString()); - generator.print(": "); - generator.print(String.format((Locale) null, "0x%08x", value)); - generator.print("\n"); + } + + private void printUnknownFields(final UnknownFieldSet unknownFields, + final TextGenerator generator) + throws IOException { + for (Map.Entry<Integer, UnknownFieldSet.Field> entry : + unknownFields.asMap().entrySet()) { + final int number = entry.getKey(); + final UnknownFieldSet.Field field = entry.getValue(); + printUnknownField(number, WireFormat.WIRETYPE_VARINT, + field.getVarintList(), generator); + printUnknownField(number, WireFormat.WIRETYPE_FIXED32, + field.getFixed32List(), generator); + printUnknownField(number, WireFormat.WIRETYPE_FIXED64, + field.getFixed64List(), generator); + printUnknownField(number, WireFormat.WIRETYPE_LENGTH_DELIMITED, + field.getLengthDelimitedList(), generator); + for (final UnknownFieldSet value : field.getGroupList()) { + generator.print(entry.getKey().toString()); + if (singleLineMode) { + generator.print(" { "); + } else { + generator.print(" {\n"); + generator.indent(); + } + printUnknownFields(value, generator); + if (singleLineMode) { + generator.print("} "); + } else { + generator.outdent(); + generator.print("}\n"); + 
} + } } - for (final long value : field.getFixed64List()) { - generator.print(entry.getKey().toString()); + } + + private void printUnknownField(final int number, + final int wireType, + final List<?> values, + final TextGenerator generator) + throws IOException { + for (final Object value : values) { + generator.print(String.valueOf(number)); generator.print(": "); - generator.print(String.format((Locale) null, "0x%016x", value)); - generator.print("\n"); - } - for (final ByteString value : field.getLengthDelimitedList()) { - generator.print(entry.getKey().toString()); - generator.print(": \""); - generator.print(escapeBytes(value)); - generator.print("\"\n"); - } - for (final UnknownFieldSet value : field.getGroupList()) { - generator.print(entry.getKey().toString()); - generator.print(" {\n"); - generator.indent(); - printUnknownFields(value, generator); - generator.outdent(); - generator.print("}\n"); + printUnknownFieldValue(wireType, value, generator); + generator.print(singleLineMode ? " " : "\n"); } } } /** Convert an unsigned 32-bit integer to a string. */ - private static String unsignedToString(final int value) { + public static String unsignedToString(final int value) { if (value >= 0) { return Integer.toString(value); } else { - return Long.toString(((long) value) & 0x00000000FFFFFFFFL); + return Long.toString(value & 0x00000000FFFFFFFFL); } } /** Convert an unsigned 64-bit integer to a string. */ - private static String unsignedToString(final long value) { + public static String unsignedToString(final long value) { if (value >= 0) { return Long.toString(value); } else { @@ -313,9 +509,9 @@ public final class TextFormat { * An inner class for writing text to the output stream. 
*/ private static final class TextGenerator { - private Appendable output; - private boolean atStartOfLine = true; + private final Appendable output; private final StringBuilder indent = new StringBuilder(); + private boolean atStartOfLine = true; private TextGenerator(final Appendable output) { this.output = output; @@ -352,17 +548,16 @@ public final class TextFormat { for (int i = 0; i < size; i++) { if (text.charAt(i) == '\n') { - write(text.subSequence(pos, size), i - pos + 1); + write(text.subSequence(pos, i + 1)); pos = i + 1; atStartOfLine = true; } } - write(text.subSequence(pos, size), size - pos); + write(text.subSequence(pos, size)); } - private void write(final CharSequence data, final int size) - throws IOException { - if (size == 0) { + private void write(final CharSequence data) throws IOException { + if (data.length() == 0) { return; } if (atStartOfLine) { @@ -421,7 +616,7 @@ public final class TextFormat { private int previousLine = 0; private int previousColumn = 0; - // We use possesive quantifiers (*+ and ++) because otherwise the Java + // We use possessive quantifiers (*+ and ++) because otherwise the Java // regex matcher has stack overflows on large inputs. private static final Pattern WHITESPACE = Pattern.compile("(\\s|(#.*$))++", Pattern.MULTILINE); @@ -539,6 +734,14 @@ public final class TextFormat { } /** + * Returns {@code true} if the current token's text is equal to that + * specified. + */ + public boolean lookingAt(String text) { + return currentToken.equals(text); + } + + /** * If the next token is an identifier, consume it and return its value. * Otherwise, throw a {@link ParseException}. */ @@ -551,7 +754,8 @@ public final class TextFormat { (c == '_') || (c == '.')) { // OK } else { - throw parseException("Expected identifier."); + throw parseException( + "Expected identifier. 
Found '" + currentToken + "'"); } } @@ -561,6 +765,19 @@ public final class TextFormat { } /** + * If the next token is an identifier, consume it and return {@code true}. + * Otherwise, return {@code false} without doing anything. + */ + public boolean tryConsumeIdentifier() { + try { + consumeIdentifier(); + return true; + } catch (ParseException e) { + return false; + } + } + + /** * If the next token is a 32-bit signed integer, consume it and return its * value. Otherwise, throw a {@link ParseException}. */ @@ -603,6 +820,19 @@ public final class TextFormat { } /** + * If the next token is a 64-bit signed integer, consume it and return + * {@code true}. Otherwise, return {@code false} without doing anything. + */ + public boolean tryConsumeInt64() { + try { + consumeInt64(); + return true; + } catch (ParseException e) { + return false; + } + } + + /** * If the next token is a 64-bit unsigned integer, consume it and return its * value. Otherwise, throw a {@link ParseException}. */ @@ -617,6 +847,19 @@ public final class TextFormat { } /** + * If the next token is a 64-bit unsigned integer, consume it and return + * {@code true}. Otherwise, return {@code false} without doing anything. + */ + public boolean tryConsumeUInt64() { + try { + consumeUInt64(); + return true; + } catch (ParseException e) { + return false; + } + } + + /** * If the next token is a double, consume it and return its value. * Otherwise, throw a {@link ParseException}. */ @@ -642,6 +885,19 @@ public final class TextFormat { } /** + * If the next token is a double, consume it and return {@code true}. + * Otherwise, return {@code false} without doing anything. + */ + public boolean tryConsumeDouble() { + try { + consumeDouble(); + return true; + } catch (ParseException e) { + return false; + } + } + + /** * If the next token is a float, consume it and return its value. * Otherwise, throw a {@link ParseException}. 
*/ @@ -667,14 +923,31 @@ public final class TextFormat { } /** + * If the next token is a float, consume it and return {@code true}. + * Otherwise, return {@code false} without doing anything. + */ + public boolean tryConsumeFloat() { + try { + consumeFloat(); + return true; + } catch (ParseException e) { + return false; + } + } + + /** * If the next token is a boolean, consume it and return its value. * Otherwise, throw a {@link ParseException}. */ public boolean consumeBoolean() throws ParseException { - if (currentToken.equals("true")) { + if (currentToken.equals("true") || + currentToken.equals("t") || + currentToken.equals("1")) { nextToken(); return true; - } else if (currentToken.equals("false")) { + } else if (currentToken.equals("false") || + currentToken.equals("f") || + currentToken.equals("0")) { nextToken(); return false; } else { @@ -691,6 +964,19 @@ public final class TextFormat { } /** + * If the next token is a string, consume it and return true. Otherwise, + * return false. + */ + public boolean tryConsumeString() { + try { + consumeString(); + return true; + } catch (ParseException e) { + return false; + } + } + + /** * If the next token is a string, consume it, unescape it as a * {@link ByteString}, and return it. Otherwise, throw a * {@link ParseException}. @@ -710,7 +996,8 @@ public final class TextFormat { * multiple adjacent tokens which are automatically concatenated, like in * C or Python. */ - private void consumeByteString(List<ByteString> list) throws ParseException { + private void consumeByteString(List<ByteString> list) + throws ParseException { final char quote = currentToken.length() > 0 ? currentToken.charAt(0) : '\0'; if (quote != '\"' && quote != '\'') { @@ -740,7 +1027,7 @@ public final class TextFormat { public ParseException parseException(final String description) { // Note: People generally prefer one-based line and column numbers. 
return new ParseException( - (line + 1) + ":" + (column + 1) + ": " + description); + line + 1, column + 1, description); } /** @@ -751,7 +1038,7 @@ public final class TextFormat { final String description) { // Note: People generally prefer one-based line and column numbers. return new ParseException( - (previousLine + 1) + ":" + (previousColumn + 1) + ": " + description); + previousLine + 1, previousColumn + 1, description); } /** @@ -776,11 +1063,58 @@ public final class TextFormat { public static class ParseException extends IOException { private static final long serialVersionUID = 3196188060225107702L; + private final int line; + private final int column; + + /** Create a new instance, with -1 as the line and column numbers. */ public ParseException(final String message) { - super(message); + this(-1, -1, message); + } + + /** + * Create a new instance + * + * @param line the line number where the parse error occurred, + * using 1-offset. + * @param column the column number where the parser error occurred, + * using 1-offset. + */ + public ParseException(final int line, final int column, + final String message) { + super(Integer.toString(line) + ":" + column + ": " + message); + this.line = line; + this.column = column; + } + + /** + * Return the line where the parse exception occurred, or -1 when + * none is provided. The value is specified as 1-offset, so the first + * line is line 1. + */ + public int getLine() { + return line; + } + + /** + * Return the column where the parse exception occurred, or -1 when + * none is provided. The value is specified as 1-offset, so the first + * column is column 1. + */ + public int getColumn() { + return column; } } + private static final Parser PARSER = Parser.newBuilder().build(); + + /** + * Return a {@link Parser} instance which can parse text-format + * messages. The returned instance is thread-safe. 
+ */ + public static Parser getParser() { + return PARSER; + } + /** * Parse a text-format message from {@code input} and merge the contents * into {@code builder}. @@ -788,7 +1122,7 @@ public final class TextFormat { public static void merge(final Readable input, final Message.Builder builder) throws IOException { - merge(input, ExtensionRegistry.getEmptyRegistry(), builder); + PARSER.merge(input, builder); } /** @@ -798,7 +1132,7 @@ public final class TextFormat { public static void merge(final CharSequence input, final Message.Builder builder) throws ParseException { - merge(input, ExtensionRegistry.getEmptyRegistry(), builder); + PARSER.merge(input, builder); } /** @@ -810,35 +1144,9 @@ public final class TextFormat { final ExtensionRegistry extensionRegistry, final Message.Builder builder) throws IOException { - // Read the entire input to a String then parse that. - - // If StreamTokenizer were not quite so crippled, or if there were a kind - // of Reader that could read in chunks that match some particular regex, - // or if we wanted to write a custom Reader to tokenize our stream, then - // we would not have to read to one big String. Alas, none of these is - // the case. Oh well. 
- - merge(toStringBuilder(input), extensionRegistry, builder); + PARSER.merge(input, extensionRegistry, builder); } - private static final int BUFFER_SIZE = 4096; - - // TODO(chrisn): See if working around java.io.Reader#read(CharBuffer) - // overhead is worthwhile - private static StringBuilder toStringBuilder(final Readable input) - throws IOException { - final StringBuilder text = new StringBuilder(); - final CharBuffer buffer = CharBuffer.allocate(BUFFER_SIZE); - while (true) { - final int n = input.read(buffer); - if (n == -1) { - break; - } - buffer.flip(); - text.append(buffer, 0, n); - } - return text; - } /** * Parse a text-format message from {@code input} and merge the contents @@ -849,187 +1157,466 @@ public final class TextFormat { final ExtensionRegistry extensionRegistry, final Message.Builder builder) throws ParseException { - final Tokenizer tokenizer = new Tokenizer(input); - - while (!tokenizer.atEnd()) { - mergeField(tokenizer, extensionRegistry, builder); - } + PARSER.merge(input, extensionRegistry, builder); } + /** - * Parse a single field from {@code tokenizer} and merge it into - * {@code builder}. + * Parser for text-format proto2 instances. This class is thread-safe. + * The implementation largely follows google/protobuf/text_format.cc. + * + * <p>Use {@link TextFormat#getParser()} to obtain the default parser, or + * {@link Builder} to control the parser behavior. */ - private static void mergeField(final Tokenizer tokenizer, - final ExtensionRegistry extensionRegistry, - final Message.Builder builder) - throws ParseException { - FieldDescriptor field; - final Descriptor type = builder.getDescriptorForType(); - ExtensionRegistry.ExtensionInfo extension = null; + public static class Parser { + /** + * Determines if repeated values for non-repeated fields and + * oneofs are permitted. 
For example, given required/optional field "foo" + * and a oneof containing "baz" and "qux": + * <ul> + * <li>"foo: 1 foo: 2" + * <li>"baz: 1 qux: 2" + * <li>merging "foo: 2" into a proto in which foo is already set, or + * <li>merging "qux: 2" into a proto in which baz is already set. + * </ul> + */ + public enum SingularOverwritePolicy { + /** The last value is retained. */ + ALLOW_SINGULAR_OVERWRITES, + /** An error is issued. */ + FORBID_SINGULAR_OVERWRITES + } - if (tokenizer.tryConsume("[")) { - // An extension. - final StringBuilder name = - new StringBuilder(tokenizer.consumeIdentifier()); - while (tokenizer.tryConsume(".")) { - name.append('.'); - name.append(tokenizer.consumeIdentifier()); - } + private final boolean allowUnknownFields; + private final SingularOverwritePolicy singularOverwritePolicy; + + private Parser(boolean allowUnknownFields, + SingularOverwritePolicy singularOverwritePolicy) { + this.allowUnknownFields = allowUnknownFields; + this.singularOverwritePolicy = singularOverwritePolicy; + } + + /** + * Returns a new instance of {@link Builder}. + */ + public static Builder newBuilder() { + return new Builder(); + } - extension = extensionRegistry.findExtensionByName(name.toString()); + /** + * Builder that can be used to obtain new instances of {@link Parser}. + */ + public static class Builder { + private boolean allowUnknownFields = false; + private SingularOverwritePolicy singularOverwritePolicy = + SingularOverwritePolicy.ALLOW_SINGULAR_OVERWRITES; + + /** + * Sets parser behavior when a non-repeated field appears more than once. 
+ */ + public Builder setSingularOverwritePolicy(SingularOverwritePolicy p) { + this.singularOverwritePolicy = p; + return this; + } - if (extension == null) { - throw tokenizer.parseExceptionPreviousToken( - "Extension \"" + name + "\" not found in the ExtensionRegistry."); - } else if (extension.descriptor.getContainingType() != type) { - throw tokenizer.parseExceptionPreviousToken( - "Extension \"" + name + "\" does not extend message type \"" + - type.getFullName() + "\"."); + public Parser build() { + return new Parser(allowUnknownFields, singularOverwritePolicy); } + } - tokenizer.consume("]"); + /** + * Parse a text-format message from {@code input} and merge the contents + * into {@code builder}. + */ + public void merge(final Readable input, + final Message.Builder builder) + throws IOException { + merge(input, ExtensionRegistry.getEmptyRegistry(), builder); + } - field = extension.descriptor; - } else { - final String name = tokenizer.consumeIdentifier(); - field = type.findFieldByName(name); + /** + * Parse a text-format message from {@code input} and merge the contents + * into {@code builder}. + */ + public void merge(final CharSequence input, + final Message.Builder builder) + throws ParseException { + merge(input, ExtensionRegistry.getEmptyRegistry(), builder); + } - // Group names are expected to be capitalized as they appear in the - // .proto file, which actually matches their type names, not their field - // names. - if (field == null) { - // Explicitly specify US locale so that this code does not break when - // executing in Turkey. - final String lowerName = name.toLowerCase(Locale.US); - field = type.findFieldByName(lowerName); - // If the case-insensitive match worked but the field is NOT a group, - if (field != null && field.getType() != FieldDescriptor.Type.GROUP) { - field = null; + /** + * Parse a text-format message from {@code input} and merge the contents + * into {@code builder}. 
Extensions will be recognized if they are + * registered in {@code extensionRegistry}. + */ + public void merge(final Readable input, + final ExtensionRegistry extensionRegistry, + final Message.Builder builder) + throws IOException { + // Read the entire input to a String then parse that. + + // If StreamTokenizer were not quite so crippled, or if there were a kind + // of Reader that could read in chunks that match some particular regex, + // or if we wanted to write a custom Reader to tokenize our stream, then + // we would not have to read to one big String. Alas, none of these is + // the case. Oh well. + + merge(toStringBuilder(input), extensionRegistry, builder); + } + + + private static final int BUFFER_SIZE = 4096; + + // TODO(chrisn): See if working around java.io.Reader#read(CharBuffer) + // overhead is worthwhile + private static StringBuilder toStringBuilder(final Readable input) + throws IOException { + final StringBuilder text = new StringBuilder(); + final CharBuffer buffer = CharBuffer.allocate(BUFFER_SIZE); + while (true) { + final int n = input.read(buffer); + if (n == -1) { + break; } + buffer.flip(); + text.append(buffer, 0, n); } - // Again, special-case group names as described above. - if (field != null && field.getType() == FieldDescriptor.Type.GROUP && - !field.getMessageType().getName().equals(name)) { - field = null; - } + return text; + } - if (field == null) { - throw tokenizer.parseExceptionPreviousToken( - "Message type \"" + type.getFullName() + - "\" has no field named \"" + name + "\"."); + /** + * Parse a text-format message from {@code input} and merge the contents + * into {@code builder}. Extensions will be recognized if they are + * registered in {@code extensionRegistry}. 
+ */ + public void merge(final CharSequence input, + final ExtensionRegistry extensionRegistry, + final Message.Builder builder) + throws ParseException { + final Tokenizer tokenizer = new Tokenizer(input); + MessageReflection.BuilderAdapter target = + new MessageReflection.BuilderAdapter(builder); + + while (!tokenizer.atEnd()) { + mergeField(tokenizer, extensionRegistry, target); } } - Object value = null; - if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { - tokenizer.tryConsume(":"); // optional + /** + * Parse a single field from {@code tokenizer} and merge it into + * {@code builder}. + */ + private void mergeField(final Tokenizer tokenizer, + final ExtensionRegistry extensionRegistry, + final MessageReflection.MergeTarget target) + throws ParseException { + FieldDescriptor field = null; + final Descriptor type = target.getDescriptorForType(); + ExtensionRegistry.ExtensionInfo extension = null; + + if (tokenizer.tryConsume("[")) { + // An extension. + final StringBuilder name = + new StringBuilder(tokenizer.consumeIdentifier()); + while (tokenizer.tryConsume(".")) { + name.append('.'); + name.append(tokenizer.consumeIdentifier()); + } - final String endToken; - if (tokenizer.tryConsume("<")) { - endToken = ">"; - } else { - tokenizer.consume("{"); - endToken = "}"; - } + extension = target.findExtensionByName( + extensionRegistry, name.toString()); + + if (extension == null) { + if (!allowUnknownFields) { + throw tokenizer.parseExceptionPreviousToken( + "Extension \"" + name + "\" not found in the ExtensionRegistry."); + } else { + logger.warning( + "Extension \"" + name + "\" not found in the ExtensionRegistry."); + } + } else { + if (extension.descriptor.getContainingType() != type) { + throw tokenizer.parseExceptionPreviousToken( + "Extension \"" + name + "\" does not extend message type \"" + + type.getFullName() + "\"."); + } + field = extension.descriptor; + } - final Message.Builder subBuilder; - if (extension == null) { - subBuilder = 
builder.newBuilderForField(field); + tokenizer.consume("]"); } else { - subBuilder = extension.defaultInstance.newBuilderForType(); - } + final String name = tokenizer.consumeIdentifier(); + field = type.findFieldByName(name); + + // Group names are expected to be capitalized as they appear in the + // .proto file, which actually matches their type names, not their field + // names. + if (field == null) { + // Explicitly specify US locale so that this code does not break when + // executing in Turkey. + final String lowerName = name.toLowerCase(Locale.US); + field = type.findFieldByName(lowerName); + // If the case-insensitive match worked but the field is NOT a group, + if (field != null && field.getType() != FieldDescriptor.Type.GROUP) { + field = null; + } + } + // Again, special-case group names as described above. + if (field != null && field.getType() == FieldDescriptor.Type.GROUP && + !field.getMessageType().getName().equals(name)) { + field = null; + } - while (!tokenizer.tryConsume(endToken)) { - if (tokenizer.atEnd()) { - throw tokenizer.parseException( - "Expected \"" + endToken + "\"."); + if (field == null) { + if (!allowUnknownFields) { + throw tokenizer.parseExceptionPreviousToken( + "Message type \"" + type.getFullName() + + "\" has no field named \"" + name + "\"."); + } else { + logger.warning( + "Message type \"" + type.getFullName() + + "\" has no field named \"" + name + "\"."); + } } - mergeField(tokenizer, extensionRegistry, subBuilder); } - value = subBuilder.build(); - - } else { - tokenizer.consume(":"); + // Skips unknown fields. + if (field == null) { + // Try to guess the type of this field. + // If this field is not a message, there should be a ":" between the + // field name and the field value and also the field value should not + // start with "{" or "<" which indicates the begining of a message body. + // If there is no ":" or there is a "{" or "<" after ":", this field has + // to be a message or the input is ill-formed. 
+ if (tokenizer.tryConsume(":") && !tokenizer.lookingAt("{") && + !tokenizer.lookingAt("<")) { + skipFieldValue(tokenizer); + } else { + skipFieldMessage(tokenizer); + } + return; + } - switch (field.getType()) { - case INT32: - case SINT32: - case SFIXED32: - value = tokenizer.consumeInt32(); - break; + // Handle potential ':'. + if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { + tokenizer.tryConsume(":"); // optional + } else { + tokenizer.consume(":"); // required + } + // Support specifying repeated field values as a comma-separated list. + // Ex."foo: [1, 2, 3]" + if (field.isRepeated() && tokenizer.tryConsume("[")) { + while (true) { + consumeFieldValue(tokenizer, extensionRegistry, target, field, extension); + if (tokenizer.tryConsume("]")) { + // End of list. + break; + } + tokenizer.consume(","); + } + } else { + consumeFieldValue(tokenizer, extensionRegistry, target, field, extension); + } + } - case INT64: - case SINT64: - case SFIXED64: - value = tokenizer.consumeInt64(); - break; + /** + * Parse a single field value from {@code tokenizer} and merge it into + * {@code builder}. + */ + private void consumeFieldValue( + final Tokenizer tokenizer, + final ExtensionRegistry extensionRegistry, + final MessageReflection.MergeTarget target, + final FieldDescriptor field, + final ExtensionRegistry.ExtensionInfo extension) + throws ParseException { + Object value = null; + + if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { + final String endToken; + if (tokenizer.tryConsume("<")) { + endToken = ">"; + } else { + tokenizer.consume("{"); + endToken = "}"; + } - case UINT32: - case FIXED32: - value = tokenizer.consumeUInt32(); - break; + final MessageReflection.MergeTarget subField; + subField = target.newMergeTargetForField(field, + (extension == null) ? 
null : extension.defaultInstance); - case UINT64: - case FIXED64: - value = tokenizer.consumeUInt64(); - break; + while (!tokenizer.tryConsume(endToken)) { + if (tokenizer.atEnd()) { + throw tokenizer.parseException( + "Expected \"" + endToken + "\"."); + } + mergeField(tokenizer, extensionRegistry, subField); + } - case FLOAT: - value = tokenizer.consumeFloat(); - break; + value = subField.finish(); - case DOUBLE: - value = tokenizer.consumeDouble(); - break; + } else { + switch (field.getType()) { + case INT32: + case SINT32: + case SFIXED32: + value = tokenizer.consumeInt32(); + break; + + case INT64: + case SINT64: + case SFIXED64: + value = tokenizer.consumeInt64(); + break; + + case UINT32: + case FIXED32: + value = tokenizer.consumeUInt32(); + break; + + case UINT64: + case FIXED64: + value = tokenizer.consumeUInt64(); + break; + + case FLOAT: + value = tokenizer.consumeFloat(); + break; + + case DOUBLE: + value = tokenizer.consumeDouble(); + break; + + case BOOL: + value = tokenizer.consumeBoolean(); + break; + + case STRING: + value = tokenizer.consumeString(); + break; + + case BYTES: + value = tokenizer.consumeByteString(); + break; + + case ENUM: + final EnumDescriptor enumType = field.getEnumType(); + + if (tokenizer.lookingAtInteger()) { + final int number = tokenizer.consumeInt32(); + value = enumType.findValueByNumber(number); + if (value == null) { + throw tokenizer.parseExceptionPreviousToken( + "Enum type \"" + enumType.getFullName() + + "\" has no value with number " + number + '.'); + } + } else { + final String id = tokenizer.consumeIdentifier(); + value = enumType.findValueByName(id); + if (value == null) { + throw tokenizer.parseExceptionPreviousToken( + "Enum type \"" + enumType.getFullName() + + "\" has no value named \"" + id + "\"."); + } + } - case BOOL: - value = tokenizer.consumeBoolean(); - break; + break; - case STRING: - value = tokenizer.consumeString(); - break; + case MESSAGE: + case GROUP: + throw new RuntimeException("Can't 
get here."); + } + } - case BYTES: - value = tokenizer.consumeByteString(); - break; + if (field.isRepeated()) { + target.addRepeatedField(field, value); + } else if ((singularOverwritePolicy + == SingularOverwritePolicy.FORBID_SINGULAR_OVERWRITES) + && target.hasField(field)) { + throw tokenizer.parseExceptionPreviousToken("Non-repeated field \"" + + field.getFullName() + "\" cannot be overwritten."); + } else if ((singularOverwritePolicy + == SingularOverwritePolicy.FORBID_SINGULAR_OVERWRITES) + && field.getContainingOneof() != null + && target.hasOneof(field.getContainingOneof())) { + Descriptors.OneofDescriptor oneof = field.getContainingOneof(); + throw tokenizer.parseExceptionPreviousToken("Field \"" + + field.getFullName() + "\" is specified along with field \"" + + target.getOneofFieldDescriptor(oneof).getFullName() + + "\", another member of oneof \"" + oneof.getName() + "\"."); + } else { + target.setField(field, value); + } + } - case ENUM: - final EnumDescriptor enumType = field.getEnumType(); - - if (tokenizer.lookingAtInteger()) { - final int number = tokenizer.consumeInt32(); - value = enumType.findValueByNumber(number); - if (value == null) { - throw tokenizer.parseExceptionPreviousToken( - "Enum type \"" + enumType.getFullName() + - "\" has no value with number " + number + '.'); - } - } else { - final String id = tokenizer.consumeIdentifier(); - value = enumType.findValueByName(id); - if (value == null) { - throw tokenizer.parseExceptionPreviousToken( - "Enum type \"" + enumType.getFullName() + - "\" has no value named \"" + id + "\"."); - } - } + /** + * Skips the next field including the field's name and value. + */ + private void skipField(Tokenizer tokenizer) throws ParseException { + if (tokenizer.tryConsume("[")) { + // Extension name. + do { + tokenizer.consumeIdentifier(); + } while (tokenizer.tryConsume(".")); + tokenizer.consume("]"); + } else { + tokenizer.consumeIdentifier(); + } - break; + // Try to guess the type of this field. 
+ // If this field is not a message, there should be a ":" between the + // field name and the field value and also the field value should not + // start with "{" or "<" which indicates the begining of a message body. + // If there is no ":" or there is a "{" or "<" after ":", this field has + // to be a message or the input is ill-formed. + if (tokenizer.tryConsume(":") && !tokenizer.lookingAt("<") && + !tokenizer.lookingAt("{")) { + skipFieldValue(tokenizer); + } else { + skipFieldMessage(tokenizer); + } + // For historical reasons, fields may optionally be separated by commas or + // semicolons. + if (!tokenizer.tryConsume(";")) { + tokenizer.tryConsume(","); + } + } - case MESSAGE: - case GROUP: - throw new RuntimeException("Can't get here."); + /** + * Skips the whole body of a message including the beginning delimeter and + * the ending delimeter. + */ + private void skipFieldMessage(Tokenizer tokenizer) throws ParseException { + final String delimiter; + if (tokenizer.tryConsume("<")) { + delimiter = ">"; + } else { + tokenizer.consume("{"); + delimiter = "}"; } + while (!tokenizer.lookingAt(">") && !tokenizer.lookingAt("}")) { + skipField(tokenizer); + } + tokenizer.consume(delimiter); } - if (field.isRepeated()) { - builder.addRepeatedField(field, value); - } else { - builder.setField(field, value); + /** + * Skips a field value. 
+ */ + private void skipFieldValue(Tokenizer tokenizer) throws ParseException { + if (tokenizer.tryConsumeString()) { + while (tokenizer.tryConsumeString()) {} + return; + } + if (!tokenizer.tryConsumeIdentifier() && // includes enum & boolean + !tokenizer.tryConsumeInt64() && // includes int32 + !tokenizer.tryConsumeUInt64() && // includes uint32 + !tokenizer.tryConsumeDouble() && + !tokenizer.tryConsumeFloat()) { + throw tokenizer.parseException( + "Invalid field value: " + tokenizer.currentToken); + } } } @@ -1039,6 +1626,11 @@ public final class TextFormat { // Some of these methods are package-private because Descriptors.java uses // them. + private interface ByteSequence { + int size(); + byte byteAt(int offset); + } + /** * Escapes bytes in the format used in protocol buffer text format, which * is the same as the format used for C string literals. All bytes @@ -1047,7 +1639,7 @@ public final class TextFormat { * which no defined short-hand escape sequence is defined will be escaped * using 3-digit octal sequences. */ - static String escapeBytes(final ByteString input) { + private static String escapeBytes(final ByteSequence input) { final StringBuilder builder = new StringBuilder(input.size()); for (int i = 0; i < input.size(); i++) { final byte b = input.byteAt(i); @@ -1064,6 +1656,9 @@ public final class TextFormat { case '\'': builder.append("\\\'"); break; case '"' : builder.append("\\\""); break; default: + // Note: Bytes with the high-order bit set should be escaped. Since + // bytes are signed, such bytes will compare less than 0x20, hence + // the following line is correct. if (b >= 0x20) { builder.append((char) b); } else { @@ -1079,31 +1674,74 @@ public final class TextFormat { } /** + * Escapes bytes in the format used in protocol buffer text format, which + * is the same as the format used for C string literals. 
All bytes + * that are not printable 7-bit ASCII characters are escaped, as well as + * backslash, single-quote, and double-quote characters. Characters for + * which no defined short-hand escape sequence is defined will be escaped + * using 3-digit octal sequences. + */ + static String escapeBytes(final ByteString input) { + return escapeBytes(new ByteSequence() { + public int size() { + return input.size(); + } + public byte byteAt(int offset) { + return input.byteAt(offset); + } + }); + } + + /** + * Like {@link #escapeBytes(ByteString)}, but used for byte array. + */ + static String escapeBytes(final byte[] input) { + return escapeBytes(new ByteSequence() { + public int size() { + return input.length; + } + public byte byteAt(int offset) { + return input[offset]; + } + }); + } + + /** * Un-escape a byte sequence as escaped using * {@link #escapeBytes(ByteString)}. Two-digit hex escapes (starting with * "\x") are also recognized. */ - static ByteString unescapeBytes(final CharSequence input) + static ByteString unescapeBytes(final CharSequence charString) throws InvalidEscapeSequenceException { - final byte[] result = new byte[input.length()]; + // First convert the Java character sequence to UTF-8 bytes. + ByteString input = ByteString.copyFromUtf8(charString.toString()); + // Then unescape certain byte sequences introduced by ASCII '\\'. The valid + // escapes can all be expressed with ASCII characters, so it is safe to + // operate on bytes here. + // + // Unescaping the input byte array will result in a byte sequence that's no + // longer than the input. That's because each escape sequence is between + // two and four bytes long and stands for a single byte. 
+ final byte[] result = new byte[input.size()]; int pos = 0; - for (int i = 0; i < input.length(); i++) { - char c = input.charAt(i); + for (int i = 0; i < input.size(); i++) { + byte c = input.byteAt(i); if (c == '\\') { - if (i + 1 < input.length()) { + if (i + 1 < input.size()) { ++i; - c = input.charAt(i); + c = input.byteAt(i); if (isOctal(c)) { // Octal escape. int code = digitValue(c); - if (i + 1 < input.length() && isOctal(input.charAt(i + 1))) { + if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) { ++i; - code = code * 8 + digitValue(input.charAt(i)); + code = code * 8 + digitValue(input.byteAt(i)); } - if (i + 1 < input.length() && isOctal(input.charAt(i + 1))) { + if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) { ++i; - code = code * 8 + digitValue(input.charAt(i)); + code = code * 8 + digitValue(input.byteAt(i)); } + // TODO: Check that 0 <= code && code <= 0xFF. result[pos++] = (byte)code; } else { switch (c) { @@ -1121,31 +1759,31 @@ public final class TextFormat { case 'x': // hex escape int code = 0; - if (i + 1 < input.length() && isHex(input.charAt(i + 1))) { + if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) { ++i; - code = digitValue(input.charAt(i)); + code = digitValue(input.byteAt(i)); } else { throw new InvalidEscapeSequenceException( - "Invalid escape sequence: '\\x' with no digits"); + "Invalid escape sequence: '\\x' with no digits"); } - if (i + 1 < input.length() && isHex(input.charAt(i + 1))) { + if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) { ++i; - code = code * 16 + digitValue(input.charAt(i)); + code = code * 16 + digitValue(input.byteAt(i)); } result[pos++] = (byte)code; break; default: throw new InvalidEscapeSequenceException( - "Invalid escape sequence: '\\" + c + '\''); + "Invalid escape sequence: '\\" + (char)c + '\''); } } } else { throw new InvalidEscapeSequenceException( - "Invalid escape sequence: '\\' at end of string."); + "Invalid escape sequence: '\\' at end of string."); } } else { - 
result[pos++] = (byte)c; + result[pos++] = c; } } @@ -1174,6 +1812,13 @@ public final class TextFormat { } /** + * Escape double quotes and backslashes in a String for unicode output of a message. + */ + public static String escapeDoubleQuotesAndBackslashes(final String input) { + return input.replace("\\", "\\\\").replace("\"", "\\\""); + } + + /** * Un-escape a text string as escaped using {@link #escapeText(String)}. * Two-digit hex escapes (starting with "\x") are also recognized. */ @@ -1183,12 +1828,12 @@ public final class TextFormat { } /** Is this an octal digit? */ - private static boolean isOctal(final char c) { + private static boolean isOctal(final byte c) { return '0' <= c && c <= '7'; } /** Is this a hex digit? */ - private static boolean isHex(final char c) { + private static boolean isHex(final byte c) { return ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F'); @@ -1199,7 +1844,7 @@ public final class TextFormat { * numeric value. This is like {@code Character.digit()} but we don't accept * non-ASCII digits. */ - private static int digitValue(final char c) { + private static int digitValue(final byte c) { if ('0' <= c && c <= '9') { return c - '0'; } else if ('a' <= c && c <= 'z') { @@ -1212,7 +1857,7 @@ public final class TextFormat { /** * Parse a 32-bit signed integer from the text. Unlike the Java standard * {@code Integer.parseInt()}, this function recognizes the prefixes "0x" - * and "0" to signify hexidecimal and octal numbers, respectively. + * and "0" to signify hexadecimal and octal numbers, respectively. */ static int parseInt32(final String text) throws NumberFormatException { return (int) parseInteger(text, true, false); @@ -1221,7 +1866,7 @@ public final class TextFormat { /** * Parse a 32-bit unsigned integer from the text. Unlike the Java standard * {@code Integer.parseInt()}, this function recognizes the prefixes "0x" - * and "0" to signify hexidecimal and octal numbers, respectively. 
The + * and "0" to signify hexadecimal and octal numbers, respectively. The * result is coerced to a (signed) {@code int} when returned since Java has * no unsigned integer type. */ @@ -1232,7 +1877,7 @@ public final class TextFormat { /** * Parse a 64-bit signed integer from the text. Unlike the Java standard * {@code Integer.parseInt()}, this function recognizes the prefixes "0x" - * and "0" to signify hexidecimal and octal numbers, respectively. + * and "0" to signify hexadecimal and octal numbers, respectively. */ static long parseInt64(final String text) throws NumberFormatException { return parseInteger(text, true, true); @@ -1241,7 +1886,7 @@ public final class TextFormat { /** * Parse a 64-bit unsigned integer from the text. Unlike the Java standard * {@code Integer.parseInt()}, this function recognizes the prefixes "0x" - * and "0" to signify hexidecimal and octal numbers, respectively. The + * and "0" to signify hexadecimal and octal numbers, respectively. The * result is coerced to a (signed) {@code long} when returned since Java has * no unsigned long type. */ |