1 files changed, 1013 insertions, 368 deletions
diff --git a/java/src/main/java/com/google/protobuf/TextFormat.java b/java/src/main/java/com/google/protobuf/TextFormat.java
index cb23f0c..57d0ca6 100644
--- a/java/src/main/java/com/google/protobuf/TextFormat.java
+++ b/java/src/main/java/com/google/protobuf/TextFormat.java
@@ -1,6 +1,6 @@
 // Protocol Buffers - Google's data interchange format
 // Copyright 2008 Google Inc.  All rights reserved.
-// http://code.google.com/p/protobuf/
+// https://developers.google.com/protocol-buffers/
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -31,63 +31,119 @@
 package com.google.protobuf;
 
 import com.google.protobuf.Descriptors.Descriptor;
-import com.google.protobuf.Descriptors.FieldDescriptor;
 import com.google.protobuf.Descriptors.EnumDescriptor;
 import com.google.protobuf.Descriptors.EnumValueDescriptor;
+import com.google.protobuf.Descriptors.FieldDescriptor;
 
 import java.io.IOException;
-import java.nio.CharBuffer;
 import java.math.BigInteger;
+import java.nio.CharBuffer;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
+import java.util.logging.Logger;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 /**
- * Provide ascii text parsing and formatting support for proto2 instances.
+ * Provide text parsing and formatting support for proto2 instances.
  * The implementation largely follows google/protobuf/text_format.cc.
  *
  * @author wenboz@google.com Wenbo Zhu
  * @author kenton@google.com Kenton Varda
  */
 public final class TextFormat {
-  private TextFormat() {
-  }
+  private TextFormat() {}
+
+  private static final Logger logger =
+      Logger.getLogger(TextFormat.class.getName());
+
+  private static final Printer DEFAULT_PRINTER = new Printer();
+  private static final Printer SINGLE_LINE_PRINTER =
+      (new Printer()).setSingleLineMode(true);
+  private static final Printer UNICODE_PRINTER =
+      (new Printer()).setEscapeNonAscii(false);
 
   /**
    * Outputs a textual representation of the Protocol Message supplied into
    * the parameter output. (This representation is the new version of the
    * classic "ProtocolPrinter" output from the original Protocol Buffer system)
    */
-  public static void print(final Message message, final Appendable output)
-                           throws IOException {
-    final TextGenerator generator = new TextGenerator(output);
-    print(message, generator);
+  public static void print(
+      final MessageOrBuilder message, final Appendable output)
+      throws IOException {
+    DEFAULT_PRINTER.print(message, new TextGenerator(output));
   }
 
   /** Outputs a textual representation of {@code fields} to {@code output}. */
   public static void print(final UnknownFieldSet fields,
                            final Appendable output)
                            throws IOException {
-    final TextGenerator generator = new TextGenerator(output);
-    printUnknownFields(fields, generator);
+    DEFAULT_PRINTER.printUnknownFields(fields, new TextGenerator(output));
+  }
+
+  /**
+   * Same as {@code print()}, except that non-ASCII characters are not
+   * escaped.
+   */
+  public static void printUnicode(
+      final MessageOrBuilder message, final Appendable output)
+      throws IOException {
+    UNICODE_PRINTER.print(message, new TextGenerator(output));
+  }
+
+  /**
+   * Same as {@code print()}, except that non-ASCII characters are not
+   * escaped.
+   */
+  public static void printUnicode(final UnknownFieldSet fields,
+                                  final Appendable output)
+                                  throws IOException {
+    UNICODE_PRINTER.printUnknownFields(fields, new TextGenerator(output));
+  }
+
+  /**
+   * Generates a human readable form of this message, useful for debugging and
+   * other purposes, with no newline characters.
+   */
+  public static String shortDebugString(final MessageOrBuilder message) {
+    try {
+      final StringBuilder sb = new StringBuilder();
+      SINGLE_LINE_PRINTER.print(message, new TextGenerator(sb));
+      // Single line mode currently might have an extra space at the end.
+      return sb.toString().trim();
+    } catch (IOException e) {
+      throw new IllegalStateException(e);
+    }
+  }
+
+  /**
+   * Generates a human readable form of the unknown fields, useful for debugging
+   * and other purposes, with no newline characters.
+   */
+  public static String shortDebugString(final UnknownFieldSet fields) {
+    try {
+      final StringBuilder sb = new StringBuilder();
+      SINGLE_LINE_PRINTER.printUnknownFields(fields, new TextGenerator(sb));
+      // Single line mode currently might have an extra space at the end.
+      return sb.toString().trim();
+    } catch (IOException e) {
+      throw new IllegalStateException(e);
+    }
   }
 
   /**
    * Like {@code print()}, but writes directly to a {@code String} and
    * returns it.
    */
-  public static String printToString(final Message message) {
+  public static String printToString(final MessageOrBuilder message) {
     try {
       final StringBuilder text = new StringBuilder();
       print(message, text);
       return text.toString();
     } catch (IOException e) {
-      throw new RuntimeException(
-        "Writing to a StringBuilder threw an IOException (should never " +
-        "happen).", e);
+      throw new IllegalStateException(e);
     }
   }
 
@@ -101,28 +157,43 @@ public final class TextFormat {
       print(fields, text);
       return text.toString();
     } catch (IOException e) {
-      throw new RuntimeException(
-        "Writing to a StringBuilder threw an IOException (should never " +
-        "happen).", e);
+      throw new IllegalStateException(e);
     }
   }
 
-  private static void print(final Message message,
-                            final TextGenerator generator)
-      throws IOException {
-    for (final Map.Entry<FieldDescriptor, Object> field :
-         message.getAllFields().entrySet()) {
-      printField(field.getKey(), field.getValue(), generator);
+  /**
+   * Same as {@code printToString()}, except that non-ASCII characters
+   * in string type fields are not escaped in backslash+octals.
+   */
+  public static String printToUnicodeString(final MessageOrBuilder message) {
+    try {
+      final StringBuilder text = new StringBuilder();
+      UNICODE_PRINTER.print(message, new TextGenerator(text));
+      return text.toString();
+    } catch (IOException e) {
+      throw new IllegalStateException(e);
+    }
+  }
+
+  /**
+   * Same as {@code printToString()}, except that non-ASCII characters
+   * in string type fields are not escaped in backslash+octals.
+   */
+  public static String printToUnicodeString(final UnknownFieldSet fields) {
+    try {
+      final StringBuilder text = new StringBuilder();
+      UNICODE_PRINTER.printUnknownFields(fields, new TextGenerator(text));
+      return text.toString();
+    } catch (IOException e) {
+      throw new IllegalStateException(e);
     }
-    printUnknownFields(message.getUnknownFields(), generator);
   }
 
   public static void printField(final FieldDescriptor field,
                                 final Object value,
                                 final Appendable output)
                                 throws IOException {
-    final TextGenerator generator = new TextGenerator(output);
-    printField(field, value, generator);
+    DEFAULT_PRINTER.printField(field, value, new TextGenerator(output));
   }
 
   public static String printFieldToString(final FieldDescriptor field,
@@ -132,173 +203,298 @@ public final class TextFormat {
       printField(field, value, text);
       return text.toString();
     } catch (IOException e) {
-      throw new RuntimeException(
-        "Writing to a StringBuilder threw an IOException (should never " +
-        "happen).", e);
+      throw new IllegalStateException(e);
     }
   }
 
-  private static void printField(final FieldDescriptor field,
-                                final Object value,
-                                final TextGenerator generator)
-                                throws IOException {
-    if (field.isRepeated()) {
-      // Repeated field.  Print each element.
-      for (final Object element : (List) value) {
-        printSingleField(field, element, generator);
-      }
-    } else {
-      printSingleField(field, value, generator);
+  /**
+   * Outputs a textual representation of the value of the given field.
+   *
+   * @param field the descriptor of the field
+   * @param value the value of the field
+   * @param output the output to which to append the formatted value
+   * @throws ClassCastException if the value is not appropriate for the
+   *     given field descriptor
+   * @throws IOException if there is an exception writing to the output
+   */
+  public static void printFieldValue(final FieldDescriptor field,
+                                     final Object value,
+                                     final Appendable output)
+                                     throws IOException {
+    DEFAULT_PRINTER.printFieldValue(field, value, new TextGenerator(output));
+  }
+
+  /**
+   * Outputs a textual representation of the value of an unknown field.
+   *
+   * @param tag the field's tag, from which the wire type is extracted
+   * @param value the value of the field
+   * @param output the output to which to append the formatted value
+   * @throws ClassCastException if the value is not appropriate for the
+   *     wire type encoded in {@code tag}
+   * @throws IOException if there is an exception writing to the output
+   */
+  public static void printUnknownFieldValue(final int tag,
+                                            final Object value,
+                                            final Appendable output)
+                                            throws IOException {
+    printUnknownFieldValue(tag, value, new TextGenerator(output));
+  }
+
+  private static void printUnknownFieldValue(final int tag,
+                                             final Object value,
+                                             final TextGenerator generator)
+                                             throws IOException {
+    switch (WireFormat.getTagWireType(tag)) {
+      case WireFormat.WIRETYPE_VARINT:
+        generator.print(unsignedToString((Long) value));
+        break;
+      case WireFormat.WIRETYPE_FIXED32:
+        generator.print(
+            String.format((Locale) null, "0x%08x", (Integer) value));
+        break;
+      case WireFormat.WIRETYPE_FIXED64:
+        generator.print(String.format((Locale) null, "0x%016x", (Long) value));
+        break;
+      case WireFormat.WIRETYPE_LENGTH_DELIMITED:
+        generator.print("\"");
+        generator.print(escapeBytes((ByteString) value));
+        generator.print("\"");
+        break;
+      case WireFormat.WIRETYPE_START_GROUP:
+        DEFAULT_PRINTER.printUnknownFields((UnknownFieldSet) value, generator);
+        break;
+      default:
+        throw new IllegalArgumentException("Bad tag: " + tag);
     }
   }
 
-  private static void printSingleField(final FieldDescriptor field,
-                                       final Object value,
-                                       final TextGenerator generator)
-                                       throws IOException {
-    if (field.isExtension()) {
-      generator.print("[");
-      // We special-case MessageSet elements for compatibility with proto1.
-      if (field.getContainingType().getOptions().getMessageSetWireFormat()
-          && (field.getType() == FieldDescriptor.Type.MESSAGE)
-          && (field.isOptional())
-          // object equality
-          && (field.getExtensionScope() == field.getMessageType())) {
-        generator.print(field.getMessageType().getFullName());
-      } else {
-        generator.print(field.getFullName());
+  /** Helper class for converting protobufs to text. */
+  private static final class Printer {
+    /** Whether to omit newlines from the output. */
+    boolean singleLineMode = false;
+
+    /** Whether to escape non ASCII characters with backslash and octal. */
+    boolean escapeNonAscii = true;
+
+    private Printer() {}
+
+    /** Setter of singleLineMode */
+    private Printer setSingleLineMode(boolean singleLineMode) {
+      this.singleLineMode = singleLineMode;
+      return this;
+    }
+
+    /** Setter of escapeNonAscii */
+    private Printer setEscapeNonAscii(boolean escapeNonAscii) {
+      this.escapeNonAscii = escapeNonAscii;
+      return this;
+    }
+
+    private void print(
+        final MessageOrBuilder message, final TextGenerator generator)
+        throws IOException {
+      for (Map.Entry<FieldDescriptor, Object> field
+          : message.getAllFields().entrySet()) {
+        printField(field.getKey(), field.getValue(), generator);
       }
-      generator.print("]");
-    } else {
-      if (field.getType() == FieldDescriptor.Type.GROUP) {
-        // Groups must be serialized with their original capitalization.
-        generator.print(field.getMessageType().getName());
+      printUnknownFields(message.getUnknownFields(), generator);
+    }
+
+    private void printField(final FieldDescriptor field, final Object value,
+        final TextGenerator generator) throws IOException {
+      if (field.isRepeated()) {
+        // Repeated field.  Print each element.
+        for (Object element : (List<?>) value) {
+          printSingleField(field, element, generator);
+        }
       } else {
-        generator.print(field.getName());
+        printSingleField(field, value, generator);
       }
     }
 
-    if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
-      generator.print(" {\n");
-      generator.indent();
-    } else {
-      generator.print(": ");
-    }
+    private void printSingleField(final FieldDescriptor field,
+                                  final Object value,
+                                  final TextGenerator generator)
+                                  throws IOException {
+      if (field.isExtension()) {
+        generator.print("[");
+        // We special-case MessageSet elements for compatibility with proto1.
+        if (field.getContainingType().getOptions().getMessageSetWireFormat()
+            && (field.getType() == FieldDescriptor.Type.MESSAGE)
+            && (field.isOptional())
+            // object equality
+            && (field.getExtensionScope() == field.getMessageType())) {
+          generator.print(field.getMessageType().getFullName());
+        } else {
+          generator.print(field.getFullName());
+        }
+        generator.print("]");
+      } else {
+        if (field.getType() == FieldDescriptor.Type.GROUP) {
+          // Groups must be serialized with their original capitalization.
+          generator.print(field.getMessageType().getName());
+        } else {
+          generator.print(field.getName());
+        }
+      }
 
-    printFieldValue(field, value, generator);
+      if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
+        if (singleLineMode) {
+          generator.print(" { ");
+        } else {
+          generator.print(" {\n");
+          generator.indent();
+        }
+      } else {
+        generator.print(": ");
+      }
+
+      printFieldValue(field, value, generator);
 
-    if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
-      generator.outdent();
-      generator.print("}");
+      if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
+        if (singleLineMode) {
+          generator.print("} ");
+        } else {
+          generator.outdent();
+          generator.print("}\n");
+        }
+      } else {
+        if (singleLineMode) {
+          generator.print(" ");
+        } else {
+          generator.print("\n");
+        }
+      }
     }
-    generator.print("\n");
-  }
 
-  private static void printFieldValue(final FieldDescriptor field,
-                                      final Object value,
-                                      final TextGenerator generator)
-                                      throws IOException {
-    switch (field.getType()) {
-      case INT32:
-      case INT64:
-      case SINT32:
-      case SINT64:
-      case SFIXED32:
-      case SFIXED64:
-      case FLOAT:
-      case DOUBLE:
-      case BOOL:
-        // Good old toString() does what we want for these types.
-        generator.print(value.toString());
-        break;
+    private void printFieldValue(final FieldDescriptor field,
+                                 final Object value,
+                                 final TextGenerator generator)
+                                 throws IOException {
+      switch (field.getType()) {
+        case INT32:
+        case SINT32:
+        case SFIXED32:
+          generator.print(((Integer) value).toString());
+          break;
 
-      case UINT32:
-      case FIXED32:
-        generator.print(unsignedToString((Integer) value));
-        break;
+        case INT64:
+        case SINT64:
+        case SFIXED64:
+          generator.print(((Long) value).toString());
+          break;
 
-      case UINT64:
-      case FIXED64:
-        generator.print(unsignedToString((Long) value));
-        break;
+        case BOOL:
+          generator.print(((Boolean) value).toString());
+          break;
 
-      case STRING:
-        generator.print("\"");
-        generator.print(escapeText((String) value));
-        generator.print("\"");
-        break;
+        case FLOAT:
+          generator.print(((Float) value).toString());
+          break;
 
-      case BYTES:
-        generator.print("\"");
-        generator.print(escapeBytes((ByteString) value));
-        generator.print("\"");
-        break;
+        case DOUBLE:
+          generator.print(((Double) value).toString());
+          break;
 
-      case ENUM:
-        generator.print(((EnumValueDescriptor) value).getName());
-        break;
+        case UINT32:
+        case FIXED32:
+          generator.print(unsignedToString((Integer) value));
+          break;
 
-      case MESSAGE:
-      case GROUP:
-        print((Message) value, generator);
-        break;
-    }
-  }
+        case UINT64:
+        case FIXED64:
+          generator.print(unsignedToString((Long) value));
+          break;
 
-  private static void printUnknownFields(final UnknownFieldSet unknownFields,
-                                         final TextGenerator generator)
-                                         throws IOException {
-    for (final Map.Entry<Integer, UnknownFieldSet.Field> entry :
-         unknownFields.asMap().entrySet()) {
-      final String prefix = entry.getKey().toString() + ": ";
-      final UnknownFieldSet.Field field = entry.getValue();
+        case STRING:
+          generator.print("\"");
+          generator.print(escapeNonAscii ?
+              escapeText((String) value) :
+              escapeDoubleQuotesAndBackslashes((String) value));
+          generator.print("\"");
+          break;
 
-      for (final long value : field.getVarintList()) {
-        generator.print(entry.getKey().toString());
-        generator.print(": ");
-        generator.print(unsignedToString(value));
-        generator.print("\n");
+        case BYTES:
+          generator.print("\"");
+          if (value instanceof ByteString) {
+            generator.print(escapeBytes((ByteString) value));
+          } else {
+            generator.print(escapeBytes((byte[]) value));
+          }
+          generator.print("\"");
+          break;
+
+        case ENUM:
+          generator.print(((EnumValueDescriptor) value).getName());
+          break;
+
+        case MESSAGE:
+        case GROUP:
+          print((Message) value, generator);
+          break;
       }
-      for (final int value : field.getFixed32List()) {
-        generator.print(entry.getKey().toString());
-        generator.print(": ");
-        generator.print(String.format((Locale) null, "0x%08x", value));
-        generator.print("\n");
+    }
+
+    private void printUnknownFields(final UnknownFieldSet unknownFields,
+                                    final TextGenerator generator)
+                                    throws IOException {
+      for (Map.Entry<Integer, UnknownFieldSet.Field> entry :
+               unknownFields.asMap().entrySet()) {
+        final int number = entry.getKey();
+        final UnknownFieldSet.Field field = entry.getValue();
+        printUnknownField(number, WireFormat.WIRETYPE_VARINT,
+            field.getVarintList(), generator);
+        printUnknownField(number, WireFormat.WIRETYPE_FIXED32,
+            field.getFixed32List(), generator);
+        printUnknownField(number, WireFormat.WIRETYPE_FIXED64,
+            field.getFixed64List(), generator);
+        printUnknownField(number, WireFormat.WIRETYPE_LENGTH_DELIMITED,
+            field.getLengthDelimitedList(), generator);
+        for (final UnknownFieldSet value : field.getGroupList()) {
+          generator.print(entry.getKey().toString());
+          if (singleLineMode) {
+            generator.print(" { ");
+          } else {
+            generator.print(" {\n");
+            generator.indent();
+          }
+          printUnknownFields(value, generator);
+          if (singleLineMode) {
+            generator.print("} ");
+          } else {
+            generator.outdent();
+            generator.print("}\n");
+          }
+        }
       }
-      for (final long value : field.getFixed64List()) {
-        generator.print(entry.getKey().toString());
+    }
+
+    private void printUnknownField(final int number,
+                                   final int wireType,
+                                   final List<?> values,
+                                   final TextGenerator generator)
+                                   throws IOException {
+      for (final Object value : values) {
+        generator.print(String.valueOf(number));
         generator.print(": ");
-        generator.print(String.format((Locale) null, "0x%016x", value));
-        generator.print("\n");
-      }
-      for (final ByteString value : field.getLengthDelimitedList()) {
-        generator.print(entry.getKey().toString());
-        generator.print(": \"");
-        generator.print(escapeBytes(value));
-        generator.print("\"\n");
-      }
-      for (final UnknownFieldSet value : field.getGroupList()) {
-        generator.print(entry.getKey().toString());
-        generator.print(" {\n");
-        generator.indent();
-        printUnknownFields(value, generator);
-        generator.outdent();
-        generator.print("}\n");
+        printUnknownFieldValue(wireType, value, generator);
+        generator.print(singleLineMode ? " " : "\n");
       }
     }
   }
 
   /** Convert an unsigned 32-bit integer to a string. */
-  private static String unsignedToString(final int value) {
+  public static String unsignedToString(final int value) {
     if (value >= 0) {
       return Integer.toString(value);
     } else {
-      return Long.toString(((long) value) & 0x00000000FFFFFFFFL);
+      return Long.toString(value & 0x00000000FFFFFFFFL);
     }
   }
 
   /** Convert an unsigned 64-bit integer to a string. */
-  private static String unsignedToString(final long value) {
+  public static String unsignedToString(final long value) {
     if (value >= 0) {
       return Long.toString(value);
     } else {
@@ -313,9 +509,9 @@ public final class TextFormat {
    * An inner class for writing text to the output stream.
    */
   private static final class TextGenerator {
-    private Appendable output;
-    private boolean atStartOfLine = true;
+    private final Appendable output;
     private final StringBuilder indent = new StringBuilder();
+    private boolean atStartOfLine = true;
 
     private TextGenerator(final Appendable output) {
       this.output = output;
@@ -352,17 +548,16 @@ public final class TextFormat {
 
       for (int i = 0; i < size; i++) {
         if (text.charAt(i) == '\n') {
-          write(text.subSequence(pos, size), i - pos + 1);
+          write(text.subSequence(pos, i + 1));
           pos = i + 1;
           atStartOfLine = true;
         }
       }
-      write(text.subSequence(pos, size), size - pos);
+      write(text.subSequence(pos, size));
     }
 
-    private void write(final CharSequence data, final int size)
-                       throws IOException {
-      if (size == 0) {
+    private void write(final CharSequence data) throws IOException {
+      if (data.length() == 0) {
         return;
       }
       if (atStartOfLine) {
@@ -421,7 +616,7 @@ public final class TextFormat {
     private int previousLine = 0;
     private int previousColumn = 0;
 
-    // We use possesive quantifiers (*+ and ++) because otherwise the Java
+    // We use possessive quantifiers (*+ and ++) because otherwise the Java
     // regex matcher has stack overflows on large inputs.
     private static final Pattern WHITESPACE =
       Pattern.compile("(\\s|(#.*$))++", Pattern.MULTILINE);
@@ -539,6 +734,14 @@ public final class TextFormat {
     }
 
     /**
+     * Returns {@code true} if the current token's text is equal to that
+     * specified.
+     */
+    public boolean lookingAt(String text) {
+      return currentToken.equals(text);
+    }
+
+    /**
      * If the next token is an identifier, consume it and return its value.
      * Otherwise, throw a {@link ParseException}.
      */
@@ -551,7 +754,8 @@ public final class TextFormat {
             (c == '_') || (c == '.')) {
           // OK
         } else {
-          throw parseException("Expected identifier.");
+          throw parseException(
+              "Expected identifier. Found '" + currentToken + "'");
         }
       }
 
@@ -561,6 +765,19 @@ public final class TextFormat {
     }
 
     /**
+     * If the next token is an identifier, consume it and return {@code true}.
+     * Otherwise, return {@code false} without doing anything.
+     */
+    public boolean tryConsumeIdentifier() {
+      try {
+        consumeIdentifier();
+        return true;
+      } catch (ParseException e) {
+        return false;
+      }
+    }
+
+    /**
      * If the next token is a 32-bit signed integer, consume it and return its
      * value.  Otherwise, throw a {@link ParseException}.
      */
@@ -603,6 +820,19 @@ public final class TextFormat {
     }
 
     /**
+     * If the next token is a 64-bit signed integer, consume it and return
+     * {@code true}.  Otherwise, return {@code false} without doing anything.
+     */
+    public boolean tryConsumeInt64() {
+      try {
+        consumeInt64();
+        return true;
+      } catch (ParseException e) {
+        return false;
+      }
+    }
+
+    /**
      * If the next token is a 64-bit unsigned integer, consume it and return its
      * value.  Otherwise, throw a {@link ParseException}.
      */
@@ -617,6 +847,19 @@ public final class TextFormat {
     }
 
     /**
+     * If the next token is a 64-bit unsigned integer, consume it and return
+     * {@code true}.  Otherwise, return {@code false} without doing anything.
+     */
+    public boolean tryConsumeUInt64() {
+      try {
+        consumeUInt64();
+        return true;
+      } catch (ParseException e) {
+        return false;
+      }
+    }
+
+    /**
      * If the next token is a double, consume it and return its value.
      * Otherwise, throw a {@link ParseException}.
      */
@@ -642,6 +885,19 @@ public final class TextFormat {
     }
 
     /**
+     * If the next token is a double, consume it and return {@code true}.
+     * Otherwise, return {@code false} without doing anything.
+     */
+    public boolean tryConsumeDouble() {
+      try {
+        consumeDouble();
+        return true;
+      } catch (ParseException e) {
+        return false;
+      }
+    }
+
+    /**
      * If the next token is a float, consume it and return its value.
      * Otherwise, throw a {@link ParseException}.
      */
@@ -667,14 +923,31 @@ public final class TextFormat {
     }
 
     /**
+     * If the next token is a float, consume it and return {@code true}.
+     * Otherwise, return {@code false} without doing anything.
+     */
+    public boolean tryConsumeFloat() {
+      try {
+        consumeFloat();
+        return true;
+      } catch (ParseException e) {
+        return false;
+      }
+    }
+
+    /**
      * If the next token is a boolean, consume it and return its value.
      * Otherwise, throw a {@link ParseException}.
      */
     public boolean consumeBoolean() throws ParseException {
-      if (currentToken.equals("true")) {
+      if (currentToken.equals("true") ||
+          currentToken.equals("t") ||
+          currentToken.equals("1")) {
         nextToken();
         return true;
-      } else if (currentToken.equals("false")) {
+      } else if (currentToken.equals("false") ||
+                 currentToken.equals("f") ||
+                 currentToken.equals("0")) {
         nextToken();
         return false;
       } else {
@@ -691,6 +964,19 @@ public final class TextFormat {
     }
 
     /**
+     * If the next token is a string, consume it and return {@code true}.
+     * Otherwise, return {@code false}.
+     */
+    public boolean tryConsumeString() {
+      try {
+        consumeString();
+        return true;
+      } catch (ParseException e) {
+        return false;
+      }
+    }
+
+    /**
      * If the next token is a string, consume it, unescape it as a
      * {@link ByteString}, and return it.  Otherwise, throw a
      * {@link ParseException}.
@@ -710,7 +996,8 @@ public final class TextFormat {
      * multiple adjacent tokens which are automatically concatenated, like in
      * C or Python.
      */
-    private void consumeByteString(List<ByteString> list) throws ParseException {
+    private void consumeByteString(List<ByteString> list)
+        throws ParseException {
       final char quote = currentToken.length() > 0 ? currentToken.charAt(0)
                                                    : '\0';
       if (quote != '\"' && quote != '\'') {
@@ -740,7 +1027,7 @@ public final class TextFormat {
     public ParseException parseException(final String description) {
       // Note:  People generally prefer one-based line and column numbers.
       return new ParseException(
-        (line + 1) + ":" + (column + 1) + ": " + description);
+        line + 1, column + 1, description);
     }
 
     /**
@@ -751,7 +1038,7 @@ public final class TextFormat {
         final String description) {
       // Note:  People generally prefer one-based line and column numbers.
       return new ParseException(
-        (previousLine + 1) + ":" + (previousColumn + 1) + ": " + description);
+        previousLine + 1, previousColumn + 1, description);
     }
 
     /**
@@ -776,11 +1063,58 @@ public final class TextFormat {
   public static class ParseException extends IOException {
     private static final long serialVersionUID = 3196188060225107702L;
 
+    private final int line;
+    private final int column;
+
+    /** Create a new instance, with -1 as the line and column numbers. */
     public ParseException(final String message) {
-      super(message);
+      this(-1, -1, message);
+    }
+
+    /**
+     * Create a new instance
+     *
+     * @param line the line number where the parse error occurred,
+     * using 1-offset.
+     * @param column the column number where the parser error occurred,
+     * using 1-offset.
+     */
+    public ParseException(final int line, final int column,
+        final String message) {
+      super(Integer.toString(line) + ":" + column + ": " + message);
+      this.line = line;
+      this.column = column;
+    }
+
+    /**
+     * Return the line where the parse exception occurred, or -1 when
+     * none is provided. The value is specified as 1-offset, so the first
+     * line is line 1.
+     */
+    public int getLine() {
+      return line;
+    }
+
+    /**
+     * Return the column where the parse exception occurred, or -1 when
+     * none is provided. The value is specified as 1-offset, so the first
+     * column is column 1.
+     */
+    public int getColumn() {
+      return column;
     }
   }
 
+  private static final Parser PARSER = Parser.newBuilder().build();
+
+  /**
+   * Return a {@link Parser} instance which can parse text-format
+   * messages. The returned instance is thread-safe.
+   */
+  public static Parser getParser() {
+    return PARSER;
+  }
+
   /**
    * Parse a text-format message from {@code input} and merge the contents
    * into {@code builder}.
@@ -788,7 +1122,7 @@ public final class TextFormat {
   public static void merge(final Readable input,
                            final Message.Builder builder)
                            throws IOException {
-    merge(input, ExtensionRegistry.getEmptyRegistry(), builder);
+    PARSER.merge(input, builder);
   }
 
   /**
@@ -798,7 +1132,7 @@ public final class TextFormat {
   public static void merge(final CharSequence input,
                            final Message.Builder builder)
                            throws ParseException {
-    merge(input, ExtensionRegistry.getEmptyRegistry(), builder);
+    PARSER.merge(input, builder);
   }
 
   /**
@@ -810,35 +1144,9 @@ public final class TextFormat {
                            final ExtensionRegistry extensionRegistry,
                            final Message.Builder builder)
                            throws IOException {
-    // Read the entire input to a String then parse that.
-
-    // If StreamTokenizer were not quite so crippled, or if there were a kind
-    // of Reader that could read in chunks that match some particular regex,
-    // or if we wanted to write a custom Reader to tokenize our stream, then
-    // we would not have to read to one big String.  Alas, none of these is
-    // the case.  Oh well.
-
-    merge(toStringBuilder(input), extensionRegistry, builder);
+    PARSER.merge(input, extensionRegistry, builder);
   }
 
-  private static final int BUFFER_SIZE = 4096;
-
-  // TODO(chrisn): See if working around java.io.Reader#read(CharBuffer)
-  // overhead is worthwhile
-  private static StringBuilder toStringBuilder(final Readable input)
-      throws IOException {
-    final StringBuilder text = new StringBuilder();
-    final CharBuffer buffer = CharBuffer.allocate(BUFFER_SIZE);
-    while (true) {
-      final int n = input.read(buffer);
-      if (n == -1) {
-        break;
-      }
-      buffer.flip();
-      text.append(buffer, 0, n);
-    }
-    return text;
-  }
 
   /**
    * Parse a text-format message from {@code input} and merge the contents
@@ -849,187 +1157,466 @@ public final class TextFormat {
                            final ExtensionRegistry extensionRegistry,
                            final Message.Builder builder)
                            throws ParseException {
-    final Tokenizer tokenizer = new Tokenizer(input);
-
-    while (!tokenizer.atEnd()) {
-      mergeField(tokenizer, extensionRegistry, builder);
-    }
+    PARSER.merge(input, extensionRegistry, builder);
   }
 
+
   /**
-   * Parse a single field from {@code tokenizer} and merge it into
-   * {@code builder}.
+   * Parser for text-format proto2 instances. This class is thread-safe.
+   * The implementation largely follows google/protobuf/text_format.cc.
+   *
+   * <p>Use {@link TextFormat#getParser()} to obtain the default parser, or
+   * {@link Builder} to control the parser behavior.
    */
-  private static void mergeField(final Tokenizer tokenizer,
-                                 final ExtensionRegistry extensionRegistry,
-                                 final Message.Builder builder)
-                                 throws ParseException {
-    FieldDescriptor field;
-    final Descriptor type = builder.getDescriptorForType();
-    ExtensionRegistry.ExtensionInfo extension = null;
+  public static class Parser {
+    /**
+     * Determines if repeated values for non-repeated fields and
+     * oneofs are permitted. For example, given required/optional field "foo"
+     * and a oneof containing "baz" and "qux":
+     * <ul>
+     * <li>"foo: 1 foo: 2"
+     * <li>"baz: 1 qux: 2"
+     * <li>merging "foo: 2" into a proto in which foo is already set, or
+     * <li>merging "qux: 2" into a proto in which baz is already set.
+     * </ul>
+     */
+    public enum SingularOverwritePolicy {
+      /** The last value is retained. */
+      ALLOW_SINGULAR_OVERWRITES,
+      /** An error is issued. */
+      FORBID_SINGULAR_OVERWRITES
+    }
 
-    if (tokenizer.tryConsume("[")) {
-      // An extension.
-      final StringBuilder name =
-          new StringBuilder(tokenizer.consumeIdentifier());
-      while (tokenizer.tryConsume(".")) {
-        name.append('.');
-        name.append(tokenizer.consumeIdentifier());
-      }
+    private final boolean allowUnknownFields;
+    private final SingularOverwritePolicy singularOverwritePolicy;
+
+    private Parser(boolean allowUnknownFields,
+        SingularOverwritePolicy singularOverwritePolicy) {
+      this.allowUnknownFields = allowUnknownFields;
+      this.singularOverwritePolicy = singularOverwritePolicy;
+    }
+
+    /**
+     * Returns a new instance of {@link Builder}.
+     */
+    public static Builder newBuilder() {
+      return new Builder();
+    }
 
-      extension = extensionRegistry.findExtensionByName(name.toString());
+    /**
+     * Builder that can be used to obtain new instances of {@link Parser}.
+     */
+    public static class Builder {
+      private boolean allowUnknownFields = false;
+      private SingularOverwritePolicy singularOverwritePolicy =
+          SingularOverwritePolicy.ALLOW_SINGULAR_OVERWRITES;
+
+      /**
+       * Sets parser behavior when a non-repeated field appears more than once.
+       */
+      public Builder setSingularOverwritePolicy(SingularOverwritePolicy p) {
+        this.singularOverwritePolicy = p;
+        return this;
+      }
 
-      if (extension == null) {
-        throw tokenizer.parseExceptionPreviousToken(
-          "Extension \"" + name + "\" not found in the ExtensionRegistry.");
-      } else if (extension.descriptor.getContainingType() != type) {
-        throw tokenizer.parseExceptionPreviousToken(
-          "Extension \"" + name + "\" does not extend message type \"" +
-          type.getFullName() + "\".");
+      public Parser build() {
+        return new Parser(allowUnknownFields, singularOverwritePolicy);
       }
+    }
 
-      tokenizer.consume("]");
+    /**
+     * Parse a text-format message from {@code input} and merge the contents
+     * into {@code builder}.
+     */
+    public void merge(final Readable input,
+                      final Message.Builder builder)
+                      throws IOException {
+      merge(input, ExtensionRegistry.getEmptyRegistry(), builder);
+    }
 
-      field = extension.descriptor;
-    } else {
-      final String name = tokenizer.consumeIdentifier();
-      field = type.findFieldByName(name);
+    /**
+     * Parse a text-format message from {@code input} and merge the contents
+     * into {@code builder}.
+     */
+    public void merge(final CharSequence input,
+                      final Message.Builder builder)
+                      throws ParseException {
+      merge(input, ExtensionRegistry.getEmptyRegistry(), builder);
+    }
 
-      // Group names are expected to be capitalized as they appear in the
-      // .proto file, which actually matches their type names, not their field
-      // names.
-      if (field == null) {
-        // Explicitly specify US locale so that this code does not break when
-        // executing in Turkey.
-        final String lowerName = name.toLowerCase(Locale.US);
-        field = type.findFieldByName(lowerName);
-        // If the case-insensitive match worked but the field is NOT a group,
-        if (field != null && field.getType() != FieldDescriptor.Type.GROUP) {
-          field = null;
+    /**
+     * Parse a text-format message from {@code input} and merge the contents
+     * into {@code builder}.  Extensions will be recognized if they are
+     * registered in {@code extensionRegistry}.
+     */
+    public void merge(final Readable input,
+                      final ExtensionRegistry extensionRegistry,
+                      final Message.Builder builder)
+                      throws IOException {
+      // Read the entire input to a String then parse that.
+
+      // If StreamTokenizer were not quite so crippled, or if there were a kind
+      // of Reader that could read in chunks that match some particular regex,
+      // or if we wanted to write a custom Reader to tokenize our stream, then
+      // we would not have to read to one big String.  Alas, none of these is
+      // the case.  Oh well.
+
+      merge(toStringBuilder(input), extensionRegistry, builder);
+    }
+
+
+    private static final int BUFFER_SIZE = 4096;
+
+    // TODO(chrisn): See if working around java.io.Reader#read(CharBuffer)
+    // overhead is worthwhile
+    private static StringBuilder toStringBuilder(final Readable input)
+        throws IOException {
+      final StringBuilder text = new StringBuilder();
+      final CharBuffer buffer = CharBuffer.allocate(BUFFER_SIZE);
+      while (true) {
+        final int n = input.read(buffer);
+        if (n == -1) {
+          break;
         }
+        buffer.flip();
+        text.append(buffer, 0, n);
       }
-      // Again, special-case group names as described above.
-      if (field != null && field.getType() == FieldDescriptor.Type.GROUP &&
-          !field.getMessageType().getName().equals(name)) {
-        field = null;
-      }
+      return text;
+    }
 
-      if (field == null) {
-        throw tokenizer.parseExceptionPreviousToken(
-          "Message type \"" + type.getFullName() +
-          "\" has no field named \"" + name + "\".");
+    /**
+     * Parse a text-format message from {@code input} and merge the contents
+     * into {@code builder}.  Extensions will be recognized if they are
+     * registered in {@code extensionRegistry}.
+     */
+    public void merge(final CharSequence input,
+                      final ExtensionRegistry extensionRegistry,
+                      final Message.Builder builder)
+                      throws ParseException {
+      final Tokenizer tokenizer = new Tokenizer(input);
+      MessageReflection.BuilderAdapter target =
+          new MessageReflection.BuilderAdapter(builder);
+
+      while (!tokenizer.atEnd()) {
+        mergeField(tokenizer, extensionRegistry, target);
       }
     }
 
-    Object value = null;
 
-    if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
-      tokenizer.tryConsume(":");  // optional
+    /**
+     * Parse a single field from {@code tokenizer} and merge it into
+     * {@code target}.
+     */
+    private void mergeField(final Tokenizer tokenizer,
+                            final ExtensionRegistry extensionRegistry,
+                            final MessageReflection.MergeTarget target)
+                            throws ParseException {
+      FieldDescriptor field = null;
+      final Descriptor type = target.getDescriptorForType();
+      ExtensionRegistry.ExtensionInfo extension = null;
+
+      if (tokenizer.tryConsume("[")) {
+        // An extension.
+        final StringBuilder name =
+            new StringBuilder(tokenizer.consumeIdentifier());
+        while (tokenizer.tryConsume(".")) {
+          name.append('.');
+          name.append(tokenizer.consumeIdentifier());
+        }
 
-      final String endToken;
-      if (tokenizer.tryConsume("<")) {
-        endToken = ">";
-      } else {
-        tokenizer.consume("{");
-        endToken = "}";
-      }
+        extension = target.findExtensionByName(
+            extensionRegistry, name.toString());
+
+        if (extension == null) {
+          if (!allowUnknownFields) {
+            throw tokenizer.parseExceptionPreviousToken(
+              "Extension \"" + name + "\" not found in the ExtensionRegistry.");
+          } else {
+            logger.warning(
+              "Extension \"" + name + "\" not found in the ExtensionRegistry.");
+          }
+        } else {
+          if (extension.descriptor.getContainingType() != type) {
+            throw tokenizer.parseExceptionPreviousToken(
+              "Extension \"" + name + "\" does not extend message type \"" +
+              type.getFullName() + "\".");
+          }
+          field = extension.descriptor;
+        }
 
-      final Message.Builder subBuilder;
-      if (extension == null) {
-        subBuilder = builder.newBuilderForField(field);
+        tokenizer.consume("]");
       } else {
-        subBuilder = extension.defaultInstance.newBuilderForType();
-      }
+        final String name = tokenizer.consumeIdentifier();
+        field = type.findFieldByName(name);
+
+        // Group names are expected to be capitalized as they appear in the
+        // .proto file, which actually matches their type names, not their field
+        // names.
+        if (field == null) {
+          // Explicitly specify US locale so that this code does not break when
+          // executing in Turkey.
+          final String lowerName = name.toLowerCase(Locale.US);
+          field = type.findFieldByName(lowerName);
+          // If the case-insensitive match worked but the field is NOT a group,
+          if (field != null && field.getType() != FieldDescriptor.Type.GROUP) {
+            field = null;
+          }
+        }
+        // Again, special-case group names as described above.
+        if (field != null && field.getType() == FieldDescriptor.Type.GROUP &&
+            !field.getMessageType().getName().equals(name)) {
+          field = null;
+        }
 
-      while (!tokenizer.tryConsume(endToken)) {
-        if (tokenizer.atEnd()) {
-          throw tokenizer.parseException(
-            "Expected \"" + endToken + "\".");
+        if (field == null) {
+          if (!allowUnknownFields) {
+            throw tokenizer.parseExceptionPreviousToken(
+              "Message type \"" + type.getFullName() +
+              "\" has no field named \"" + name + "\".");
+          } else {
+            logger.warning(
+              "Message type \"" + type.getFullName() +
+              "\" has no field named \"" + name + "\".");
+          }
         }
-        mergeField(tokenizer, extensionRegistry, subBuilder);
       }
 
-      value = subBuilder.build();
-
-    } else {
-      tokenizer.consume(":");
+      // Skips unknown fields.
+      if (field == null) {
+        // Try to guess the type of this field.
+        // If this field is not a message, there should be a ":" between the
+        // field name and the field value and also the field value should not
+        // start with "{" or "<" which indicates the beginning of a message body.
+        // If there is no ":" or there is a "{" or "<" after ":", this field has
+        // to be a message or the input is ill-formed.
+        if (tokenizer.tryConsume(":") && !tokenizer.lookingAt("{") &&
+            !tokenizer.lookingAt("<")) {
+          skipFieldValue(tokenizer);
+        } else {
+          skipFieldMessage(tokenizer);
+        }
+        return;
+      }
 
-      switch (field.getType()) {
-        case INT32:
-        case SINT32:
-        case SFIXED32:
-          value = tokenizer.consumeInt32();
-          break;
+      // Handle potential ':'.
+      if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
+        tokenizer.tryConsume(":");  // optional
+      } else {
+        tokenizer.consume(":");  // required
+      }
+      // Support specifying repeated field values as a comma-separated list.
+      // Ex."foo: [1, 2, 3]"
+      if (field.isRepeated() && tokenizer.tryConsume("[")) {
+        while (true) {
+          consumeFieldValue(tokenizer, extensionRegistry, target, field, extension);
+          if (tokenizer.tryConsume("]")) {
+            // End of list.
+            break;
+          }
+          tokenizer.consume(",");
+        }
+      } else {
+        consumeFieldValue(tokenizer, extensionRegistry, target, field, extension);
+      }
+    }
 
-        case INT64:
-        case SINT64:
-        case SFIXED64:
-          value = tokenizer.consumeInt64();
-          break;
+    /**
+     * Parse a single field value from {@code tokenizer} and merge it into
+     * {@code target}.
+     */
+    private void consumeFieldValue(
+        final Tokenizer tokenizer,
+        final ExtensionRegistry extensionRegistry,
+        final MessageReflection.MergeTarget target,
+        final FieldDescriptor field,
+        final ExtensionRegistry.ExtensionInfo extension)
+        throws ParseException {
+      Object value = null;
+
+      if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
+        final String endToken;
+        if (tokenizer.tryConsume("<")) {
+          endToken = ">";
+        } else {
+          tokenizer.consume("{");
+          endToken = "}";
+        }
 
-        case UINT32:
-        case FIXED32:
-          value = tokenizer.consumeUInt32();
-          break;
+        final MessageReflection.MergeTarget subField;
+        subField = target.newMergeTargetForField(field,
+            (extension == null) ? null : extension.defaultInstance);
 
-        case UINT64:
-        case FIXED64:
-          value = tokenizer.consumeUInt64();
-          break;
+        while (!tokenizer.tryConsume(endToken)) {
+          if (tokenizer.atEnd()) {
+            throw tokenizer.parseException(
+              "Expected \"" + endToken + "\".");
+          }
+          mergeField(tokenizer, extensionRegistry, subField);
+        }
 
-        case FLOAT:
-          value = tokenizer.consumeFloat();
-          break;
+        value = subField.finish();
 
-        case DOUBLE:
-          value = tokenizer.consumeDouble();
-          break;
+      } else {
+        switch (field.getType()) {
+          case INT32:
+          case SINT32:
+          case SFIXED32:
+            value = tokenizer.consumeInt32();
+            break;
+
+          case INT64:
+          case SINT64:
+          case SFIXED64:
+            value = tokenizer.consumeInt64();
+            break;
+
+          case UINT32:
+          case FIXED32:
+            value = tokenizer.consumeUInt32();
+            break;
+
+          case UINT64:
+          case FIXED64:
+            value = tokenizer.consumeUInt64();
+            break;
+
+          case FLOAT:
+            value = tokenizer.consumeFloat();
+            break;
+
+          case DOUBLE:
+            value = tokenizer.consumeDouble();
+            break;
+
+          case BOOL:
+            value = tokenizer.consumeBoolean();
+            break;
+
+          case STRING:
+            value = tokenizer.consumeString();
+            break;
+
+          case BYTES:
+            value = tokenizer.consumeByteString();
+            break;
+
+          case ENUM:
+            final EnumDescriptor enumType = field.getEnumType();
+
+            if (tokenizer.lookingAtInteger()) {
+              final int number = tokenizer.consumeInt32();
+              value = enumType.findValueByNumber(number);
+              if (value == null) {
+                throw tokenizer.parseExceptionPreviousToken(
+                  "Enum type \"" + enumType.getFullName() +
+                  "\" has no value with number " + number + '.');
+              }
+            } else {
+              final String id = tokenizer.consumeIdentifier();
+              value = enumType.findValueByName(id);
+              if (value == null) {
+                throw tokenizer.parseExceptionPreviousToken(
+                  "Enum type \"" + enumType.getFullName() +
+                  "\" has no value named \"" + id + "\".");
+              }
+            }
 
-        case BOOL:
-          value = tokenizer.consumeBoolean();
-          break;
+            break;
 
-        case STRING:
-          value = tokenizer.consumeString();
-          break;
+          case MESSAGE:
+          case GROUP:
+            throw new RuntimeException("Can't get here.");
+        }
+      }
 
-        case BYTES:
-          value = tokenizer.consumeByteString();
-          break;
+      if (field.isRepeated()) {
+        target.addRepeatedField(field, value);
+      } else if ((singularOverwritePolicy
+              == SingularOverwritePolicy.FORBID_SINGULAR_OVERWRITES)
+          && target.hasField(field)) {
+        throw tokenizer.parseExceptionPreviousToken("Non-repeated field \""
+            + field.getFullName() + "\" cannot be overwritten.");
+      } else if ((singularOverwritePolicy
+              == SingularOverwritePolicy.FORBID_SINGULAR_OVERWRITES)
+          && field.getContainingOneof() != null
+          && target.hasOneof(field.getContainingOneof())) {
+        Descriptors.OneofDescriptor oneof = field.getContainingOneof();
+        throw tokenizer.parseExceptionPreviousToken("Field \""
+            + field.getFullName() + "\" is specified along with field \""
+            + target.getOneofFieldDescriptor(oneof).getFullName()
+            + "\", another member of oneof \"" + oneof.getName() + "\".");
+      } else {
+        target.setField(field, value);
+      }
+    }
 
-        case ENUM:
-          final EnumDescriptor enumType = field.getEnumType();
-
-          if (tokenizer.lookingAtInteger()) {
-            final int number = tokenizer.consumeInt32();
-            value = enumType.findValueByNumber(number);
-            if (value == null) {
-              throw tokenizer.parseExceptionPreviousToken(
-                "Enum type \"" + enumType.getFullName() +
-                "\" has no value with number " + number + '.');
-            }
-          } else {
-            final String id = tokenizer.consumeIdentifier();
-            value = enumType.findValueByName(id);
-            if (value == null) {
-              throw tokenizer.parseExceptionPreviousToken(
-                "Enum type \"" + enumType.getFullName() +
-                "\" has no value named \"" + id + "\".");
-            }
-          }
+    /**
+     * Skips the next field including the field's name and value.
+     */
+    private void skipField(Tokenizer tokenizer) throws ParseException {
+      if (tokenizer.tryConsume("[")) {
+        // Extension name.
+        do {
+          tokenizer.consumeIdentifier();
+        } while (tokenizer.tryConsume("."));
+        tokenizer.consume("]");
+      } else {
+        tokenizer.consumeIdentifier();
+      }
 
-          break;
+      // Try to guess the type of this field.
+      // If this field is not a message, there should be a ":" between the
+      // field name and the field value and also the field value should not
+      // start with "{" or "<" which indicates the beginning of a message body.
+      // If there is no ":" or there is a "{" or "<" after ":", this field has
+      // to be a message or the input is ill-formed.
+      if (tokenizer.tryConsume(":") && !tokenizer.lookingAt("<") &&
+          !tokenizer.lookingAt("{")) {
+        skipFieldValue(tokenizer);
+      } else {
+        skipFieldMessage(tokenizer);
+      }
+      // For historical reasons, fields may optionally be separated by commas or
+      // semicolons.
+      if (!tokenizer.tryConsume(";")) {
+        tokenizer.tryConsume(",");
+      }
+    }
 
-        case MESSAGE:
-        case GROUP:
-          throw new RuntimeException("Can't get here.");
+    /**
+     * Skips the whole body of a message including the beginning delimiter and
+     * the ending delimiter.
+     */
+    private void skipFieldMessage(Tokenizer tokenizer) throws ParseException {
+      final String delimiter;
+      if (tokenizer.tryConsume("<")) {
+        delimiter = ">";
+      } else {
+        tokenizer.consume("{");
+        delimiter = "}";
       }
+      while (!tokenizer.lookingAt(">") && !tokenizer.lookingAt("}")) {
+        skipField(tokenizer);
+      }
+      tokenizer.consume(delimiter);
     }
 
-    if (field.isRepeated()) {
-      builder.addRepeatedField(field, value);
-    } else {
-      builder.setField(field, value);
+    /**
+     * Skips a field value.
+     */
+    private void skipFieldValue(Tokenizer tokenizer) throws ParseException {
+      if (tokenizer.tryConsumeString()) {
+        while (tokenizer.tryConsumeString()) {}
+        return;
+      }
+      if (!tokenizer.tryConsumeIdentifier() &&  // includes enum & boolean
+          !tokenizer.tryConsumeInt64() &&       // includes int32
+          !tokenizer.tryConsumeUInt64() &&      // includes uint32
+          !tokenizer.tryConsumeDouble() &&
+          !tokenizer.tryConsumeFloat()) {
+        throw tokenizer.parseException(
+            "Invalid field value: " + tokenizer.currentToken);
+      }
     }
   }
 
@@ -1039,6 +1626,11 @@ public final class TextFormat {
   // Some of these methods are package-private because Descriptors.java uses
   // them.
 
+  private interface ByteSequence {
+    int size();
+    byte byteAt(int offset);
+  }
+
   /**
    * Escapes bytes in the format used in protocol buffer text format, which
    * is the same as the format used for C string literals.  All bytes
@@ -1047,7 +1639,7 @@ public final class TextFormat {
    * which no defined short-hand escape sequence is defined will be escaped
    * using 3-digit octal sequences.
    */
-  static String escapeBytes(final ByteString input) {
+  private static String escapeBytes(final ByteSequence input) {
     final StringBuilder builder = new StringBuilder(input.size());
     for (int i = 0; i < input.size(); i++) {
       final byte b = input.byteAt(i);
@@ -1064,6 +1656,9 @@ public final class TextFormat {
         case '\'': builder.append("\\\'"); break;
         case '"' : builder.append("\\\""); break;
         default:
+          // Note:  Bytes with the high-order bit set should be escaped.  Since
+          //   bytes are signed, such bytes will compare less than 0x20, hence
+          //   the following line is correct.
           if (b >= 0x20) {
             builder.append((char) b);
           } else {
@@ -1079,31 +1674,74 @@ public final class TextFormat {
   }
 
   /**
+   * Escapes bytes in the format used in protocol buffer text format, which
+   * is the same as the format used for C string literals.  All bytes
+   * that are not printable 7-bit ASCII characters are escaped, as well as
+   * backslash, single-quote, and double-quote characters.  Characters for
+   * which no defined short-hand escape sequence is defined will be escaped
+   * using 3-digit octal sequences.
+   */
+  static String escapeBytes(final ByteString input) {
+    return escapeBytes(new ByteSequence() {
+      public int size() {
+        return input.size();
+      }
+      public byte byteAt(int offset) {
+        return input.byteAt(offset);
+      }
+    });
+  }
+
+  /**
+   * Like {@link #escapeBytes(ByteString)}, but used for byte array.
+   */
+  static String escapeBytes(final byte[] input) {
+    return escapeBytes(new ByteSequence() {
+      public int size() {
+        return input.length;
+      }
+      public byte byteAt(int offset) {
+        return input[offset];
+      }
+    });
+  }
+
+  /**
    * Un-escape a byte sequence as escaped using
    * {@link #escapeBytes(ByteString)}.  Two-digit hex escapes (starting with
    * "\x") are also recognized.
    */
-  static ByteString unescapeBytes(final CharSequence input)
+  static ByteString unescapeBytes(final CharSequence charString)
       throws InvalidEscapeSequenceException {
-    final byte[] result = new byte[input.length()];
+    // First convert the Java character sequence to UTF-8 bytes.
+    ByteString input = ByteString.copyFromUtf8(charString.toString());
+    // Then unescape certain byte sequences introduced by ASCII '\\'.  The valid
+    // escapes can all be expressed with ASCII characters, so it is safe to
+    // operate on bytes here.
+    //
+    // Unescaping the input byte array will result in a byte sequence that's no
+    // longer than the input.  That's because each escape sequence is between
+    // two and four bytes long and stands for a single byte.
+    final byte[] result = new byte[input.size()];
     int pos = 0;
-    for (int i = 0; i < input.length(); i++) {
-      char c = input.charAt(i);
+    for (int i = 0; i < input.size(); i++) {
+      byte c = input.byteAt(i);
       if (c == '\\') {
-        if (i + 1 < input.length()) {
+        if (i + 1 < input.size()) {
           ++i;
-          c = input.charAt(i);
+          c = input.byteAt(i);
           if (isOctal(c)) {
             // Octal escape.
             int code = digitValue(c);
-            if (i + 1 < input.length() && isOctal(input.charAt(i + 1))) {
+            if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) {
               ++i;
-              code = code * 8 + digitValue(input.charAt(i));
+              code = code * 8 + digitValue(input.byteAt(i));
             }
-            if (i + 1 < input.length() && isOctal(input.charAt(i + 1))) {
+            if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) {
               ++i;
-              code = code * 8 + digitValue(input.charAt(i));
+              code = code * 8 + digitValue(input.byteAt(i));
             }
+            // TODO: Check that 0 <= code && code <= 0xFF.
             result[pos++] = (byte)code;
           } else {
             switch (c) {
@@ -1121,31 +1759,31 @@ public final class TextFormat {
               case 'x':
                 // hex escape
                 int code = 0;
-                if (i + 1 < input.length() && isHex(input.charAt(i + 1))) {
+                if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) {
                   ++i;
-                  code = digitValue(input.charAt(i));
+                  code = digitValue(input.byteAt(i));
                 } else {
                   throw new InvalidEscapeSequenceException(
-                    "Invalid escape sequence: '\\x' with no digits");
+                      "Invalid escape sequence: '\\x' with no digits");
                 }
-                if (i + 1 < input.length() && isHex(input.charAt(i + 1))) {
+                if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) {
                   ++i;
-                  code = code * 16 + digitValue(input.charAt(i));
+                  code = code * 16 + digitValue(input.byteAt(i));
                 }
                 result[pos++] = (byte)code;
                 break;
 
               default:
                 throw new InvalidEscapeSequenceException(
-                  "Invalid escape sequence: '\\" + c + '\'');
+                    "Invalid escape sequence: '\\" + (char)c + '\'');
             }
           }
         } else {
           throw new InvalidEscapeSequenceException(
-            "Invalid escape sequence: '\\' at end of string.");
+              "Invalid escape sequence: '\\' at end of string.");
         }
       } else {
-        result[pos++] = (byte)c;
+        result[pos++] = c;
       }
     }
 
@@ -1174,6 +1812,13 @@ public final class TextFormat {
   }
 
   /**
+   * Backslash-escape backslashes and double quotes for Unicode message output.
+   */
+  public static String escapeDoubleQuotesAndBackslashes(final String input) {
+    return input.replace("\\", "\\\\").replace("\"", "\\\"");
+  }
+
+  /**
    * Un-escape a text string as escaped using {@link #escapeText(String)}.
    * Two-digit hex escapes (starting with "\x") are also recognized.
    */
@@ -1183,12 +1828,12 @@ public final class TextFormat {
   }
 
   /** Is this an octal digit? */
-  private static boolean isOctal(final char c) {
+  private static boolean isOctal(final byte c) {
     return '0' <= c && c <= '7';
   }
 
   /** Is this a hex digit? */
-  private static boolean isHex(final char c) {
+  private static boolean isHex(final byte c) {
     return ('0' <= c && c <= '9') ||
            ('a' <= c && c <= 'f') ||
            ('A' <= c && c <= 'F');
@@ -1199,7 +1844,7 @@ public final class TextFormat {
    * numeric value.  This is like {@code Character.digit()} but we don't accept
    * non-ASCII digits.
    */
-  private static int digitValue(final char c) {
+  private static int digitValue(final byte c) {
     if ('0' <= c && c <= '9') {
       return c - '0';
     } else if ('a' <= c && c <= 'z') {
@@ -1212,7 +1857,7 @@ public final class TextFormat {
   /**
    * Parse a 32-bit signed integer from the text.  Unlike the Java standard
    * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
-   * and "0" to signify hexidecimal and octal numbers, respectively.
+   * and "0" to signify hexadecimal and octal numbers, respectively.
    */
   static int parseInt32(final String text) throws NumberFormatException {
     return (int) parseInteger(text, true, false);
@@ -1221,7 +1866,7 @@ public final class TextFormat {
   /**
    * Parse a 32-bit unsigned integer from the text.  Unlike the Java standard
    * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
-   * and "0" to signify hexidecimal and octal numbers, respectively.  The
+   * and "0" to signify hexadecimal and octal numbers, respectively.  The
    * result is coerced to a (signed) {@code int} when returned since Java has
    * no unsigned integer type.
    */
@@ -1232,7 +1877,7 @@ public final class TextFormat {
   /**
    * Parse a 64-bit signed integer from the text.  Unlike the Java standard
    * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
-   * and "0" to signify hexidecimal and octal numbers, respectively.
+   * and "0" to signify hexadecimal and octal numbers, respectively.
    */
   static long parseInt64(final String text) throws NumberFormatException {
     return parseInteger(text, true, true);
@@ -1241,7 +1886,7 @@ public final class TextFormat {
   /**
    * Parse a 64-bit unsigned integer from the text.  Unlike the Java standard
    * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
-   * and "0" to signify hexidecimal and octal numbers, respectively.  The
+   * and "0" to signify hexadecimal and octal numbers, respectively.  The
    * result is coerced to a (signed) {@code long} when returned since Java has
    * no unsigned long type.
    */