aboutsummaryrefslogtreecommitdiffstats
path: root/java/src/main/java/com/google/protobuf/ByteString.java
diff options
context:
space:
mode:
Diffstat (limited to 'java/src/main/java/com/google/protobuf/ByteString.java')
-rw-r--r--java/src/main/java/com/google/protobuf/ByteString.java937
1 files changed, 784 insertions, 153 deletions
diff --git a/java/src/main/java/com/google/protobuf/ByteString.java b/java/src/main/java/com/google/protobuf/ByteString.java
index 5fade03..7da5612 100644
--- a/java/src/main/java/com/google/protobuf/ByteString.java
+++ b/java/src/main/java/com/google/protobuf/ByteString.java
@@ -1,6 +1,6 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
-// http://code.google.com/p/protobuf/
+// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -30,140 +30,426 @@
package com.google.protobuf;
-import java.io.InputStream;
-import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
-import java.io.FilterOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Iterator;
import java.util.List;
+import java.util.NoSuchElementException;
/**
- * Immutable array of bytes.
+ * Immutable sequence of bytes. Substring is supported by sharing the reference
+ * to the immutable underlying bytes, as with {@link String}. Concatenation is
+ * likewise supported without copying (long strings) by building a tree of
+ * pieces in {@link RopeByteString}.
+ * <p>
+ * Like {@link String}, the contents of a {@link ByteString} can never be
+ * observed to change, not even in the presence of a data race or incorrect
+ * API usage in the client code.
*
* @author crazybob@google.com Bob Lee
* @author kenton@google.com Kenton Varda
+ * @author carlanton@google.com Carl Haverl
+ * @author martinrb@google.com Martin Buchholz
*/
-public final class ByteString {
- private final byte[] bytes;
+public abstract class ByteString implements Iterable<Byte> {
- private ByteString(final byte[] bytes) {
- this.bytes = bytes;
- }
+ /**
+ * When two strings to be concatenated have a combined length shorter than
+ * this, we just copy their bytes on {@link #concat(ByteString)}.
+ * The trade-off is copy size versus the overhead of creating tree nodes
+ * in {@link RopeByteString}.
+ */
+ static final int CONCATENATE_BY_COPY_SIZE = 128;
+
+ /**
+ * When copying an InputStream into a ByteString with .readFrom(),
+ * the chunks in the underlying rope start at 256 bytes, but double
+ * each iteration up to 8192 bytes.
+ */
+ static final int MIN_READ_FROM_CHUNK_SIZE = 0x100; // 256b
+ static final int MAX_READ_FROM_CHUNK_SIZE = 0x2000; // 8k
/**
- * Gets the byte at the given index.
+ * Empty {@code ByteString}.
+ */
+ public static final ByteString EMPTY = new LiteralByteString(new byte[0]);
+
+ // This constructor is here to prevent subclassing outside of this package,
+ ByteString() {}
+
+ /**
+ * Gets the byte at the given index. This method should be used only for
+ * random access to individual bytes. To access bytes sequentially, use the
+ * {@link ByteIterator} returned by {@link #iterator()}, and call {@link
+ * #substring(int, int)} first if necessary.
*
+ * @param index index of byte
+ * @return the value
* @throws ArrayIndexOutOfBoundsException {@code index} is < 0 or >= size
*/
- public byte byteAt(final int index) {
- return bytes[index];
+ public abstract byte byteAt(int index);
+
+ /**
+ * Return a {@link ByteString.ByteIterator} over the bytes in the ByteString.
+ * To avoid auto-boxing, you may get the iterator manually and call
+ * {@link ByteIterator#nextByte()}.
+ *
+ * @return the iterator
+ */
+ public abstract ByteIterator iterator();
+
+ /**
+ * This interface extends {@code Iterator<Byte>}, so that we can return an
+ * unboxed {@code byte}.
+ */
+ public interface ByteIterator extends Iterator<Byte> {
+ /**
+ * An alternative to {@link Iterator#next()} that returns an
+ * unboxed primitive {@code byte}.
+ *
+ * @return the next {@code byte} in the iteration
+ * @throws NoSuchElementException if the iteration has no more elements
+ */
+ byte nextByte();
}
/**
* Gets the number of bytes.
+ *
+ * @return size in bytes
*/
- public int size() {
- return bytes.length;
- }
+ public abstract int size();
/**
* Returns {@code true} if the size is {@code 0}, {@code false} otherwise.
+ *
+ * @return true if this is zero bytes long
*/
public boolean isEmpty() {
- return bytes.length == 0;
+ return size() == 0;
}
// =================================================================
- // byte[] -> ByteString
+ // ByteString -> substring
+
+ /**
+ * Return the substring from {@code beginIndex}, inclusive, to the end of the
+ * string.
+ *
+ * @param beginIndex start at this index
+ * @return substring sharing underlying data
+ * @throws IndexOutOfBoundsException if {@code beginIndex < 0} or
+ * {@code beginIndex > size()}.
+ */
+ public ByteString substring(int beginIndex) {
+ return substring(beginIndex, size());
+ }
+
+ /**
+ * Return the substring from {@code beginIndex}, inclusive, to {@code
+ * endIndex}, exclusive.
+ *
+ * @param beginIndex start at this index
+ * @param endIndex the last character is the one before this index
+ * @return substring sharing underlying data
+ * @throws IndexOutOfBoundsException if {@code beginIndex < 0},
+ * {@code endIndex > size()}, or {@code beginIndex > endIndex}.
+ */
+ public abstract ByteString substring(int beginIndex, int endIndex);
/**
- * Empty ByteString.
+ * Tests if this bytestring starts with the specified prefix.
+ * Similar to {@link String#startsWith(String)}
+ *
+ * @param prefix the prefix.
+ * @return <code>true</code> if the byte sequence represented by the
+ * argument is a prefix of the byte sequence represented by
+ * this string; <code>false</code> otherwise.
*/
- public static final ByteString EMPTY = new ByteString(new byte[0]);
+ public boolean startsWith(ByteString prefix) {
+ return size() >= prefix.size() &&
+ substring(0, prefix.size()).equals(prefix);
+ }
+
+ /**
+ * Tests if this bytestring ends with the specified suffix.
+ * Similar to {@link String#endsWith(String)}
+ *
+ * @param suffix the suffix.
+ * @return <code>true</code> if the byte sequence represented by the
+ * argument is a suffix of the byte sequence represented by
+ * this string; <code>false</code> otherwise.
+ */
+ public boolean endsWith(ByteString suffix) {
+ return size() >= suffix.size() &&
+ substring(size() - suffix.size()).equals(suffix);
+ }
+
+ // =================================================================
+ // byte[] -> ByteString
/**
* Copies the given bytes into a {@code ByteString}.
+ *
+ * @param bytes source array
+ * @param offset offset in source array
+ * @param size number of bytes to copy
+ * @return new {@code ByteString}
*/
- public static ByteString copyFrom(final byte[] bytes, final int offset,
- final int size) {
- final byte[] copy = new byte[size];
+ public static ByteString copyFrom(byte[] bytes, int offset, int size) {
+ byte[] copy = new byte[size];
System.arraycopy(bytes, offset, copy, 0, size);
- return new ByteString(copy);
+ return new LiteralByteString(copy);
}
/**
* Copies the given bytes into a {@code ByteString}.
+ *
+ * @param bytes to copy
+ * @return new {@code ByteString}
*/
- public static ByteString copyFrom(final byte[] bytes) {
+ public static ByteString copyFrom(byte[] bytes) {
return copyFrom(bytes, 0, bytes.length);
}
/**
- * Copies {@code size} bytes from a {@code java.nio.ByteBuffer} into
+ * Copies the next {@code size} bytes from a {@code java.nio.ByteBuffer} into
* a {@code ByteString}.
+ *
+ * @param bytes source buffer
+ * @param size number of bytes to copy
+ * @return new {@code ByteString}
*/
- public static ByteString copyFrom(final ByteBuffer bytes, final int size) {
- final byte[] copy = new byte[size];
+ public static ByteString copyFrom(ByteBuffer bytes, int size) {
+ byte[] copy = new byte[size];
bytes.get(copy);
- return new ByteString(copy);
+ return new LiteralByteString(copy);
}
/**
* Copies the remaining bytes from a {@code java.nio.ByteBuffer} into
* a {@code ByteString}.
+ *
+ * @param bytes sourceBuffer
+ * @return new {@code ByteString}
*/
- public static ByteString copyFrom(final ByteBuffer bytes) {
+ public static ByteString copyFrom(ByteBuffer bytes) {
return copyFrom(bytes, bytes.remaining());
}
/**
* Encodes {@code text} into a sequence of bytes using the named charset
* and returns the result as a {@code ByteString}.
+ *
+ * @param text source string
+ * @param charsetName encoding to use
+ * @return new {@code ByteString}
+ * @throws UnsupportedEncodingException if the encoding isn't found
*/
- public static ByteString copyFrom(final String text, final String charsetName)
+ public static ByteString copyFrom(String text, String charsetName)
throws UnsupportedEncodingException {
- return new ByteString(text.getBytes(charsetName));
+ return new LiteralByteString(text.getBytes(charsetName));
}
/**
* Encodes {@code text} into a sequence of UTF-8 bytes and returns the
* result as a {@code ByteString}.
+ *
+ * @param text source string
+ * @return new {@code ByteString}
*/
- public static ByteString copyFromUtf8(final String text) {
+ public static ByteString copyFromUtf8(String text) {
try {
- return new ByteString(text.getBytes("UTF-8"));
+ return new LiteralByteString(text.getBytes("UTF-8"));
} catch (UnsupportedEncodingException e) {
throw new RuntimeException("UTF-8 not supported?", e);
}
}
+ // =================================================================
+ // InputStream -> ByteString
+
+ /**
+ * Completely reads the given stream's bytes into a
+ * {@code ByteString}, blocking if necessary until all bytes are
+ * read through to the end of the stream.
+ *
+ * <b>Performance notes:</b> The returned {@code ByteString} is an
+ * immutable tree of byte arrays ("chunks") of the stream data. The
+ * first chunk is small, with subsequent chunks each being double
+ * the size, up to 8K. If the caller knows the precise length of
+ * the stream and wishes to avoid all unnecessary copies and
+ * allocations, consider using the two-argument version of this
+ * method, below.
+ *
+ * @param streamToDrain The source stream, which is read completely
+ * but not closed.
+ * @return A new {@code ByteString} which is made up of chunks of
+ * various sizes, depending on the behavior of the underlying
+ * stream.
+ * @throws IOException IOException is thrown if there is a problem
+ * reading the underlying stream.
+ */
+ public static ByteString readFrom(InputStream streamToDrain)
+ throws IOException {
+ return readFrom(
+ streamToDrain, MIN_READ_FROM_CHUNK_SIZE, MAX_READ_FROM_CHUNK_SIZE);
+ }
+
+ /**
+ * Completely reads the given stream's bytes into a
+ * {@code ByteString}, blocking if necessary until all bytes are
+ * read through to the end of the stream.
+ *
+ * <b>Performance notes:</b> The returned {@code ByteString} is an
+ * immutable tree of byte arrays ("chunks") of the stream data. The
+ * chunkSize parameter sets the size of these byte arrays. In
+ * particular, if the chunkSize is precisely the same as the length
+ * of the stream, unnecessary allocations and copies will be
+ * avoided. Otherwise, the chunks will be of the given size, except
+ * for the last chunk, which will be resized (via a reallocation and
+ * copy) to contain the remainder of the stream.
+ *
+ * @param streamToDrain The source stream, which is read completely
+ * but not closed.
+ * @param chunkSize The size of the chunks in which to read the
+ * stream.
+ * @return A new {@code ByteString} which is made up of chunks of
+ * the given size.
+ * @throws IOException IOException is thrown if there is a problem
+ * reading the underlying stream.
+ */
+ public static ByteString readFrom(InputStream streamToDrain, int chunkSize)
+ throws IOException {
+ return readFrom(streamToDrain, chunkSize, chunkSize);
+ }
+
+ // Helper method that takes the chunk size range as a parameter.
+ public static ByteString readFrom(InputStream streamToDrain, int minChunkSize,
+ int maxChunkSize) throws IOException {
+ Collection<ByteString> results = new ArrayList<ByteString>();
+
+ // copy the inbound bytes into a list of chunks; the chunk size
+ // grows exponentially to support both short and long streams.
+ int chunkSize = minChunkSize;
+ while (true) {
+ ByteString chunk = readChunk(streamToDrain, chunkSize);
+ if (chunk == null) {
+ break;
+ }
+ results.add(chunk);
+ chunkSize = Math.min(chunkSize * 2, maxChunkSize);
+ }
+
+ return ByteString.copyFrom(results);
+ }
+
+ /**
+ * Blocks until a chunk of the given size can be made from the
+ * stream, or EOF is reached. Calls read() repeatedly in case the
+ * given stream implementation doesn't completely fill the given
+ * buffer in one read() call.
+ *
+ * @return A chunk of the desired size, or else a chunk as large as
+ * was available when end of stream was reached. Returns null if the
+ * given stream had no more data in it.
+ */
+ private static ByteString readChunk(InputStream in, final int chunkSize)
+ throws IOException {
+ final byte[] buf = new byte[chunkSize];
+ int bytesRead = 0;
+ while (bytesRead < chunkSize) {
+ final int count = in.read(buf, bytesRead, chunkSize - bytesRead);
+ if (count == -1) {
+ break;
+ }
+ bytesRead += count;
+ }
+
+ if (bytesRead == 0) {
+ return null;
+ } else {
+ return ByteString.copyFrom(buf, 0, bytesRead);
+ }
+ }
+
+ // =================================================================
+ // Multiple ByteStrings -> One ByteString
+
+ /**
+ * Concatenate the given {@code ByteString} to this one. Short concatenations,
+ * of total size smaller than {@link ByteString#CONCATENATE_BY_COPY_SIZE}, are
+ * produced by copying the underlying bytes (as per Rope.java, <a
+ * href="http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf">
+ * BAP95 </a>. In general, the concatenate involves no copying.
+ *
+ * @param other string to concatenate
+ * @return a new {@code ByteString} instance
+ */
+ public ByteString concat(ByteString other) {
+ int thisSize = size();
+ int otherSize = other.size();
+ if ((long) thisSize + otherSize >= Integer.MAX_VALUE) {
+ throw new IllegalArgumentException("ByteString would be too long: " +
+ thisSize + "+" + otherSize);
+ }
+
+ return RopeByteString.concatenate(this, other);
+ }
+
/**
- * Concatenates all byte strings in the list and returns the result.
+ * Concatenates all byte strings in the iterable and returns the result.
+ * This is designed to run in O(list size), not O(total bytes).
*
* <p>The returned {@code ByteString} is not necessarily a unique object.
* If the list is empty, the returned object is the singleton empty
* {@code ByteString}. If the list has only one element, that
* {@code ByteString} will be returned without copying.
+ *
+ * @param byteStrings strings to be concatenated
+ * @return new {@code ByteString}
*/
- public static ByteString copyFrom(List<ByteString> list) {
- if (list.size() == 0) {
- return EMPTY;
- } else if (list.size() == 1) {
- return list.get(0);
+ public static ByteString copyFrom(Iterable<ByteString> byteStrings) {
+ Collection<ByteString> collection;
+ if (!(byteStrings instanceof Collection)) {
+ collection = new ArrayList<ByteString>();
+ for (ByteString byteString : byteStrings) {
+ collection.add(byteString);
+ }
+ } else {
+ collection = (Collection<ByteString>) byteStrings;
}
-
- int size = 0;
- for (ByteString str : list) {
- size += str.size();
+ ByteString result;
+ if (collection.isEmpty()) {
+ result = EMPTY;
+ } else {
+ result = balancedConcat(collection.iterator(), collection.size());
}
- byte[] bytes = new byte[size];
- int pos = 0;
- for (ByteString str : list) {
- System.arraycopy(str.bytes, 0, bytes, pos, str.size());
- pos += str.size();
+ return result;
+ }
+
+ // Internal function used by copyFrom(Iterable<ByteString>).
+ // Create a balanced concatenation of the next "length" elements from the
+ // iterable.
+ private static ByteString balancedConcat(Iterator<ByteString> iterator,
+ int length) {
+ assert length >= 1;
+ ByteString result;
+ if (length == 1) {
+ result = iterator.next();
+ } else {
+ int halfLength = length >>> 1;
+ ByteString left = balancedConcat(iterator, halfLength);
+ ByteString right = balancedConcat(iterator, length - halfLength);
+ result = left.concat(right);
}
- return new ByteString(bytes);
+ return result;
}
// =================================================================
@@ -174,194 +460,493 @@ public final class ByteString {
*
* @param target buffer to copy into
* @param offset in the target buffer
+ * @throws IndexOutOfBoundsException if the offset is negative or too large
*/
- public void copyTo(final byte[] target, final int offset) {
- System.arraycopy(bytes, 0, target, offset, bytes.length);
+ public void copyTo(byte[] target, int offset) {
+ copyTo(target, 0, offset, size());
}
/**
* Copies bytes into a buffer.
*
- * @param target buffer to copy into
+ * @param target buffer to copy into
* @param sourceOffset offset within these bytes
* @param targetOffset offset within the target buffer
- * @param size number of bytes to copy
+ * @param numberToCopy number of bytes to copy
+ * @throws IndexOutOfBoundsException if an offset or size is negative or too
+ * large
*/
- public void copyTo(final byte[] target, final int sourceOffset,
- final int targetOffset,
- final int size) {
- System.arraycopy(bytes, sourceOffset, target, targetOffset, size);
+ public void copyTo(byte[] target, int sourceOffset, int targetOffset,
+ int numberToCopy) {
+ if (sourceOffset < 0) {
+ throw new IndexOutOfBoundsException("Source offset < 0: " + sourceOffset);
+ }
+ if (targetOffset < 0) {
+ throw new IndexOutOfBoundsException("Target offset < 0: " + targetOffset);
+ }
+ if (numberToCopy < 0) {
+ throw new IndexOutOfBoundsException("Length < 0: " + numberToCopy);
+ }
+ if (sourceOffset + numberToCopy > size()) {
+ throw new IndexOutOfBoundsException(
+ "Source end offset < 0: " + (sourceOffset + numberToCopy));
+ }
+ if (targetOffset + numberToCopy > target.length) {
+ throw new IndexOutOfBoundsException(
+ "Target end offset < 0: " + (targetOffset + numberToCopy));
+ }
+ if (numberToCopy > 0) {
+ copyToInternal(target, sourceOffset, targetOffset, numberToCopy);
+ }
}
/**
+ * Internal (package private) implementation of
+ * @link{#copyTo(byte[],int,int,int}.
+ * It assumes that all error checking has already been performed and that
+ * @code{numberToCopy > 0}.
+ */
+ protected abstract void copyToInternal(byte[] target, int sourceOffset,
+ int targetOffset, int numberToCopy);
+
+ /**
+ * Copies bytes into a ByteBuffer.
+ *
+ * @param target ByteBuffer to copy into.
+ * @throws java.nio.ReadOnlyBufferException if the {@code target} is read-only
+ * @throws java.nio.BufferOverflowException if the {@code target}'s
+ * remaining() space is not large enough to hold the data.
+ */
+ public abstract void copyTo(ByteBuffer target);
+
+ /**
* Copies bytes to a {@code byte[]}.
+ *
+ * @return copied bytes
*/
public byte[] toByteArray() {
- final int size = bytes.length;
- final byte[] copy = new byte[size];
- System.arraycopy(bytes, 0, copy, 0, size);
- return copy;
+ int size = size();
+ if (size == 0) {
+ return Internal.EMPTY_BYTE_ARRAY;
+ }
+ byte[] result = new byte[size];
+ copyToInternal(result, 0, 0, size);
+ return result;
}
/**
- * Constructs a new read-only {@code java.nio.ByteBuffer} with the
- * same backing byte array.
+ * Writes the complete contents of this byte string to
+ * the specified output stream argument.
+ *
+ * @param out the output stream to which to write the data.
+ * @throws IOException if an I/O error occurs.
*/
- public ByteBuffer asReadOnlyByteBuffer() {
- final ByteBuffer byteBuffer = ByteBuffer.wrap(bytes);
- return byteBuffer.asReadOnlyBuffer();
+ public abstract void writeTo(OutputStream out) throws IOException;
+
+ /**
+ * Writes a specified part of this byte string to an output stream.
+ *
+ * @param out the output stream to which to write the data.
+ * @param sourceOffset offset within these bytes
+ * @param numberToWrite number of bytes to write
+ * @throws IOException if an I/O error occurs.
+ * @throws IndexOutOfBoundsException if an offset or size is negative or too
+ * large
+ */
+ void writeTo(OutputStream out, int sourceOffset, int numberToWrite)
+ throws IOException {
+ if (sourceOffset < 0) {
+ throw new IndexOutOfBoundsException("Source offset < 0: " + sourceOffset);
+ }
+ if (numberToWrite < 0) {
+ throw new IndexOutOfBoundsException("Length < 0: " + numberToWrite);
+ }
+ if (sourceOffset + numberToWrite > size()) {
+ throw new IndexOutOfBoundsException(
+ "Source end offset exceeded: " + (sourceOffset + numberToWrite));
+ }
+ if (numberToWrite > 0) {
+ writeToInternal(out, sourceOffset, numberToWrite);
+ }
+
}
/**
+ * Internal version of {@link #writeTo(OutputStream,int,int)} that assumes
+ * all error checking has already been done.
+ */
+ abstract void writeToInternal(OutputStream out, int sourceOffset,
+ int numberToWrite) throws IOException;
+
+ /**
+ * Constructs a read-only {@code java.nio.ByteBuffer} whose content
+ * is equal to the contents of this byte string.
+ * The result uses the same backing array as the byte string, if possible.
+ *
+ * @return wrapped bytes
+ */
+ public abstract ByteBuffer asReadOnlyByteBuffer();
+
+ /**
+ * Constructs a list of read-only {@code java.nio.ByteBuffer} objects
+ * such that the concatenation of their contents is equal to the contents
+ * of this byte string. The result uses the same backing arrays as the
+ * byte string.
+ * <p>
+ * By returning a list, implementations of this method may be able to avoid
+ * copying even when there are multiple backing arrays.
+ *
+ * @return a list of wrapped bytes
+ */
+ public abstract List<ByteBuffer> asReadOnlyByteBufferList();
+
+ /**
* Constructs a new {@code String} by decoding the bytes using the
* specified charset.
+ *
+ * @param charsetName encode using this charset
+ * @return new string
+ * @throws UnsupportedEncodingException if charset isn't recognized
*/
- public String toString(final String charsetName)
- throws UnsupportedEncodingException {
- return new String(bytes, charsetName);
- }
+ public abstract String toString(String charsetName)
+ throws UnsupportedEncodingException;
+
+ // =================================================================
+ // UTF-8 decoding
/**
* Constructs a new {@code String} by decoding the bytes as UTF-8.
+ *
+ * @return new string using UTF-8 encoding
*/
public String toStringUtf8() {
try {
- return new String(bytes, "UTF-8");
+ return toString("UTF-8");
} catch (UnsupportedEncodingException e) {
throw new RuntimeException("UTF-8 not supported?", e);
}
}
+ /**
+ * Tells whether this {@code ByteString} represents a well-formed UTF-8
+ * byte sequence, such that the original bytes can be converted to a
+ * String object and then round tripped back to bytes without loss.
+ *
+ * <p>More precisely, returns {@code true} whenever: <pre> {@code
+ * Arrays.equals(byteString.toByteArray(),
+ * new String(byteString.toByteArray(), "UTF-8").getBytes("UTF-8"))
+ * }</pre>
+ *
+ * <p>This method returns {@code false} for "overlong" byte sequences,
+ * as well as for 3-byte sequences that would map to a surrogate
+ * character, in accordance with the restricted definition of UTF-8
+ * introduced in Unicode 3.1. Note that the UTF-8 decoder included in
+ * Oracle's JDK has been modified to also reject "overlong" byte
+ * sequences, but (as of 2011) still accepts 3-byte surrogate
+ * character byte sequences.
+ *
+ * <p>See the Unicode Standard,</br>
+ * Table 3-6. <em>UTF-8 Bit Distribution</em>,</br>
+ * Table 3-7. <em>Well Formed UTF-8 Byte Sequences</em>.
+ *
+ * @return whether the bytes in this {@code ByteString} are a
+ * well-formed UTF-8 byte sequence
+ */
+ public abstract boolean isValidUtf8();
+
+ /**
+ * Tells whether the given byte sequence is a well-formed, malformed, or
+ * incomplete UTF-8 byte sequence. This method accepts and returns a partial
+ * state result, allowing the bytes for a complete UTF-8 byte sequence to be
+ * composed from multiple {@code ByteString} segments.
+ *
+ * @param state either {@code 0} (if this is the initial decoding operation)
+ * or the value returned from a call to a partial decoding method for the
+ * previous bytes
+ * @param offset offset of the first byte to check
+ * @param length number of bytes to check
+ *
+ * @return {@code -1} if the partial byte sequence is definitely malformed,
+ * {@code 0} if it is well-formed (no additional input needed), or, if the
+ * byte sequence is "incomplete", i.e. apparently terminated in the middle of
+ * a character, an opaque integer "state" value containing enough information
+ * to decode the character when passed to a subsequent invocation of a
+ * partial decoding method.
+ */
+ protected abstract int partialIsValidUtf8(int state, int offset, int length);
+
// =================================================================
// equals() and hashCode()
@Override
- public boolean equals(final Object o) {
- if (o == this) {
- return true;
- }
-
- if (!(o instanceof ByteString)) {
- return false;
- }
-
- final ByteString other = (ByteString) o;
- final int size = bytes.length;
- if (size != other.bytes.length) {
- return false;
- }
-
- final byte[] thisBytes = bytes;
- final byte[] otherBytes = other.bytes;
- for (int i = 0; i < size; i++) {
- if (thisBytes[i] != otherBytes[i]) {
- return false;
- }
- }
-
- return true;
- }
-
- private volatile int hash = 0;
+ public abstract boolean equals(Object o);
+ /**
+ * Return a non-zero hashCode depending only on the sequence of bytes
+ * in this ByteString.
+ *
+ * @return hashCode value for this object
+ */
@Override
- public int hashCode() {
- int h = hash;
-
- if (h == 0) {
- final byte[] thisBytes = bytes;
- final int size = bytes.length;
-
- h = size;
- for (int i = 0; i < size; i++) {
- h = h * 31 + thisBytes[i];
- }
- if (h == 0) {
- h = 1;
- }
-
- hash = h;
- }
-
- return h;
- }
+ public abstract int hashCode();
// =================================================================
// Input stream
/**
* Creates an {@code InputStream} which can be used to read the bytes.
+ * <p>
+ * The {@link InputStream} returned by this method is guaranteed to be
+ * completely non-blocking. The method {@link InputStream#available()}
+ * returns the number of bytes remaining in the stream. The methods
+ * {@link InputStream#read(byte[]), {@link InputStream#read(byte[],int,int)}
+ * and {@link InputStream#skip(long)} will read/skip as many bytes as are
+ * available.
+ * <p>
+ * The methods in the returned {@link InputStream} might <b>not</b> be
+ * thread safe.
+ *
+ * @return an input stream that returns the bytes of this byte string.
*/
- public InputStream newInput() {
- return new ByteArrayInputStream(bytes);
- }
+ public abstract InputStream newInput();
/**
* Creates a {@link CodedInputStream} which can be used to read the bytes.
- * Using this is more efficient than creating a {@link CodedInputStream}
- * wrapping the result of {@link #newInput()}.
+ * Using this is often more efficient than creating a {@link CodedInputStream}
+ * that wraps the result of {@link #newInput()}.
+ *
+ * @return stream based on wrapped data
*/
- public CodedInputStream newCodedInput() {
- // We trust CodedInputStream not to modify the bytes, or to give anyone
- // else access to them.
- return CodedInputStream.newInstance(bytes);
- }
+ public abstract CodedInputStream newCodedInput();
// =================================================================
// Output stream
/**
- * Creates a new {@link Output} with the given initial capacity.
+ * Creates a new {@link Output} with the given initial capacity. Call {@link
+ * Output#toByteString()} to create the {@code ByteString} instance.
+ * <p>
+ * A {@link ByteString.Output} offers the same functionality as a
+ * {@link ByteArrayOutputStream}, except that it returns a {@link ByteString}
+ * rather than a {@code byte} array.
+ *
+ * @param initialCapacity estimate of number of bytes to be written
+ * @return {@code OutputStream} for building a {@code ByteString}
*/
- public static Output newOutput(final int initialCapacity) {
- return new Output(new ByteArrayOutputStream(initialCapacity));
+ public static Output newOutput(int initialCapacity) {
+ return new Output(initialCapacity);
}
/**
- * Creates a new {@link Output}.
+ * Creates a new {@link Output}. Call {@link Output#toByteString()} to create
+ * the {@code ByteString} instance.
+ * <p>
+ * A {@link ByteString.Output} offers the same functionality as a
+ * {@link ByteArrayOutputStream}, except that it returns a {@link ByteString}
+ * rather than a {@code byte array}.
+ *
+ * @return {@code OutputStream} for building a {@code ByteString}
*/
public static Output newOutput() {
- return newOutput(32);
+ return new Output(CONCATENATE_BY_COPY_SIZE);
}
/**
* Outputs to a {@code ByteString} instance. Call {@link #toByteString()} to
* create the {@code ByteString} instance.
*/
- public static final class Output extends FilterOutputStream {
- private final ByteArrayOutputStream bout;
+ public static final class Output extends OutputStream {
+ // Implementation note.
+ // The public methods of this class must be synchronized. ByteStrings
+ // are guaranteed to be immutable. Without some sort of locking, it could
+ // be possible for one thread to call toByteSring(), while another thread
+ // is still modifying the underlying byte array.
+
+ private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
+ // argument passed by user, indicating initial capacity.
+ private final int initialCapacity;
+ // ByteStrings to be concatenated to create the result
+ private final ArrayList<ByteString> flushedBuffers;
+ // Total number of bytes in the ByteStrings of flushedBuffers
+ private int flushedBuffersTotalBytes;
+ // Current buffer to which we are writing
+ private byte[] buffer;
+ // Location in buffer[] to which we write the next byte.
+ private int bufferPos;
/**
- * Constructs a new output with the given initial capacity.
+ * Creates a new ByteString output stream with the specified
+ * initial capacity.
+ *
+ * @param initialCapacity the initial capacity of the output stream.
*/
- private Output(final ByteArrayOutputStream bout) {
- super(bout);
- this.bout = bout;
+ Output(int initialCapacity) {
+ if (initialCapacity < 0) {
+ throw new IllegalArgumentException("Buffer size < 0");
+ }
+ this.initialCapacity = initialCapacity;
+ this.flushedBuffers = new ArrayList<ByteString>();
+ this.buffer = new byte[initialCapacity];
+ }
+
+ @Override
+ public synchronized void write(int b) {
+ if (bufferPos == buffer.length) {
+ flushFullBuffer(1);
+ }
+ buffer[bufferPos++] = (byte)b;
+ }
+
+ @Override
+ public synchronized void write(byte[] b, int offset, int length) {
+ if (length <= buffer.length - bufferPos) {
+ // The bytes can fit into the current buffer.
+ System.arraycopy(b, offset, buffer, bufferPos, length);
+ bufferPos += length;
+ } else {
+ // Use up the current buffer
+ int copySize = buffer.length - bufferPos;
+ System.arraycopy(b, offset, buffer, bufferPos, copySize);
+ offset += copySize;
+ length -= copySize;
+ // Flush the buffer, and get a new buffer at least big enough to cover
+ // what we still need to output
+ flushFullBuffer(length);
+ System.arraycopy(b, offset, buffer, 0 /* count */, length);
+ bufferPos = length;
+ }
}
/**
- * Creates a {@code ByteString} instance from this {@code Output}.
+ * Creates a byte string. Its size is the current size of this output
+ * stream and its output has been copied to it.
+ *
+ * @return the current contents of this output stream, as a byte string.
*/
- public ByteString toByteString() {
- final byte[] byteArray = bout.toByteArray();
- return new ByteString(byteArray);
+ public synchronized ByteString toByteString() {
+ flushLastBuffer();
+ return ByteString.copyFrom(flushedBuffers);
+ }
+
+ /**
+ * Implement java.util.Arrays.copyOf() for jdk 1.5.
+ */
+ private byte[] copyArray(byte[] buffer, int length) {
+ byte[] result = new byte[length];
+ System.arraycopy(buffer, 0, result, 0, Math.min(buffer.length, length));
+ return result;
+ }
+
+ /**
+ * Writes the complete contents of this byte array output stream to
+ * the specified output stream argument.
+ *
+ * @param out the output stream to which to write the data.
+ * @throws IOException if an I/O error occurs.
+ */
+ public void writeTo(OutputStream out) throws IOException {
+ ByteString[] cachedFlushBuffers;
+ byte[] cachedBuffer;
+ int cachedBufferPos;
+ synchronized (this) {
+ // Copy the information we need into local variables so as to hold
+ // the lock for as short a time as possible.
+ cachedFlushBuffers =
+ flushedBuffers.toArray(new ByteString[flushedBuffers.size()]);
+ cachedBuffer = buffer;
+ cachedBufferPos = bufferPos;
+ }
+ for (ByteString byteString : cachedFlushBuffers) {
+ byteString.writeTo(out);
+ }
+
+ out.write(copyArray(cachedBuffer, cachedBufferPos));
+ }
+
+ /**
+ * Returns the current size of the output stream.
+ *
+ * @return the current size of the output stream
+ */
+ public synchronized int size() {
+ return flushedBuffersTotalBytes + bufferPos;
+ }
+
+ /**
+ * Resets this stream, so that all currently accumulated output in the
+ * output stream is discarded. The output stream can be used again,
+ * reusing the already allocated buffer space.
+ */
+ public synchronized void reset() {
+ flushedBuffers.clear();
+ flushedBuffersTotalBytes = 0;
+ bufferPos = 0;
+ }
+
+ @Override
+ public String toString() {
+ return String.format("<ByteString.Output@%s size=%d>",
+ Integer.toHexString(System.identityHashCode(this)), size());
+ }
+
+ /**
+ * Internal function used by writers. The current buffer is full, and the
+ * writer needs a new buffer whose size is at least the specified minimum
+ * size.
+ */
+ private void flushFullBuffer(int minSize) {
+ flushedBuffers.add(new LiteralByteString(buffer));
+ flushedBuffersTotalBytes += buffer.length;
+ // We want to increase our total capacity by 50%, but as a minimum,
+ // the new buffer should also at least be >= minSize and
+ // >= initial Capacity.
+ int newSize = Math.max(initialCapacity,
+ Math.max(minSize, flushedBuffersTotalBytes >>> 1));
+ buffer = new byte[newSize];
+ bufferPos = 0;
+ }
+
+ /**
+ * Internal function used by {@link #toByteString()}. The current buffer may
+ * or may not be full, but it needs to be flushed.
+ */
+ private void flushLastBuffer() {
+ if (bufferPos < buffer.length) {
+ if (bufferPos > 0) {
+ byte[] bufferCopy = copyArray(buffer, bufferPos);
+ flushedBuffers.add(new LiteralByteString(bufferCopy));
+ }
+ // We reuse this buffer for further writes.
+ } else {
+ // Buffer is completely full. Huzzah.
+ flushedBuffers.add(new LiteralByteString(buffer));
+ // 99% of the time, we're not going to use this OutputStream again.
+ // We set buffer to an empty byte stream so that we're handling this
+ // case without wasting space. In the rare case that more writes
+ // *do* occur, this empty buffer will be flushed and an appropriately
+ // sized new buffer will be created.
+ buffer = EMPTY_BYTE_ARRAY;
+ }
+ flushedBuffersTotalBytes += bufferPos;
+ bufferPos = 0;
}
}
/**
- * Constructs a new ByteString builder, which allows you to efficiently
- * construct a {@code ByteString} by writing to a {@link CodedOutputStream}.
- * Using this is much more efficient than calling {@code newOutput()} and
- * wrapping that in a {@code CodedOutputStream}.
+ * Constructs a new {@code ByteString} builder, which allows you to
+ * efficiently construct a {@code ByteString} by writing to a {@link
+ * CodedOutputStream}. Using this is much more efficient than calling {@code
+ * newOutput()} and wrapping that in a {@code CodedOutputStream}.
*
* <p>This is package-private because it's a somewhat confusing interface.
* Users can call {@link Message#toByteString()} instead of calling this
* directly.
*
- * @param size The target byte size of the {@code ByteString}. You must
- * write exactly this many bytes before building the result.
+ * @param size The target byte size of the {@code ByteString}. You must write
+ * exactly this many bytes before building the result.
+ * @return the builder
*/
- static CodedBuilder newCodedBuilder(final int size) {
+ static CodedBuilder newCodedBuilder(int size) {
return new CodedBuilder(size);
}
@@ -370,7 +955,7 @@ public final class ByteString {
private final CodedOutputStream output;
private final byte[] buffer;
- private CodedBuilder(final int size) {
+ private CodedBuilder(int size) {
buffer = new byte[size];
output = CodedOutputStream.newInstance(buffer);
}
@@ -381,11 +966,57 @@ public final class ByteString {
// We can be confident that the CodedOutputStream will not modify the
// underlying bytes anymore because it already wrote all of them. So,
// no need to make a copy.
- return new ByteString(buffer);
+ return new LiteralByteString(buffer);
}
public CodedOutputStream getCodedOutput() {
return output;
}
}
+
+ // =================================================================
+ // Methods {@link RopeByteString} needs on instances, which aren't part of the
+ // public API.
+
+ /**
+ * Return the depth of the tree representing this {@code ByteString}, if any,
+ * whose root is this node. If this is a leaf node, return 0.
+ *
+ * @return tree depth or zero
+ */
+ protected abstract int getTreeDepth();
+
+ /**
+ * Return {@code true} if this ByteString is literal (a leaf node) or a
+ * flat-enough tree in the sense of {@link RopeByteString}.
+ *
+ * @return true if the tree is flat enough
+ */
+ protected abstract boolean isBalanced();
+
+ /**
+ * Return the cached hash code if available.
+ *
+ * @return value of cached hash code or 0 if not computed yet
+ */
+ protected abstract int peekCachedHashCode();
+
+ /**
+ * Compute the hash across the value bytes starting with the given hash, and
+ * return the result. This is used to compute the hash across strings
+ * represented as a set of pieces by allowing the hash computation to be
+ * continued from piece to piece.
+ *
+ * @param h starting hash value
+ * @param offset offset into this value to start looking at data values
+ * @param length number of data values to include in the hash computation
+ * @return ending hash value
+ */
+ protected abstract int partialHash(int h, int offset, int length);
+
+ @Override
+ public String toString() {
+ return String.format("<ByteString@%s size=%d>",
+ Integer.toHexString(System.identityHashCode(this)), size());
+ }
}