summaryrefslogtreecommitdiffstats
path: root/luni
diff options
context:
space:
mode:
authorNeil Fuller <nfuller@google.com>2014-06-20 14:30:23 +0100
committerNeil Fuller <nfuller@google.com>2014-07-30 13:03:09 +0100
commit4b116a2f5d3c6e2a0a7fe39d5eb956563138d542 (patch)
tree146d52b8fde17ce4728777280cc756c348160c5e /luni
parent2eb56b69de339978a29d94531759b465742f027f (diff)
downloadlibcore-4b116a2f5d3c6e2a0a7fe39d5eb956563138d542.zip
libcore-4b116a2f5d3c6e2a0a7fe39d5eb956563138d542.tar.gz
libcore-4b116a2f5d3c6e2a0a7fe39d5eb956563138d542.tar.bz2
Fix the OOME in ScannerParseLargeFileBenchmarkTest
Scanner had no mechanism for recovering buffer space it didn't need. Now, if the buffer is more than 50% full of ignorable characters the remaining characters are shuffled to the beginning to recover space. For most expected usecases this means that the buffer will stay 1k and contain up to 512 characters of useful data. A growable, circular character buffer could have been introduced to avoid the copy but is not worth the effort. Previously the buffer would double in size until the data or memory was exhausted, and each read would also double in size to fill the empty half of the buffer. This was fine providing the input could fit in memory. On top of that the 1k, 2k, 4k, etc. buffer was repeatedly turned into a String and passed to the (native) matcher, and then the matcher was told to ignore more than half of it. As a consequence of keeping the buffer a fixed size (and only filling 50% of it at a time), this change may cause a performance regression: for most usecases where delimiters are closer together than 512 bytes, reads after the first will now usually be 512 bytes and not the 1k, 2k, 4k, etc. it was previously. Having fixed the test so it doesn't OOM, the test now takes 6 minutes to pass on host and so is unsuitable for inclusion in CTS tests and so is being suppressed. This change also includes so tidy up changes to the test and the Scanner class. The implementation could no doubt be improved but the API makes it inherently slow. It would be surprising if anybody uses the Scanner class on Android with so many better alternatives. Bug: 14865710 Change-Id: I40a7fc0c2bfaf6db4e42108efe417303e76bde24
Diffstat (limited to 'luni')
-rw-r--r--luni/src/main/java/java/util/Scanner.java105
1 files changed, 67 insertions, 38 deletions
diff --git a/luni/src/main/java/java/util/Scanner.java b/luni/src/main/java/java/util/Scanner.java
index 7d504b7..7d0e795 100644
--- a/luni/src/main/java/java/util/Scanner.java
+++ b/luni/src/main/java/java/util/Scanner.java
@@ -159,12 +159,15 @@ public final class Scanner implements Closeable, Iterator<String> {
if (charsetName == null) {
throw new IllegalArgumentException("charsetName == null");
}
+
+ InputStreamReader streamReader;
try {
- setInput(new InputStreamReader(fis, charsetName));
+ streamReader = new InputStreamReader(fis, charsetName);
} catch (UnsupportedEncodingException e) {
IoUtils.closeQuietly(fis);
throw new IllegalArgumentException(e.getMessage());
}
+ initialize(streamReader);
}
/**
@@ -174,7 +177,7 @@ public final class Scanner implements Closeable, Iterator<String> {
* the string to be scanned.
*/
public Scanner(String src) {
- setInput(new StringReader(src));
+ initialize(new StringReader(src));
}
/**
@@ -203,11 +206,14 @@ public final class Scanner implements Closeable, Iterator<String> {
if (src == null) {
throw new NullPointerException("src == null");
}
+
+ InputStreamReader streamReader;
try {
- setInput(new InputStreamReader(src, charsetName));
+ streamReader = new InputStreamReader(src, charsetName);
} catch (UnsupportedEncodingException e) {
throw new IllegalArgumentException(e.getMessage());
}
+ initialize(streamReader);
}
/**
@@ -220,7 +226,7 @@ public final class Scanner implements Closeable, Iterator<String> {
if (src == null) {
throw new NullPointerException("src == null");
}
- setInput(src);
+ initialize(src);
}
/**
@@ -252,13 +258,14 @@ public final class Scanner implements Closeable, Iterator<String> {
if (charsetName == null) {
throw new IllegalArgumentException("charsetName == null");
}
- setInput(Channels.newReader(src, charsetName));
+ initialize(Channels.newReader(src, charsetName));
}
- private void setInput(Readable input) {
+ private void initialize(Readable input) {
this.input = input;
- buffer.limit(0);
- matcher = delimiter.matcher(buffer);
+ matcher = delimiter.matcher("");
+ matcher.useTransparentBounds(true);
+ matcher.useAnchoringBounds(false);
}
/**
@@ -535,7 +542,7 @@ public final class Scanner implements Closeable, Iterator<String> {
checkOpen();
checkNotNull(pattern);
matchSuccessful = false;
- saveCurrentStatus();
+ prepareForScan();
// if the next token exists, set the match region, otherwise return
// false
if (!setTokenRegion()) {
@@ -790,7 +797,7 @@ public final class Scanner implements Closeable, Iterator<String> {
* @throws IllegalStateException if this {@code Scanner} is closed.
*/
public boolean hasNextLine() {
- saveCurrentStatus();
+ prepareForScan();
String result = findWithinHorizon(LINE_PATTERN, 0);
recoverPreviousStatus();
return result != null;
@@ -954,7 +961,7 @@ public final class Scanner implements Closeable, Iterator<String> {
checkOpen();
checkNotNull(pattern);
matchSuccessful = false;
- saveCurrentStatus();
+ prepareForScan();
if (!setTokenRegion()) {
recoverPreviousStatus();
// if setting match region fails
@@ -1204,7 +1211,7 @@ public final class Scanner implements Closeable, Iterator<String> {
Pattern floatPattern = getFloatPattern();
String floatString = next(floatPattern);
floatString = removeLocaleInfoFromFloat(floatString);
- double doubleValue = 0;
+ double doubleValue;
try {
doubleValue = Double.parseDouble(floatString);
} catch (NumberFormatException e) {
@@ -1248,7 +1255,7 @@ public final class Scanner implements Closeable, Iterator<String> {
Pattern floatPattern = getFloatPattern();
String floatString = next(floatPattern);
floatString = removeLocaleInfoFromFloat(floatString);
- float floatValue = 0;
+ float floatValue;
try {
floatValue = Float.parseFloat(floatString);
} catch (NumberFormatException e) {
@@ -1310,7 +1317,7 @@ public final class Scanner implements Closeable, Iterator<String> {
Pattern integerPattern = getIntegerPattern(radix);
String intString = next(integerPattern);
intString = removeLocaleInfo(intString, int.class);
- int intValue = 0;
+ int intValue;
try {
intValue = Integer.parseInt(intString, radix);
} catch (NumberFormatException e) {
@@ -1340,7 +1347,7 @@ public final class Scanner implements Closeable, Iterator<String> {
matcher.usePattern(LINE_PATTERN);
matcher.region(findStartIndex, bufferLength);
- String result = null;
+ String result;
while (true) {
if (matcher.find()) {
if (inputExhausted || matcher.end() != bufferLength
@@ -1422,7 +1429,7 @@ public final class Scanner implements Closeable, Iterator<String> {
Pattern integerPattern = getIntegerPattern(radix);
String intString = next(integerPattern);
intString = removeLocaleInfo(intString, int.class);
- long longValue = 0;
+ long longValue;
try {
longValue = Long.parseLong(intString, radix);
} catch (NumberFormatException e) {
@@ -1484,7 +1491,7 @@ public final class Scanner implements Closeable, Iterator<String> {
Pattern integerPattern = getIntegerPattern(radix);
String intString = next(integerPattern);
intString = removeLocaleInfo(intString, int.class);
- short shortValue = 0;
+ short shortValue;
try {
shortValue = Short.parseShort(intString, radix);
} catch (NumberFormatException e) {
@@ -1662,23 +1669,46 @@ public final class Scanner implements Closeable, Iterator<String> {
}
/*
- * Change the matcher's string after reading input
+ * Change the matcher's input after modifying the contents of the buffer.
+ * The current implementation of Matcher causes a copy of the buffer to be taken.
*/
private void resetMatcher() {
- if (matcher == null) {
- matcher = delimiter.matcher(buffer);
- } else {
- matcher.reset(buffer);
- }
- matcher.useTransparentBounds(true);
- matcher.useAnchoringBounds(false);
+ matcher.reset(buffer);
matcher.region(findStartIndex, bufferLength);
}
/*
- * Save the matcher's last find position
- */
- private void saveCurrentStatus() {
+ * Recover buffer space for characters that are already processed and save the matcher's state
+ * in case parsing fails. See recoverPrevousState. This method must be called before
+ * any buffer offsets are calculated.
+ */
+ private void prepareForScan() {
+ // Compacting the buffer recovers space taken by already processed characters. This does not
+ // prevent the buffer growing in all situations but keeps the buffer small when delimiters
+ // exist regularly.
+ if (findStartIndex >= buffer.capacity() / 2) {
+ // When over half the buffer is filled with characters no longer being considered by the
+ // scanner we take the cost of compacting the buffer.
+
+ // Move all characters from [findStartIndex, findStartIndex + remaining()) to
+ // [0, remaining()).
+ int oldPosition = buffer.position();
+ buffer.position(findStartIndex);
+ buffer.compact();
+ buffer.position(oldPosition);
+
+ // Update Scanner state to reflect the new buffer state.
+ bufferLength -= findStartIndex;
+ findStartIndex = 0;
+ preStartIndex = -1;
+
+ // The matcher must also be informed that the buffer has changed because it operates on
+ // a String copy.
+ resetMatcher();
+ }
+
+ // Save the matcher's last find position so it can be returned to if the next token cannot
+ // be parsed.
preStartIndex = findStartIndex;
}
@@ -1822,7 +1852,7 @@ public final class Scanner implements Closeable, Iterator<String> {
boolean negative = removeLocaleSign(tokenBuilder);
// Remove group separator
String groupSeparator = String.valueOf(dfs.getGroupingSeparator());
- int separatorIndex = -1;
+ int separatorIndex;
while ((separatorIndex = tokenBuilder.indexOf(groupSeparator)) != -1) {
tokenBuilder.delete(separatorIndex, separatorIndex + 1);
}
@@ -1909,9 +1939,9 @@ public final class Scanner implements Closeable, Iterator<String> {
*/
private boolean setTokenRegion() {
// The position where token begins
- int tokenStartIndex = 0;
+ int tokenStartIndex;
// The position where token ends
- int tokenEndIndex = 0;
+ int tokenEndIndex;
// Use delimiter pattern
matcher.usePattern(delimiter);
matcher.region(findStartIndex, bufferLength);
@@ -1945,8 +1975,7 @@ public final class Scanner implements Closeable, Iterator<String> {
if (matcher.find()) {
findComplete = true;
// If just delimiter remains
- if (matcher.start() == findStartIndex
- && matcher.end() == bufferLength) {
+ if (matcher.start() == findStartIndex && matcher.end() == bufferLength) {
// If more input resource exists
if (!inputExhausted) {
readMore();
@@ -1964,7 +1993,7 @@ public final class Scanner implements Closeable, Iterator<String> {
}
}
tokenStartIndex = matcher.end();
- findStartIndex = matcher.end();
+ findStartIndex = tokenStartIndex;
return tokenStartIndex;
}
@@ -1984,7 +2013,7 @@ public final class Scanner implements Closeable, Iterator<String> {
setSuccess = true;
}
// If the first delimiter of scanner is not at the find start position
- if (-1 != findIndex && preStartIndex != matcher.start()) {
+ if (findIndex != -1 && preStartIndex != matcher.start()) {
tokenStartIndex = preStartIndex;
tokenEndIndex = matcher.start();
findStartIndex = matcher.start();
@@ -1996,7 +2025,7 @@ public final class Scanner implements Closeable, Iterator<String> {
}
private int findDelimiterAfter() {
- int tokenEndIndex = 0;
+ int tokenEndIndex;
boolean findComplete = false;
while (!findComplete) {
if (matcher.find()) {
@@ -2014,7 +2043,7 @@ public final class Scanner implements Closeable, Iterator<String> {
}
}
tokenEndIndex = matcher.start();
- findStartIndex = matcher.start();
+ findStartIndex = tokenEndIndex;
return tokenEndIndex;
}
@@ -2032,7 +2061,7 @@ public final class Scanner implements Closeable, Iterator<String> {
}
// Read input resource
- int readCount = 0;
+ int readCount;
try {
buffer.limit(buffer.capacity());
buffer.position(oldBufferLength);