From 4b116a2f5d3c6e2a0a7fe39d5eb956563138d542 Mon Sep 17 00:00:00 2001 From: Neil Fuller Date: Fri, 20 Jun 2014 14:30:23 +0100 Subject: Fix the OOME in ScannerParseLargeFileBenchmarkTest Scanner had no mechanism for recovering buffer space it didn't need. Now, if the buffer is more than 50% full of ignorable characters the remaining characters are shuffled to the beginning to recover space. For most expected usecases this means that the buffer will stay 1k and contain up to 512 characters of useful data. A growable, circular character buffer could have been introduced to avoid the copy but is not worth the effort. Previously the buffer would double in size until the data or memory was exhausted, and each read would also double in size to fill the empty half of the buffer. This was fine providing the input could fit in memory. On top of that the 1k, 2k, 4k, etc. buffer was repeatedly turned into a String and passed to the (native) matcher, and then the matcher was told to ignore more than half of it. As a consequence of keeping the buffer a fixed size (and only filling 50% of it at a time), this change may cause a performance regression: for most usecases where delimiters are closer together than 512 bytes, reads after the first will now usually be 512 bytes and not the 1k, 2k, 4k, etc. it was previously. Having fixed the test so it doesn't OOM, the test now takes 6 minutes to pass on host and so is unsuitable for inclusion in CTS tests and so is being suppressed. This change also includes so tidy up changes to the test and the Scanner class. The implementation could no doubt be improved but the API makes it inherently slow. It would be surprising if anybody uses the Scanner class on Android with so many better alternatives. Bug: 14865710 Change-Id: I40a7fc0c2bfaf6db4e42108efe417303e76bde24 --- .../util/ScannerParseLargeFileBenchmarkTest.java | 45 ++++++++++------------ 1 file changed, 21 insertions(+), 24 deletions(-) (limited to 'harmony-tests/src/test/java') diff --git a/harmony-tests/src/test/java/org/apache/harmony/tests/java/util/ScannerParseLargeFileBenchmarkTest.java b/harmony-tests/src/test/java/org/apache/harmony/tests/java/util/ScannerParseLargeFileBenchmarkTest.java index 4b0d1ea..c0f9e58 100644 --- a/harmony-tests/src/test/java/org/apache/harmony/tests/java/util/ScannerParseLargeFileBenchmarkTest.java +++ b/harmony-tests/src/test/java/org/apache/harmony/tests/java/util/ScannerParseLargeFileBenchmarkTest.java @@ -24,11 +24,11 @@ import junit.framework.TestCase; public class ScannerParseLargeFileBenchmarkTest extends TestCase { /** - * This test will check when parse a large file like more than 200M bytes if - * the Scanner will exhaust all heap memory + * Check whether the Scanner will exhaust all heap memory when parsing a + * large file. */ public void testParseLargeFile() throws Exception { - MyReader reader = new MyReader(); + FakeLargeFile reader = new FakeLargeFile(); String delimiter = "\r?\n"; Scanner scanner = new Scanner(reader).useDelimiter(delimiter); @@ -39,14 +39,9 @@ public class ScannerParseLargeFileBenchmarkTest extends TestCase { reader.close(); } - private static class MyReader extends Reader { - static final char[] CONTENT = "large file!\n".toCharArray(); - - static long fileLength = (8 << 21) * 12; - - static boolean first = true; - - static int position = 0; + private static class FakeLargeFile extends Reader { + private static final char[] CONTENT = "large file!\n".toCharArray(); + private static final int FILE_LENGTH = 192 * 1024 * 1024; // 192 MB private int count = 0; @@ -55,22 +50,24 @@ public class ScannerParseLargeFileBenchmarkTest extends TestCase { } @Override - public int read(char[] buf, int offset, int length) { - if (count >= fileLength) { + public int read(char[] buffer, int offset, int length) { + if (count >= FILE_LENGTH) { return -1; } - if (first == true) { - position = 0; - first = false; - } - for (int i = offset; i < length; i++) { - buf[i] = CONTENT[(i + position) % CONTENT.length]; - count++; - } - - position = (length + position) % CONTENT.length; - return length - offset; + final int charsToRead = Math.min(FILE_LENGTH - count, length); + int bufferIndex = offset; + int contentIndex = count % CONTENT.length; + int charsRead = 0; + while (charsRead < charsToRead) { + buffer[bufferIndex++] = CONTENT[contentIndex++]; + if (contentIndex == CONTENT.length) { + contentIndex = 0; + } + charsRead++; + } + count += charsRead; + return charsToRead; } } } -- cgit v1.1