diff options
Diffstat (limited to 'WebCore/html/parser/HTMLParserIdioms.h')
-rw-r--r-- | WebCore/html/parser/HTMLParserIdioms.h | 13 |
1 files changed, 11 insertions, 2 deletions
diff --git a/WebCore/html/parser/HTMLParserIdioms.h b/WebCore/html/parser/HTMLParserIdioms.h
index f4704f7..4839138 100644
--- a/WebCore/html/parser/HTMLParserIdioms.h
+++ b/WebCore/html/parser/HTMLParserIdioms.h
@@ -52,8 +52,17 @@ bool parseHTMLInteger(const String&, int&);
 
 inline bool isHTMLSpace(UChar character)
 {
-    // FIXME: Consider branch permutations as we did in isASCIISpace.
-    return character == '\t' || character == '\x0A' || character == '\x0C' || character == '\x0D' || character == ' ';
+    // Histogram from Apple's page load test combined with some ad hoc browsing some other test suites.
+    //
+    // 82%: 216330 non-space characters, all > U+0020
+    // 11%: 30017 plain space characters, U+0020
+    // 5%: 12099 newline characters, U+000A
+    // 2%: 5346 tab characters, U+0009
+    //
+    // No other characters seen. No U+000C or U+000D, and no other control characters.
+    // Accordingly, we check for non-spaces first, then space, then newline, then tab, then the other characters.
+
+    return character <= ' ' && (character == ' ' || character == '\n' || character == '\t' || character == '\r' || character == '\f');
 }
 
 inline bool isNotHTMLSpace(UChar character) |