summaryrefslogtreecommitdiffstats
path: root/WebCore/html/parser/HTMLParserIdioms.h
diff options
context:
space:
mode:
Diffstat (limited to 'WebCore/html/parser/HTMLParserIdioms.h')
-rw-r--r--WebCore/html/parser/HTMLParserIdioms.h13
1 files changed, 11 insertions, 2 deletions
diff --git a/WebCore/html/parser/HTMLParserIdioms.h b/WebCore/html/parser/HTMLParserIdioms.h
index f4704f7..4839138 100644
--- a/WebCore/html/parser/HTMLParserIdioms.h
+++ b/WebCore/html/parser/HTMLParserIdioms.h
@@ -52,8 +52,17 @@ bool parseHTMLInteger(const String&, int&);
inline bool isHTMLSpace(UChar character)
{
- // FIXME: Consider branch permutations as we did in isASCIISpace.
- return character == '\t' || character == '\x0A' || character == '\x0C' || character == '\x0D' || character == ' ';
+ // Histogram from Apple's page load test combined with some ad hoc browsing and some other test suites.
+ //
+ // 82%: 216330 non-space characters, all > U+0020
+ // 11%: 30017 plain space characters, U+0020
+ // 5%: 12099 newline characters, U+000A
+ // 2%: 5346 tab characters, U+0009
+ //
+ // No other characters seen. No U+000C or U+000D, and no other control characters.
+ // Accordingly, we check for non-spaces first, then space, then newline, then tab, then the other characters.
+
+ return character <= ' ' && (character == ' ' || character == '\n' || character == '\t' || character == '\r' || character == '\f');
}
inline bool isNotHTMLSpace(UChar character)