summary refs log tree commit diff stats
path: root/JavaScriptCore/parser/Lexer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'JavaScriptCore/parser/Lexer.cpp')
-rw-r--r-- JavaScriptCore/parser/Lexer.cpp 621
1 files changed, 313 insertions, 308 deletions
diff --git a/JavaScriptCore/parser/Lexer.cpp b/JavaScriptCore/parser/Lexer.cpp
index 45fe007..d7a122e 100644
--- a/JavaScriptCore/parser/Lexer.cpp
+++ b/JavaScriptCore/parser/Lexer.cpp
@@ -46,14 +46,16 @@ using namespace Unicode;
namespace JSC {
-enum CharacterTypes {
+enum CharacterType {
// Types for the main switch
- CharacterInvalid,
- CharacterAlpha,
+ // The first three types are fixed, and also used for identifying
+ // ASCII alpha and alphanumeric characters (see isIdentStart and isIdentPart).
+ CharacterIdentifierStart,
CharacterZero,
CharacterNumber,
+ CharacterInvalid,
CharacterLineTerminator,
CharacterExclamationMark,
CharacterOpenParen,
@@ -87,8 +89,8 @@ enum CharacterTypes {
CharacterWhiteSpace,
};
-// 128 ascii codes
-static unsigned short AsciiCharacters[128] = {
+// 128 ASCII codes
+static const unsigned short typesOfASCIICharacters[128] = {
/* 0 - Null */ CharacterInvalid,
/* 1 - Start of Heading */ CharacterInvalid,
/* 2 - Start of Text */ CharacterInvalid,
@@ -125,7 +127,7 @@ static unsigned short AsciiCharacters[128] = {
/* 33 - ! */ CharacterExclamationMark,
/* 34 - " */ CharacterQuote,
/* 35 - # */ CharacterInvalid,
-/* 36 - $ */ CharacterAlpha,
+/* 36 - $ */ CharacterIdentifierStart,
/* 37 - % */ CharacterModulo,
/* 38 - & */ CharacterAnd,
/* 39 - ' */ CharacterQuote,
@@ -154,64 +156,64 @@ static unsigned short AsciiCharacters[128] = {
/* 62 - > */ CharacterGreater,
/* 63 - ? */ CharacterQuestion,
/* 64 - @ */ CharacterInvalid,
-/* 65 - A */ CharacterAlpha,
-/* 66 - B */ CharacterAlpha,
-/* 67 - C */ CharacterAlpha,
-/* 68 - D */ CharacterAlpha,
-/* 69 - E */ CharacterAlpha,
-/* 70 - F */ CharacterAlpha,
-/* 71 - G */ CharacterAlpha,
-/* 72 - H */ CharacterAlpha,
-/* 73 - I */ CharacterAlpha,
-/* 74 - J */ CharacterAlpha,
-/* 75 - K */ CharacterAlpha,
-/* 76 - L */ CharacterAlpha,
-/* 77 - M */ CharacterAlpha,
-/* 78 - N */ CharacterAlpha,
-/* 79 - O */ CharacterAlpha,
-/* 80 - P */ CharacterAlpha,
-/* 81 - Q */ CharacterAlpha,
-/* 82 - R */ CharacterAlpha,
-/* 83 - S */ CharacterAlpha,
-/* 84 - T */ CharacterAlpha,
-/* 85 - U */ CharacterAlpha,
-/* 86 - V */ CharacterAlpha,
-/* 87 - W */ CharacterAlpha,
-/* 88 - X */ CharacterAlpha,
-/* 89 - Y */ CharacterAlpha,
-/* 90 - Z */ CharacterAlpha,
+/* 65 - A */ CharacterIdentifierStart,
+/* 66 - B */ CharacterIdentifierStart,
+/* 67 - C */ CharacterIdentifierStart,
+/* 68 - D */ CharacterIdentifierStart,
+/* 69 - E */ CharacterIdentifierStart,
+/* 70 - F */ CharacterIdentifierStart,
+/* 71 - G */ CharacterIdentifierStart,
+/* 72 - H */ CharacterIdentifierStart,
+/* 73 - I */ CharacterIdentifierStart,
+/* 74 - J */ CharacterIdentifierStart,
+/* 75 - K */ CharacterIdentifierStart,
+/* 76 - L */ CharacterIdentifierStart,
+/* 77 - M */ CharacterIdentifierStart,
+/* 78 - N */ CharacterIdentifierStart,
+/* 79 - O */ CharacterIdentifierStart,
+/* 80 - P */ CharacterIdentifierStart,
+/* 81 - Q */ CharacterIdentifierStart,
+/* 82 - R */ CharacterIdentifierStart,
+/* 83 - S */ CharacterIdentifierStart,
+/* 84 - T */ CharacterIdentifierStart,
+/* 85 - U */ CharacterIdentifierStart,
+/* 86 - V */ CharacterIdentifierStart,
+/* 87 - W */ CharacterIdentifierStart,
+/* 88 - X */ CharacterIdentifierStart,
+/* 89 - Y */ CharacterIdentifierStart,
+/* 90 - Z */ CharacterIdentifierStart,
/* 91 - [ */ CharacterOpenBracket,
/* 92 - \ */ CharacterBackSlash,
/* 93 - ] */ CharacterCloseBracket,
/* 94 - ^ */ CharacterXor,
-/* 95 - _ */ CharacterAlpha,
+/* 95 - _ */ CharacterIdentifierStart,
/* 96 - ` */ CharacterInvalid,
-/* 97 - a */ CharacterAlpha,
-/* 98 - b */ CharacterAlpha,
-/* 99 - c */ CharacterAlpha,
-/* 100 - d */ CharacterAlpha,
-/* 101 - e */ CharacterAlpha,
-/* 102 - f */ CharacterAlpha,
-/* 103 - g */ CharacterAlpha,
-/* 104 - h */ CharacterAlpha,
-/* 105 - i */ CharacterAlpha,
-/* 106 - j */ CharacterAlpha,
-/* 107 - k */ CharacterAlpha,
-/* 108 - l */ CharacterAlpha,
-/* 109 - m */ CharacterAlpha,
-/* 110 - n */ CharacterAlpha,
-/* 111 - o */ CharacterAlpha,
-/* 112 - p */ CharacterAlpha,
-/* 113 - q */ CharacterAlpha,
-/* 114 - r */ CharacterAlpha,
-/* 115 - s */ CharacterAlpha,
-/* 116 - t */ CharacterAlpha,
-/* 117 - u */ CharacterAlpha,
-/* 118 - v */ CharacterAlpha,
-/* 119 - w */ CharacterAlpha,
-/* 120 - x */ CharacterAlpha,
-/* 121 - y */ CharacterAlpha,
-/* 122 - z */ CharacterAlpha,
+/* 97 - a */ CharacterIdentifierStart,
+/* 98 - b */ CharacterIdentifierStart,
+/* 99 - c */ CharacterIdentifierStart,
+/* 100 - d */ CharacterIdentifierStart,
+/* 101 - e */ CharacterIdentifierStart,
+/* 102 - f */ CharacterIdentifierStart,
+/* 103 - g */ CharacterIdentifierStart,
+/* 104 - h */ CharacterIdentifierStart,
+/* 105 - i */ CharacterIdentifierStart,
+/* 106 - j */ CharacterIdentifierStart,
+/* 107 - k */ CharacterIdentifierStart,
+/* 108 - l */ CharacterIdentifierStart,
+/* 109 - m */ CharacterIdentifierStart,
+/* 110 - n */ CharacterIdentifierStart,
+/* 111 - o */ CharacterIdentifierStart,
+/* 112 - p */ CharacterIdentifierStart,
+/* 113 - q */ CharacterIdentifierStart,
+/* 114 - r */ CharacterIdentifierStart,
+/* 115 - s */ CharacterIdentifierStart,
+/* 116 - t */ CharacterIdentifierStart,
+/* 117 - u */ CharacterIdentifierStart,
+/* 118 - v */ CharacterIdentifierStart,
+/* 119 - w */ CharacterIdentifierStart,
+/* 120 - x */ CharacterIdentifierStart,
+/* 121 - y */ CharacterIdentifierStart,
+/* 122 - z */ CharacterIdentifierStart,
/* 123 - { */ CharacterOpenBrace,
/* 124 - | */ CharacterOr,
/* 125 - } */ CharacterCloseBrace,
@@ -335,7 +337,7 @@ static NEVER_INLINE bool isNonASCIIIdentStart(int c)
static inline bool isIdentStart(int c)
{
- return isASCII(c) ? isASCIIAlpha(c) || c == '$' || c == '_' : isNonASCIIIdentStart(c);
+ return isASCII(c) ? typesOfASCIICharacters[c] == CharacterIdentifierStart : isNonASCIIIdentStart(c);
}
static NEVER_INLINE bool isNonASCIIIdentPart(int c)
@@ -346,32 +348,35 @@ static NEVER_INLINE bool isNonASCIIIdentPart(int c)
static inline bool isIdentPart(int c)
{
- return isASCII(c) ? isASCIIAlphanumeric(c) || c == '$' || c == '_' : isNonASCIIIdentPart(c);
+ // Character types are divided into two groups depending on whether they can be part of an
+ // identifier or not. Those whose type value is less than or equal to CharacterNumber can be
+ // part of an identifier. (See the CharacterType definition for more details.)
+ return isASCII(c) ? typesOfASCIICharacters[c] <= CharacterNumber : isNonASCIIIdentPart(c);
}
static inline int singleEscape(int c)
{
switch (c) {
- case 'b':
- return 0x08;
- case 't':
- return 0x09;
- case 'n':
- return 0x0A;
- case 'v':
- return 0x0B;
- case 'f':
- return 0x0C;
- case 'r':
- return 0x0D;
- case '\\':
- return '\\';
- case '\'':
- return '\'';
- case '"':
- return '"';
- default:
- return 0;
+ case 'b':
+ return 0x08;
+ case 't':
+ return 0x09;
+ case 'n':
+ return 0x0A;
+ case 'v':
+ return 0x0B;
+ case 'f':
+ return 0x0C;
+ case 'r':
+ return 0x0D;
+ case '\\':
+ return '\\';
+ case '\'':
+ return '\'';
+ case '"':
+ return '"';
+ default:
+ return 0;
}
}
@@ -456,7 +461,11 @@ ALWAYS_INLINE bool Lexer::parseString(JSTokenData* lvalp)
stringStart = currentCharacter();
continue;
- } else if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
+ }
+ // Fast check for characters that require special handling.
+ // Catches -1, \n, \r, 0x2028, and 0x2029 as efficiently
+ // as possible, and lets through all common ASCII characters.
+ if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
// New-line or end of input is not allowed
if (UNLIKELY(isLineTerminator(m_current)) || UNLIKELY(m_current == -1))
return false;
@@ -472,7 +481,7 @@ ALWAYS_INLINE bool Lexer::parseString(JSTokenData* lvalp)
return true;
}
-JSTokenType Lexer::lex(JSTokenData* lvalp, JSTokenInfo* llocp)
+JSTokenType Lexer::lex(JSTokenData* lvalp, JSTokenInfo* llocp, LexType lexType)
{
ASSERT(!m_error);
ASSERT(m_buffer8.isEmpty());
@@ -493,295 +502,287 @@ start:
m_delimited = false;
- if (isASCII(m_current)) {
- ASSERT(m_current >= 0 && m_current < 128);
+ CharacterType type;
+ if (LIKELY(isASCII(m_current)))
+ type = static_cast<CharacterType>(typesOfASCIICharacters[m_current]);
+ else if (isNonASCIIIdentStart(m_current))
+ type = CharacterIdentifierStart;
+ else if (isLineTerminator(m_current))
+ type = CharacterLineTerminator;
+ else
+ type = CharacterInvalid;
- switch (AsciiCharacters[m_current]) {
- case CharacterGreater:
+ switch (type) {
+ case CharacterGreater:
+ shift();
+ if (m_current == '>') {
shift();
if (m_current == '>') {
shift();
- if (m_current == '>') {
- shift();
- if (m_current == '=') {
- shift();
- token = URSHIFTEQUAL;
- break;
- }
- token = URSHIFT;
- break;
- }
if (m_current == '=') {
shift();
- token = RSHIFTEQUAL;
+ token = URSHIFTEQUAL;
break;
}
- token = RSHIFT;
- break;
- }
- if (m_current == '=') {
- shift();
- token = GE;
- break;
- }
- token = GT;
- break;
- case CharacterEqual:
- shift();
- if (m_current == '=') {
- shift();
- if (m_current == '=') {
- shift();
- token = STREQ;
- break;
- }
- token = EQEQ;
- break;
- }
- token = EQUAL;
- break;
- case CharacterLess:
- shift();
- if (m_current == '!' && peek(1) == '-' && peek(2) == '-') {
- // <!-- marks the beginning of a line comment (for www usage)
- goto inSingleLineComment;
- }
- if (m_current == '<') {
- shift();
- if (m_current == '=') {
- shift();
- token = LSHIFTEQUAL;
- break;
- }
- token = LSHIFT;
- break;
- }
- if (m_current == '=') {
- shift();
- token = LE;
+ token = URSHIFT;
break;
}
- token = LT;
- break;
- case CharacterExclamationMark:
- shift();
if (m_current == '=') {
shift();
- if (m_current == '=') {
- shift();
- token = STRNEQ;
- break;
- }
- token = NE;
+ token = RSHIFTEQUAL;
break;
}
- token = EXCLAMATION;
+ token = RSHIFT;
break;
- case CharacterAdd:
+ }
+ if (m_current == '=') {
shift();
- if (m_current == '+') {
- shift();
- token = (!m_terminator) ? PLUSPLUS : AUTOPLUSPLUS;
- break;
- }
- if (m_current == '=') {
- shift();
- token = PLUSEQUAL;
- break;
- }
- token = PLUS;
+ token = GE;
break;
- case CharacterSub:
+ }
+ token = GT;
+ break;
+ case CharacterEqual:
+ shift();
+ if (m_current == '=') {
shift();
- if (m_current == '-') {
- shift();
- if (m_atLineStart && m_current == '>') {
- shift();
- goto inSingleLineComment;
- }
- token = (!m_terminator) ? MINUSMINUS : AUTOMINUSMINUS;
- break;
- }
if (m_current == '=') {
shift();
- token = MINUSEQUAL;
+ token = STREQ;
break;
}
- token = MINUS;
+ token = EQEQ;
break;
- case CharacterMultiply:
+ }
+ token = EQUAL;
+ break;
+ case CharacterLess:
+ shift();
+ if (m_current == '!' && peek(1) == '-' && peek(2) == '-') {
+ // <!-- marks the beginning of a line comment (for www usage)
+ goto inSingleLineComment;
+ }
+ if (m_current == '<') {
shift();
if (m_current == '=') {
shift();
- token = MULTEQUAL;
+ token = LSHIFTEQUAL;
break;
}
- token = TIMES;
+ token = LSHIFT;
break;
- case CharacterSlash:
+ }
+ if (m_current == '=') {
shift();
- if (m_current == '/') {
- shift();
- goto inSingleLineComment;
- }
- if (m_current == '*') {
- shift();
- goto inMultiLineComment;
- }
- if (m_current == '=') {
- shift();
- token = DIVEQUAL;
- break;
- }
- token = DIVIDE;
+ token = LE;
break;
- case CharacterAnd:
+ }
+ token = LT;
+ break;
+ case CharacterExclamationMark:
+ shift();
+ if (m_current == '=') {
shift();
- if (m_current == '&') {
- shift();
- token = AND;
- break;
- }
if (m_current == '=') {
shift();
- token = ANDEQUAL;
+ token = STRNEQ;
break;
}
- token = BITAND;
+ token = NE;
break;
- case CharacterXor:
+ }
+ token = EXCLAMATION;
+ break;
+ case CharacterAdd:
+ shift();
+ if (m_current == '+') {
shift();
- if (m_current == '=') {
- shift();
- token = XOREQUAL;
- break;
- }
- token = BITXOR;
+ token = (!m_terminator) ? PLUSPLUS : AUTOPLUSPLUS;
break;
- case CharacterModulo:
+ }
+ if (m_current == '=') {
shift();
- if (m_current == '=') {
- shift();
- token = MODEQUAL;
- break;
- }
- token = MOD;
+ token = PLUSEQUAL;
break;
- case CharacterOr:
+ }
+ token = PLUS;
+ break;
+ case CharacterSub:
+ shift();
+ if (m_current == '-') {
shift();
- if (m_current == '=') {
+ if (m_atLineStart && m_current == '>') {
shift();
- token = OREQUAL;
- break;
- }
- if (m_current == '|') {
- shift();
- token = OR;
- break;
- }
- token = BITOR;
- break;
- case CharacterDot:
- shift();
- if (isASCIIDigit(m_current)) {
- record8('.');
- goto inNumberAfterDecimalPoint;
+ goto inSingleLineComment;
}
- token = DOT;
- break;
- case CharacterOpenParen:
- token = OPENPAREN;
- shift();
+ token = (!m_terminator) ? MINUSMINUS : AUTOMINUSMINUS;
break;
- case CharacterCloseParen:
- token = CLOSEPAREN;
+ }
+ if (m_current == '=') {
shift();
+ token = MINUSEQUAL;
break;
- case CharacterOpenBracket:
- token = OPENBRACKET;
+ }
+ token = MINUS;
+ break;
+ case CharacterMultiply:
+ shift();
+ if (m_current == '=') {
shift();
+ token = MULTEQUAL;
break;
- case CharacterCloseBracket:
- token = CLOSEBRACKET;
+ }
+ token = TIMES;
+ break;
+ case CharacterSlash:
+ shift();
+ if (m_current == '/') {
shift();
- break;
- case CharacterComma:
- token = COMMA;
+ goto inSingleLineComment;
+ }
+ if (m_current == '*') {
shift();
- break;
- case CharacterColon:
- token = COLON;
+ goto inMultiLineComment;
+ }
+ if (m_current == '=') {
shift();
+ token = DIVEQUAL;
break;
- case CharacterQuestion:
- token = QUESTION;
+ }
+ token = DIVIDE;
+ break;
+ case CharacterAnd:
+ shift();
+ if (m_current == '&') {
shift();
+ token = AND;
break;
- case CharacterTilde:
- token = TILDE;
+ }
+ if (m_current == '=') {
shift();
+ token = ANDEQUAL;
break;
- case CharacterSemicolon:
- m_delimited = true;
+ }
+ token = BITAND;
+ break;
+ case CharacterXor:
+ shift();
+ if (m_current == '=') {
shift();
- token = SEMICOLON;
+ token = XOREQUAL;
break;
- case CharacterOpenBrace:
- lvalp->intValue = currentOffset();
+ }
+ token = BITXOR;
+ break;
+ case CharacterModulo:
+ shift();
+ if (m_current == '=') {
shift();
- token = OPENBRACE;
+ token = MODEQUAL;
break;
- case CharacterCloseBrace:
- lvalp->intValue = currentOffset();
- m_delimited = true;
+ }
+ token = MOD;
+ break;
+ case CharacterOr:
+ shift();
+ if (m_current == '=') {
shift();
- token = CLOSEBRACE;
+ token = OREQUAL;
break;
- case CharacterBackSlash:
- goto startIdentifierWithBackslash;
- case CharacterZero:
- goto startNumberWithZeroDigit;
- case CharacterNumber:
- goto startNumber;
- case CharacterQuote:
- if (UNLIKELY(!parseString(lvalp)))
- goto returnError;
+ }
+ if (m_current == '|') {
shift();
- m_delimited = false;
- token = STRING;
+ token = OR;
break;
- case CharacterAlpha:
- ASSERT(isIdentStart(m_current));
- goto startIdentifierOrKeyword;
- case CharacterLineTerminator:
- ASSERT(isLineTerminator(m_current));
- shiftLineTerminator();
- m_atLineStart = true;
- m_terminator = true;
- if (lastTokenWasRestrKeyword()) {
- token = SEMICOLON;
- goto doneSemicolon;
- }
- goto start;
- case CharacterInvalid:
- goto returnError;
- default:
- ASSERT_NOT_REACHED();
- goto returnError;
}
- } else {
- // Rare characters
-
- if (isNonASCIIIdentStart(m_current))
- goto startIdentifierOrKeyword;
- if (isLineTerminator(m_current)) {
- shiftLineTerminator();
- m_atLineStart = true;
- m_terminator = true;
- if (lastTokenWasRestrKeyword())
- goto doneSemicolon;
- goto start;
+ token = BITOR;
+ break;
+ case CharacterDot:
+ shift();
+ if (isASCIIDigit(m_current)) {
+ record8('.');
+ goto inNumberAfterDecimalPoint;
}
+ token = DOT;
+ break;
+ case CharacterOpenParen:
+ token = OPENPAREN;
+ shift();
+ break;
+ case CharacterCloseParen:
+ token = CLOSEPAREN;
+ shift();
+ break;
+ case CharacterOpenBracket:
+ token = OPENBRACKET;
+ shift();
+ break;
+ case CharacterCloseBracket:
+ token = CLOSEBRACKET;
+ shift();
+ break;
+ case CharacterComma:
+ token = COMMA;
+ shift();
+ break;
+ case CharacterColon:
+ token = COLON;
+ shift();
+ break;
+ case CharacterQuestion:
+ token = QUESTION;
+ shift();
+ break;
+ case CharacterTilde:
+ token = TILDE;
+ shift();
+ break;
+ case CharacterSemicolon:
+ m_delimited = true;
+ shift();
+ token = SEMICOLON;
+ break;
+ case CharacterOpenBrace:
+ lvalp->intValue = currentOffset();
+ shift();
+ token = OPENBRACE;
+ break;
+ case CharacterCloseBrace:
+ lvalp->intValue = currentOffset();
+ m_delimited = true;
+ shift();
+ token = CLOSEBRACE;
+ break;
+ case CharacterBackSlash:
+ goto startIdentifierWithBackslash;
+ case CharacterZero:
+ goto startNumberWithZeroDigit;
+ case CharacterNumber:
+ goto startNumber;
+ case CharacterQuote:
+ if (UNLIKELY(!parseString(lvalp)))
+ goto returnError;
+ shift();
+ m_delimited = false;
+ token = STRING;
+ break;
+ case CharacterIdentifierStart:
+ ASSERT(isIdentStart(m_current));
+ goto startIdentifierOrKeyword;
+ case CharacterLineTerminator:
+ ASSERT(isLineTerminator(m_current));
+ shiftLineTerminator();
+ m_atLineStart = true;
+ m_terminator = true;
+ if (lastTokenWasRestrKeyword()) {
+ token = SEMICOLON;
+ goto doneSemicolon;
+ }
+ goto start;
+ case CharacterInvalid:
+ goto returnError;
+ default:
+ ASSERT_NOT_REACHED();
goto returnError;
}
@@ -1021,9 +1022,11 @@ doneIdentifierOrKeyword: {
m_atLineStart = false;
m_delimited = false;
m_buffer16.resize(0);
- const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident);
- token = entry ? static_cast<JSTokenType>(entry->lexerValue()) : IDENT;
-
+ if (lexType == IdentifyReservedWords) {
+ const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident);
+ token = entry ? static_cast<JSTokenType>(entry->lexerValue()) : IDENT;
+ } else
+ token = IDENT;
// Fall through into returnToken.
}
@@ -1157,6 +1160,8 @@ void Lexer::clear()
SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine)
{
+ ASSERT(m_source->provider()->data()[openBrace] == '{');
+ ASSERT(m_source->provider()->data()[closeBrace] == '}');
return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);
}