diff options
author | Ben Murdoch <benm@google.com> | 2010-07-22 15:37:06 +0100 |
---|---|---|
committer | Ben Murdoch <benm@google.com> | 2010-07-27 10:20:25 +0100 |
commit | 967717af5423377c967781471ee106e2bb4e11c8 (patch) | |
tree | 1e701dc0a12f7f07cce1df4a7681717de77a211b /JavaScriptCore/parser | |
parent | dcc30a9fca45f634b1d3a12b276d3a0ccce99fc3 (diff) | |
download | external_webkit-967717af5423377c967781471ee106e2bb4e11c8.zip external_webkit-967717af5423377c967781471ee106e2bb4e11c8.tar.gz external_webkit-967717af5423377c967781471ee106e2bb4e11c8.tar.bz2 |
Merge WebKit at r63859 : Initial merge by git.
Change-Id: Ie8096c63ec7c991c9a9cba8bdd9c3b74a3b8ed62
Diffstat (limited to 'JavaScriptCore/parser')
-rw-r--r-- | JavaScriptCore/parser/JSParser.cpp | 12 | ||||
-rw-r--r-- | JavaScriptCore/parser/JSParser.h | 19 | ||||
-rw-r--r-- | JavaScriptCore/parser/Lexer.cpp | 621 | ||||
-rw-r--r-- | JavaScriptCore/parser/Lexer.h | 5 | ||||
-rw-r--r-- | JavaScriptCore/parser/Nodes.cpp | 2 | ||||
-rw-r--r-- | JavaScriptCore/parser/Nodes.h | 9 | ||||
-rw-r--r-- | JavaScriptCore/parser/Parser.h | 29 | ||||
-rw-r--r-- | JavaScriptCore/parser/SourceProvider.h | 10 |
8 files changed, 371 insertions, 336 deletions
diff --git a/JavaScriptCore/parser/JSParser.cpp b/JavaScriptCore/parser/JSParser.cpp index 1fb1a9187..13013c7 100644 --- a/JavaScriptCore/parser/JSParser.cpp +++ b/JavaScriptCore/parser/JSParser.cpp @@ -85,12 +85,12 @@ private: }; const JSToken& token() { return m_token; } - void next() + void next(Lexer::LexType lexType = Lexer::IdentifyReservedWords) { m_lastLine = token().m_info.line; m_lastTokenEnd = token().m_info.endOffset; m_lexer->setLastLineNumber(m_lastLine); - m_token.m_type = m_lexer->lex(&m_token.m_data, &m_token.m_info); + m_token.m_type = m_lexer->lex(&m_token.m_data, &m_token.m_info, lexType); m_tokenCount++; } @@ -1091,11 +1091,12 @@ template <bool complete, class TreeBuilder> TreeProperty JSParser::parseProperty { bool wasIdent = false; switch (token().m_type) { + namedProperty: case IDENT: wasIdent = true; case STRING: { const Identifier* ident = token().m_data.ident; - next(); + next(Lexer::IgnoreReservedWords); if (match(COLON)) { next(); TreeExpression node = parseAssignmentExpression(context); @@ -1129,7 +1130,8 @@ template <bool complete, class TreeBuilder> TreeProperty JSParser::parseProperty return context.template createProperty<complete>(m_globalData, propertyName, node, PropertyNode::Constant); } default: - fail(); + failIfFalse(token().m_type & KeywordTokenFlag); + goto namedProperty; } } @@ -1410,7 +1412,7 @@ template <class TreeBuilder> TreeExpression JSParser::parseMemberExpression(Tree } case DOT: { int expressionEnd = lastTokenEnd(); - next(); + next(Lexer::IgnoreReservedWords); matchOrFail(IDENT); base = context.createDotAccess(base, *token().m_data.ident, expressionStart, expressionEnd, tokenEnd()); next(); diff --git a/JavaScriptCore/parser/JSParser.h b/JavaScriptCore/parser/JSParser.h index 60f284c..b5a21d9 100644 --- a/JavaScriptCore/parser/JSParser.h +++ b/JavaScriptCore/parser/JSParser.h @@ -34,16 +34,17 @@ class SourceCode; enum { UnaryOpTokenFlag = 64, - BinaryOpTokenPrecedenceShift = 7, + KeywordTokenFlag = 128, + BinaryOpTokenPrecedenceShift = 8, BinaryOpTokenAllowsInPrecedenceAdditionalShift = 4, - BinaryOpTokenPrecedenceMask = 15 << BinaryOpTokenPrecedenceShift + BinaryOpTokenPrecedenceMask = 15 << BinaryOpTokenPrecedenceShift, }; #define BINARY_OP_PRECEDENCE(prec) (((prec) << BinaryOpTokenPrecedenceShift) | ((prec) << (BinaryOpTokenPrecedenceShift + BinaryOpTokenAllowsInPrecedenceAdditionalShift))) #define IN_OP_PRECEDENCE(prec) ((prec) << (BinaryOpTokenPrecedenceShift + BinaryOpTokenAllowsInPrecedenceAdditionalShift)) enum JSTokenType { - NULLTOKEN, + NULLTOKEN = KeywordTokenFlag, TRUETOKEN, FALSETOKEN, BREAK, @@ -69,7 +70,7 @@ enum JSTokenType { FINALLY, DEBUGGER, ELSE, - OPENBRACE, + OPENBRACE = 0, CLOSEBRACE, OPENPAREN, CLOSEPAREN, @@ -106,9 +107,9 @@ enum JSTokenType { TILDE = 3 | UnaryOpTokenFlag, AUTOPLUSPLUS = 4 | UnaryOpTokenFlag, AUTOMINUSMINUS = 5 | UnaryOpTokenFlag, - TYPEOF = 6 | UnaryOpTokenFlag, - VOIDTOKEN = 7 | UnaryOpTokenFlag, - DELETETOKEN = 8 | UnaryOpTokenFlag, + TYPEOF = 6 | UnaryOpTokenFlag | KeywordTokenFlag, + VOIDTOKEN = 7 | UnaryOpTokenFlag | KeywordTokenFlag, + DELETETOKEN = 8 | UnaryOpTokenFlag | KeywordTokenFlag, OR = 0 | BINARY_OP_PRECEDENCE(1), AND = 1 | BINARY_OP_PRECEDENCE(2), BITOR = 2 | BINARY_OP_PRECEDENCE(3), @@ -122,8 +123,8 @@ enum JSTokenType { GT = 10 | BINARY_OP_PRECEDENCE(7), LE = 11 | BINARY_OP_PRECEDENCE(7), GE = 12 | BINARY_OP_PRECEDENCE(7), - INSTANCEOF = 13 | BINARY_OP_PRECEDENCE(7), - INTOKEN = 14 | IN_OP_PRECEDENCE(7), + INSTANCEOF = 13 | BINARY_OP_PRECEDENCE(7) | KeywordTokenFlag, + INTOKEN = 14 | IN_OP_PRECEDENCE(7) | KeywordTokenFlag, LSHIFT = 15 | BINARY_OP_PRECEDENCE(8), RSHIFT = 16 | BINARY_OP_PRECEDENCE(8), URSHIFT = 17 | BINARY_OP_PRECEDENCE(8), diff --git a/JavaScriptCore/parser/Lexer.cpp b/JavaScriptCore/parser/Lexer.cpp index 45fe007..d7a122e 100644 --- a/JavaScriptCore/parser/Lexer.cpp +++ b/JavaScriptCore/parser/Lexer.cpp @@ -46,14 +46,16 @@ using namespace Unicode; namespace JSC { -enum CharacterTypes { +enum CharacterType { // Types for the main switch - CharacterInvalid, - CharacterAlpha, + // The first three types are fixed, and also used for identifying + // ASCII alpha and alphanumeric characters (see isIdentStart and isIdentPart). + CharacterIdentifierStart, CharacterZero, CharacterNumber, + CharacterInvalid, CharacterLineTerminator, CharacterExclamationMark, CharacterOpenParen, @@ -87,8 +89,8 @@ enum CharacterTypes { CharacterWhiteSpace, }; -// 128 ascii codes -static unsigned short AsciiCharacters[128] = { +// 128 ASCII codes +static const unsigned short typesOfASCIICharacters[128] = { /* 0 - Null */ CharacterInvalid, /* 1 - Start of Heading */ CharacterInvalid, /* 2 - Start of Text */ CharacterInvalid, @@ -125,7 +127,7 @@ static unsigned short AsciiCharacters[128] = { /* 33 - ! */ CharacterExclamationMark, /* 34 - " */ CharacterQuote, /* 35 - # */ CharacterInvalid, -/* 36 - $ */ CharacterAlpha, +/* 36 - $ */ CharacterIdentifierStart, /* 37 - % */ CharacterModulo, /* 38 - & */ CharacterAnd, /* 39 - ' */ CharacterQuote, @@ -154,64 +156,64 @@ static unsigned short AsciiCharacters[128] = { /* 62 - > */ CharacterGreater, /* 63 - ? */ CharacterQuestion, /* 64 - @ */ CharacterInvalid, -/* 65 - A */ CharacterAlpha, -/* 66 - B */ CharacterAlpha, -/* 67 - C */ CharacterAlpha, -/* 68 - D */ CharacterAlpha, -/* 69 - E */ CharacterAlpha, -/* 70 - F */ CharacterAlpha, -/* 71 - G */ CharacterAlpha, -/* 72 - H */ CharacterAlpha, -/* 73 - I */ CharacterAlpha, -/* 74 - J */ CharacterAlpha, -/* 75 - K */ CharacterAlpha, -/* 76 - L */ CharacterAlpha, -/* 77 - M */ CharacterAlpha, -/* 78 - N */ CharacterAlpha, -/* 79 - O */ CharacterAlpha, -/* 80 - P */ CharacterAlpha, -/* 81 - Q */ CharacterAlpha, -/* 82 - R */ CharacterAlpha, -/* 83 - S */ CharacterAlpha, -/* 84 - T */ CharacterAlpha, -/* 85 - U */ CharacterAlpha, -/* 86 - V */ CharacterAlpha, -/* 87 - W */ CharacterAlpha, -/* 88 - X */ CharacterAlpha, -/* 89 - Y */ CharacterAlpha, -/* 90 - Z */ CharacterAlpha, +/* 65 - A */ CharacterIdentifierStart, +/* 66 - B */ CharacterIdentifierStart, +/* 67 - C */ CharacterIdentifierStart, +/* 68 - D */ CharacterIdentifierStart, +/* 69 - E */ CharacterIdentifierStart, +/* 70 - F */ CharacterIdentifierStart, +/* 71 - G */ CharacterIdentifierStart, +/* 72 - H */ CharacterIdentifierStart, +/* 73 - I */ CharacterIdentifierStart, +/* 74 - J */ CharacterIdentifierStart, +/* 75 - K */ CharacterIdentifierStart, +/* 76 - L */ CharacterIdentifierStart, +/* 77 - M */ CharacterIdentifierStart, +/* 78 - N */ CharacterIdentifierStart, +/* 79 - O */ CharacterIdentifierStart, +/* 80 - P */ CharacterIdentifierStart, +/* 81 - Q */ CharacterIdentifierStart, +/* 82 - R */ CharacterIdentifierStart, +/* 83 - S */ CharacterIdentifierStart, +/* 84 - T */ CharacterIdentifierStart, +/* 85 - U */ CharacterIdentifierStart, +/* 86 - V */ CharacterIdentifierStart, +/* 87 - W */ CharacterIdentifierStart, +/* 88 - X */ CharacterIdentifierStart, +/* 89 - Y */ CharacterIdentifierStart, +/* 90 - Z */ CharacterIdentifierStart, /* 91 - [ */ CharacterOpenBracket, /* 92 - \ */ CharacterBackSlash, /* 93 - ] */ CharacterCloseBracket, /* 94 - ^ */ CharacterXor, -/* 95 - _ */ CharacterAlpha, +/* 95 - _ */ CharacterIdentifierStart, /* 96 - ` */ CharacterInvalid, -/* 97 - a */ CharacterAlpha, -/* 98 - b */ CharacterAlpha, -/* 99 - c */ CharacterAlpha, -/* 100 - d */ CharacterAlpha, -/* 101 - e */ CharacterAlpha, -/* 102 - f */ CharacterAlpha, -/* 103 - g */ CharacterAlpha, -/* 104 - h */ CharacterAlpha, -/* 105 - i */ CharacterAlpha, -/* 106 - j */ CharacterAlpha, -/* 107 - k */ CharacterAlpha, -/* 108 - l */ CharacterAlpha, -/* 109 - m */ CharacterAlpha, -/* 110 - n */ CharacterAlpha, -/* 111 - o */ CharacterAlpha, -/* 112 - p */ CharacterAlpha, -/* 113 - q */ CharacterAlpha, -/* 114 - r */ CharacterAlpha, -/* 115 - s */ CharacterAlpha, -/* 116 - t */ CharacterAlpha, -/* 117 - u */ CharacterAlpha, -/* 118 - v */ CharacterAlpha, -/* 119 - w */ CharacterAlpha, -/* 120 - x */ CharacterAlpha, -/* 121 - y */ CharacterAlpha, -/* 122 - z */ CharacterAlpha, +/* 97 - a */ CharacterIdentifierStart, +/* 98 - b */ CharacterIdentifierStart, +/* 99 - c */ CharacterIdentifierStart, +/* 100 - d */ CharacterIdentifierStart, +/* 101 - e */ CharacterIdentifierStart, +/* 102 - f */ CharacterIdentifierStart, +/* 103 - g */ CharacterIdentifierStart, +/* 104 - h */ CharacterIdentifierStart, +/* 105 - i */ CharacterIdentifierStart, +/* 106 - j */ CharacterIdentifierStart, +/* 107 - k */ CharacterIdentifierStart, +/* 108 - l */ CharacterIdentifierStart, +/* 109 - m */ CharacterIdentifierStart, +/* 110 - n */ CharacterIdentifierStart, +/* 111 - o */ CharacterIdentifierStart, +/* 112 - p */ CharacterIdentifierStart, +/* 113 - q */ CharacterIdentifierStart, +/* 114 - r */ CharacterIdentifierStart, +/* 115 - s */ CharacterIdentifierStart, +/* 116 - t */ CharacterIdentifierStart, +/* 117 - u */ CharacterIdentifierStart, +/* 118 - v */ CharacterIdentifierStart, +/* 119 - w */ CharacterIdentifierStart, +/* 120 - x */ CharacterIdentifierStart, +/* 121 - y */ CharacterIdentifierStart, +/* 122 - z */ CharacterIdentifierStart, /* 123 - { */ CharacterOpenBrace, /* 124 - | */ CharacterOr, /* 125 - } */ CharacterCloseBrace, @@ -335,7 +337,7 @@ static NEVER_INLINE bool isNonASCIIIdentStart(int c) static inline bool isIdentStart(int c) { - return isASCII(c) ? isASCIIAlpha(c) || c == '$' || c == '_' : isNonASCIIIdentStart(c); + return isASCII(c) ? typesOfASCIICharacters[c] == CharacterIdentifierStart : isNonASCIIIdentStart(c); } static NEVER_INLINE bool isNonASCIIIdentPart(int c) @@ -346,32 +348,35 @@ static NEVER_INLINE bool isNonASCIIIdentPart(int c) static inline bool isIdentPart(int c) { - return isASCII(c) ? isASCIIAlphanumeric(c) || c == '$' || c == '_' : isNonASCIIIdentPart(c); + // Character types are divided into two groups depending on whether they can be part of an + // identifier or not. Those whose type value is less or equal than CharacterNumber can be + // part of an identifier. (See the CharacterType definition for more details.) + return isASCII(c) ? typesOfASCIICharacters[c] <= CharacterNumber : isNonASCIIIdentPart(c); } static inline int singleEscape(int c) { switch (c) { - case 'b': - return 0x08; - case 't': - return 0x09; - case 'n': - return 0x0A; - case 'v': - return 0x0B; - case 'f': - return 0x0C; - case 'r': - return 0x0D; - case '\\': - return '\\'; - case '\'': - return '\''; - case '"': - return '"'; - default: - return 0; + case 'b': + return 0x08; + case 't': + return 0x09; + case 'n': + return 0x0A; + case 'v': + return 0x0B; + case 'f': + return 0x0C; + case 'r': + return 0x0D; + case '\\': + return '\\'; + case '\'': + return '\''; + case '"': + return '"'; + default: + return 0; } } @@ -456,7 +461,11 @@ ALWAYS_INLINE bool Lexer::parseString(JSTokenData* lvalp) stringStart = currentCharacter(); continue; - } else if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) { + } + // Fast check for characters that require special handling. + // Catches -1, \n, \r, 0x2028, and 0x2029 as efficiently + // as possible, and lets through all common ASCII characters. + if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) { // New-line or end of input is not allowed if (UNLIKELY(isLineTerminator(m_current)) || UNLIKELY(m_current == -1)) return false; @@ -472,7 +481,7 @@ ALWAYS_INLINE bool Lexer::parseString(JSTokenData* lvalp) return true; } -JSTokenType Lexer::lex(JSTokenData* lvalp, JSTokenInfo* llocp) +JSTokenType Lexer::lex(JSTokenData* lvalp, JSTokenInfo* llocp, LexType lexType) { ASSERT(!m_error); ASSERT(m_buffer8.isEmpty()); @@ -493,295 +502,287 @@ start: m_delimited = false; - if (isASCII(m_current)) { - ASSERT(m_current >= 0 && m_current < 128); + CharacterType type; + if (LIKELY(isASCII(m_current))) + type = static_cast<CharacterType>(typesOfASCIICharacters[m_current]); + else if (isNonASCIIIdentStart(m_current)) + type = CharacterIdentifierStart; + else if (isLineTerminator(m_current)) + type = CharacterLineTerminator; + else + type = CharacterInvalid; - switch (AsciiCharacters[m_current]) { - case CharacterGreater: + switch (type) { + case CharacterGreater: + shift(); + if (m_current == '>') { shift(); if (m_current == '>') { shift(); - if (m_current == '>') { - shift(); - if (m_current == '=') { - shift(); - token = URSHIFTEQUAL; - break; - } - token = URSHIFT; - break; - } if (m_current == '=') { shift(); - token = RSHIFTEQUAL; + token = URSHIFTEQUAL; break; } - token = RSHIFT; - break; - } - if (m_current == '=') { - shift(); - token = GE; - break; - } - token = GT; - break; - case CharacterEqual: - shift(); - if (m_current == '=') { - shift(); - if (m_current == '=') { - shift(); - token = STREQ; - break; - } - token = EQEQ; - break; - } - token = EQUAL; - break; - case CharacterLess: - shift(); - if (m_current == '!' && peek(1) == '-' && peek(2) == '-') { - // <!-- marks the beginning of a line comment (for www usage) - goto inSingleLineComment; - } - if (m_current == '<') { - shift(); - if (m_current == '=') { - shift(); - token = LSHIFTEQUAL; - break; - } - token = LSHIFT; - break; - } - if (m_current == '=') { - shift(); - token = LE; + token = URSHIFT; break; } - token = LT; - break; - case CharacterExclamationMark: - shift(); if (m_current == '=') { shift(); - if (m_current == '=') { - shift(); - token = STRNEQ; - break; - } - token = NE; + token = RSHIFTEQUAL; break; } - token = EXCLAMATION; + token = RSHIFT; break; - case CharacterAdd: + } + if (m_current == '=') { shift(); - if (m_current == '+') { - shift(); - token = (!m_terminator) ? PLUSPLUS : AUTOPLUSPLUS; - break; - } - if (m_current == '=') { - shift(); - token = PLUSEQUAL; - break; - } - token = PLUS; + token = GE; break; - case CharacterSub: + } + token = GT; + break; + case CharacterEqual: + shift(); + if (m_current == '=') { shift(); - if (m_current == '-') { - shift(); - if (m_atLineStart && m_current == '>') { - shift(); - goto inSingleLineComment; - } - token = (!m_terminator) ? MINUSMINUS : AUTOMINUSMINUS; - break; - } if (m_current == '=') { shift(); - token = MINUSEQUAL; + token = STREQ; break; } - token = MINUS; + token = EQEQ; break; - case CharacterMultiply: + } + token = EQUAL; + break; + case CharacterLess: + shift(); + if (m_current == '!' && peek(1) == '-' && peek(2) == '-') { + // <!-- marks the beginning of a line comment (for www usage) + goto inSingleLineComment; + } + if (m_current == '<') { shift(); if (m_current == '=') { shift(); - token = MULTEQUAL; + token = LSHIFTEQUAL; break; } - token = TIMES; + token = LSHIFT; break; - case CharacterSlash: + } + if (m_current == '=') { shift(); - if (m_current == '/') { - shift(); - goto inSingleLineComment; - } - if (m_current == '*') { - shift(); - goto inMultiLineComment; - } - if (m_current == '=') { - shift(); - token = DIVEQUAL; - break; - } - token = DIVIDE; + token = LE; break; - case CharacterAnd: + } + token = LT; + break; + case CharacterExclamationMark: + shift(); + if (m_current == '=') { shift(); - if (m_current == '&') { - shift(); - token = AND; - break; - } if (m_current == '=') { shift(); - token = ANDEQUAL; + token = STRNEQ; break; } - token = BITAND; + token = NE; break; - case CharacterXor: + } + token = EXCLAMATION; + break; + case CharacterAdd: + shift(); + if (m_current == '+') { shift(); - if (m_current == '=') { - shift(); - token = XOREQUAL; - break; - } - token = BITXOR; + token = (!m_terminator) ? PLUSPLUS : AUTOPLUSPLUS; break; - case CharacterModulo: + } + if (m_current == '=') { shift(); - if (m_current == '=') { - shift(); - token = MODEQUAL; - break; - } - token = MOD; + token = PLUSEQUAL; break; - case CharacterOr: + } + token = PLUS; + break; + case CharacterSub: + shift(); + if (m_current == '-') { shift(); - if (m_current == '=') { + if (m_atLineStart && m_current == '>') { shift(); - token = OREQUAL; - break; - } - if (m_current == '|') { - shift(); - token = OR; - break; - } - token = BITOR; - break; - case CharacterDot: - shift(); - if (isASCIIDigit(m_current)) { - record8('.'); - goto inNumberAfterDecimalPoint; + goto inSingleLineComment; } - token = DOT; - break; - case CharacterOpenParen: - token = OPENPAREN; - shift(); + token = (!m_terminator) ? MINUSMINUS : AUTOMINUSMINUS; break; - case CharacterCloseParen: - token = CLOSEPAREN; + } + if (m_current == '=') { shift(); + token = MINUSEQUAL; break; - case CharacterOpenBracket: - token = OPENBRACKET; + } + token = MINUS; + break; + case CharacterMultiply: + shift(); + if (m_current == '=') { shift(); + token = MULTEQUAL; break; - case CharacterCloseBracket: - token = CLOSEBRACKET; + } + token = TIMES; + break; + case CharacterSlash: + shift(); + if (m_current == '/') { shift(); - break; - case CharacterComma: - token = COMMA; + goto inSingleLineComment; + } + if (m_current == '*') { shift(); - break; - case CharacterColon: - token = COLON; + goto inMultiLineComment; + } + if (m_current == '=') { shift(); + token = DIVEQUAL; break; - case CharacterQuestion: - token = QUESTION; + } + token = DIVIDE; + break; + case CharacterAnd: + shift(); + if (m_current == '&') { shift(); + token = AND; break; - case CharacterTilde: - token = TILDE; + } + if (m_current == '=') { shift(); + token = ANDEQUAL; break; - case CharacterSemicolon: - m_delimited = true; + } + token = BITAND; + break; + case CharacterXor: + shift(); + if (m_current == '=') { shift(); - token = SEMICOLON; + token = XOREQUAL; break; - case CharacterOpenBrace: - lvalp->intValue = currentOffset(); + } + token = BITXOR; + break; + case CharacterModulo: + shift(); + if (m_current == '=') { shift(); - token = OPENBRACE; + token = MODEQUAL; break; - case CharacterCloseBrace: - lvalp->intValue = currentOffset(); - m_delimited = true; + } + token = MOD; + break; + case CharacterOr: + shift(); + if (m_current == '=') { shift(); - token = CLOSEBRACE; + token = OREQUAL; break; - case CharacterBackSlash: - goto startIdentifierWithBackslash; - case CharacterZero: - goto startNumberWithZeroDigit; - case CharacterNumber: - goto startNumber; - case CharacterQuote: - if (UNLIKELY(!parseString(lvalp))) - goto returnError; + } + if (m_current == '|') { shift(); - m_delimited = false; - token = STRING; + token = OR; break; - case CharacterAlpha: - ASSERT(isIdentStart(m_current)); - goto startIdentifierOrKeyword; - case CharacterLineTerminator: - ASSERT(isLineTerminator(m_current)); - shiftLineTerminator(); - m_atLineStart = true; - m_terminator = true; - if (lastTokenWasRestrKeyword()) { - token = SEMICOLON; - goto doneSemicolon; - } - goto start; - case CharacterInvalid: - goto returnError; - default: - ASSERT_NOT_REACHED(); - goto returnError; } - } else { - // Rare characters - - if (isNonASCIIIdentStart(m_current)) - goto startIdentifierOrKeyword; - if (isLineTerminator(m_current)) { - shiftLineTerminator(); - m_atLineStart = true; - m_terminator = true; - if (lastTokenWasRestrKeyword()) - goto doneSemicolon; - goto start; + token = BITOR; + break; + case CharacterDot: + shift(); + if (isASCIIDigit(m_current)) { + record8('.'); + goto inNumberAfterDecimalPoint; } + token = DOT; + break; + case CharacterOpenParen: + token = OPENPAREN; + shift(); + break; + case CharacterCloseParen: + token = CLOSEPAREN; + shift(); + break; + case CharacterOpenBracket: + token = OPENBRACKET; + shift(); + break; + case CharacterCloseBracket: + token = CLOSEBRACKET; + shift(); + break; + case CharacterComma: + token = COMMA; + shift(); + break; + case CharacterColon: + token = COLON; + shift(); + break; + case CharacterQuestion: + token = QUESTION; + shift(); + break; + case CharacterTilde: + token = TILDE; + shift(); + break; + case CharacterSemicolon: + m_delimited = true; + shift(); + token = SEMICOLON; + break; + case CharacterOpenBrace: + lvalp->intValue = currentOffset(); + shift(); + token = OPENBRACE; + break; + case CharacterCloseBrace: + lvalp->intValue = currentOffset(); + m_delimited = true; + shift(); + token = CLOSEBRACE; + break; + case CharacterBackSlash: + goto startIdentifierWithBackslash; + case CharacterZero: + goto startNumberWithZeroDigit; + case CharacterNumber: + goto startNumber; + case CharacterQuote: + if (UNLIKELY(!parseString(lvalp))) + goto returnError; + shift(); + m_delimited = false; + token = STRING; + break; + case CharacterIdentifierStart: + ASSERT(isIdentStart(m_current)); + goto startIdentifierOrKeyword; + case CharacterLineTerminator: + ASSERT(isLineTerminator(m_current)); + shiftLineTerminator(); + m_atLineStart = true; + m_terminator = true; + if (lastTokenWasRestrKeyword()) { + token = SEMICOLON; + goto doneSemicolon; + } + goto start; + case CharacterInvalid: + goto returnError; + default: + ASSERT_NOT_REACHED(); goto returnError; } @@ -1021,9 +1022,11 @@ doneIdentifierOrKeyword: { m_atLineStart = false; m_delimited = false; m_buffer16.resize(0); - const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident); - token = entry ? static_cast<JSTokenType>(entry->lexerValue()) : IDENT; - + if (lexType == IdentifyReservedWords) { + const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident); + token = entry ? static_cast<JSTokenType>(entry->lexerValue()) : IDENT; + } else + token = IDENT; // Fall through into returnToken. } @@ -1157,6 +1160,8 @@ void Lexer::clear() SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine) { + ASSERT(m_source->provider()->data()[openBrace] == '{'); + ASSERT(m_source->provider()->data()[closeBrace] == '}'); return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine); } diff --git a/JavaScriptCore/parser/Lexer.h b/JavaScriptCore/parser/Lexer.h index 5ab7ad7..4f7af44 100644 --- a/JavaScriptCore/parser/Lexer.h +++ b/JavaScriptCore/parser/Lexer.h @@ -50,7 +50,8 @@ namespace JSC { void setIsReparsing() { m_isReparsing = true; } // Functions for the parser itself. - JSTokenType lex(JSTokenData* lvalp, JSTokenInfo* llocp); + enum LexType { IdentifyReservedWords, IgnoreReservedWords }; + JSTokenType lex(JSTokenData* lvalp, JSTokenInfo* llocp, LexType); int lineNumber() const { return m_lineNumber; } void setLastLineNumber(int lastLineNumber) { m_lastLineNumber = lastLineNumber; } int lastLineNumber() const { return m_lastLineNumber; } @@ -126,7 +127,7 @@ namespace JSC { inline bool Lexer::isWhiteSpace(int ch) { - return isASCII(ch) ? (ch == ' ' || ch == '\t' || ch == 0xB || ch == 0xC) : WTF::Unicode::isSeparatorSpace(ch); + return isASCII(ch) ? (ch == ' ' || ch == '\t' || ch == 0xB || ch == 0xC) : (WTF::Unicode::isSeparatorSpace(ch) || ch == 0xFEFF); } inline bool Lexer::isLineTerminator(int ch) diff --git a/JavaScriptCore/parser/Nodes.cpp b/JavaScriptCore/parser/Nodes.cpp index ffea524..c41d735 100644 --- a/JavaScriptCore/parser/Nodes.cpp +++ b/JavaScriptCore/parser/Nodes.cpp @@ -67,7 +67,7 @@ void SourceElements::append(StatementNode* statement) m_statements.append(statement); } -inline StatementNode* SourceElements::singleStatement() const +StatementNode* SourceElements::singleStatement() const { size_t size = m_statements.size(); return size == 1 ? m_statements[0] : 0; diff --git a/JavaScriptCore/parser/Nodes.h b/JavaScriptCore/parser/Nodes.h index 6206384..d25079b 100644 --- a/JavaScriptCore/parser/Nodes.h +++ b/JavaScriptCore/parser/Nodes.h @@ -152,6 +152,7 @@ namespace JSC { virtual bool isCommaNode() const { return false; } virtual bool isSimpleArray() const { return false; } virtual bool isAdd() const { return false; } + virtual bool isSubtract() const { return false; } virtual bool hasConditionContextCodegen() const { return false; } virtual void emitBytecodeInConditionContext(BytecodeGenerator&, Label*, Label*, bool) { ASSERT_NOT_REACHED(); } @@ -806,6 +807,9 @@ namespace JSC { RegisterID* emitStrcat(BytecodeGenerator& generator, RegisterID* destination, RegisterID* lhs = 0, ReadModifyResolveNode* emitExpressionInfoForMe = 0); + ExpressionNode* lhs() { return m_expr1; }; + ExpressionNode* rhs() { return m_expr2; }; + private: virtual RegisterID* emitBytecode(BytecodeGenerator&, RegisterID* = 0); @@ -854,6 +858,8 @@ namespace JSC { class SubNode : public BinaryOpNode { public: SubNode(JSGlobalData*, ExpressionNode* expr1, ExpressionNode* expr2, bool rightHasAssignments); + + virtual bool isSubtract() const { return true; } }; class LeftShiftNode : public BinaryOpNode { @@ -1143,6 +1149,7 @@ namespace JSC { public: BlockNode(JSGlobalData*, SourceElements* = 0); + StatementNode* singleStatement() const; StatementNode* lastStatement() const; private: @@ -1294,6 +1301,8 @@ namespace JSC { public: ReturnNode(JSGlobalData*, ExpressionNode* value); + ExpressionNode* value() { return m_value; } + private: virtual RegisterID* emitBytecode(BytecodeGenerator&, RegisterID* = 0); diff --git a/JavaScriptCore/parser/Parser.h b/JavaScriptCore/parser/Parser.h index 894f709..c167980 100644 --- a/JavaScriptCore/parser/Parser.h +++ b/JavaScriptCore/parser/Parser.h @@ -24,6 +24,7 @@ #define Parser_h #include "Debugger.h" +#include "ExceptionHelpers.h" #include "Executable.h" #include "JSGlobalObject.h" #include "Lexer.h" @@ -38,6 +39,7 @@ namespace JSC { class FunctionBodyNode; + class ProgramNode; class UString; @@ -46,7 +48,7 @@ namespace JSC { class Parser : public Noncopyable { public: template <class ParsedNode> - PassRefPtr<ParsedNode> parse(JSGlobalData* globalData, Debugger*, ExecState*, const SourceCode& source, int* errLine = 0, UString* errMsg = 0); + PassRefPtr<ParsedNode> parse(JSGlobalData* globalData, JSGlobalObject* lexicalGlobalObject, Debugger*, ExecState*, const SourceCode& source, JSObject** exception); void didFinishParsing(SourceElements*, ParserArenaData<DeclarationStacks::VarStack>*, ParserArenaData<DeclarationStacks::FunctionStack>*, CodeFeatures features, int lastLine, int numConstants); @@ -56,6 +58,10 @@ namespace JSC { private: void parse(JSGlobalData*, int* errLine, UString* errMsg); + // Used to determine type of error to report. + bool isFunctionBodyNode(ScopeNode*) { return false; } + bool isFunctionBodyNode(FunctionBodyNode*) { return true; } + ParserArena m_arena; const SourceCode* m_source; SourceElements* m_sourceElements; @@ -67,12 +73,16 @@ namespace JSC { }; template <class ParsedNode> - PassRefPtr<ParsedNode> Parser::parse(JSGlobalData* globalData, Debugger* debugger, ExecState* debuggerExecState, const SourceCode& source, int* errLine, UString* errMsg) + PassRefPtr<ParsedNode> Parser::parse(JSGlobalData* globalData, JSGlobalObject* lexicalGlobalObject, Debugger* debugger, ExecState* debuggerExecState, const SourceCode& source, JSObject** exception) { + ASSERT(exception && !*exception); + int errLine; + UString errMsg; + m_source = &source; if (ParsedNode::scopeIsFunction) globalData->lexer->setIsReparsing(); - parse(globalData, errLine, errMsg); + parse(globalData, &errLine, &errMsg); RefPtr<ParsedNode> result; if (m_sourceElements) { @@ -84,6 +94,17 @@ namespace JSC { m_features, m_numConstants); result->setLoc(m_source->firstLine(), m_lastLine); + } else if (lexicalGlobalObject) { + // We can never see a syntax error when reparsing a function, since we should have + // reported the error when parsing the containing program or eval code. So if we're + // parsing a function body node, we assume that what actually happened here is that + // we ran out of stack while parsing. If we see an error while parsing eval or program + // code we assume that it was a syntax error since running out of stack is much less + // likely, and we are currently unable to distinguish between the two cases. + if (isFunctionBodyNode(static_cast<ParsedNode*>(0))) + *exception = createStackOverflowError(lexicalGlobalObject); + else + *exception = addErrorInfo(globalData, createSyntaxError(lexicalGlobalObject, errMsg), errLine, source); } m_arena.reset(); @@ -94,7 +115,7 @@ namespace JSC { m_funcDeclarations = 0; if (debugger && !ParsedNode::scopeIsFunction) - debugger->sourceParsed(debuggerExecState, source, *errLine, *errMsg); + debugger->sourceParsed(debuggerExecState, source, errLine, errMsg); return result.release(); } diff --git a/JavaScriptCore/parser/SourceProvider.h b/JavaScriptCore/parser/SourceProvider.h index 6b9c028..5a57542 100644 --- a/JavaScriptCore/parser/SourceProvider.h +++ b/JavaScriptCore/parser/SourceProvider.h @@ -60,9 +60,9 @@ namespace JSC { class UStringSourceProvider : public SourceProvider { public: - static PassRefPtr<UStringSourceProvider> create(const UString& source, const UString& url, bool hasBOMs = true) + static PassRefPtr<UStringSourceProvider> create(const UString& source, const UString& url) { - return adoptRef(new UStringSourceProvider(source, url, hasBOMs)); + return adoptRef(new UStringSourceProvider(source, url)); } UString getRange(int start, int end) const @@ -73,14 +73,10 @@ namespace JSC { int length() const { return m_source.size(); } private: - UStringSourceProvider(const UString& source, const UString& url, bool hasBOMs) + UStringSourceProvider(const UString& source, const UString& url) : SourceProvider(url) , m_source(source) { - if (hasBOMs && m_source.size()) { - bool scratch = false; - m_source = UString(m_source.rep()->copyStringWithoutBOMs(false, scratch)); - } } UString m_source; |