diff options
author | Steve Block <steveblock@google.com> | 2011-05-13 06:44:40 -0700 |
---|---|---|
committer | Android (Google) Code Review <android-gerrit@google.com> | 2011-05-13 06:44:40 -0700 |
commit | 08014c20784f3db5df3a89b73cce46037b77eb59 (patch) | |
tree | 47749210d31e19e6e2f64036fa8fae2ad693476f /Source/JavaScriptCore/runtime/LiteralParser.cpp | |
parent | 860220379e56aeb66424861ad602b07ee22b4055 (diff) | |
parent | 4c3661f7918f8b3f139f824efb7855bedccb4c94 (diff) | |
download | external_webkit-08014c20784f3db5df3a89b73cce46037b77eb59.zip external_webkit-08014c20784f3db5df3a89b73cce46037b77eb59.tar.gz external_webkit-08014c20784f3db5df3a89b73cce46037b77eb59.tar.bz2 |
Merge changes Ide388898,Ic49f367c,I1158a808,Iacb6ca5d,I2100dd3a,I5c1abe54,Ib0ef9902,I31dbc523,I570314b3
* changes:
Merge WebKit at r75315: Update WebKit version
Merge WebKit at r75315: Add FrameLoaderClient PageCache stubs
Merge WebKit at r75315: Stub out AXObjectCache::remove()
Merge WebKit at r75315: Fix ImageBuffer
Merge WebKit at r75315: Fix PluginData::initPlugins()
Merge WebKit at r75315: Fix conflicts
Merge WebKit at r75315: Fix Makefiles
Merge WebKit at r75315: Move Android-specific WebCore files to Source
Merge WebKit at r75315: Initial merge by git.
Diffstat (limited to 'Source/JavaScriptCore/runtime/LiteralParser.cpp')
-rw-r--r-- | Source/JavaScriptCore/runtime/LiteralParser.cpp | 462 |
1 files changed, 462 insertions, 0 deletions
diff --git a/Source/JavaScriptCore/runtime/LiteralParser.cpp b/Source/JavaScriptCore/runtime/LiteralParser.cpp new file mode 100644 index 0000000..df87e7f --- /dev/null +++ b/Source/JavaScriptCore/runtime/LiteralParser.cpp @@ -0,0 +1,462 @@ +/* + * Copyright (C) 2009 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "LiteralParser.h" + +#include "JSArray.h" +#include "JSString.h" +#include "Lexer.h" +#include "UStringBuilder.h" +#include <wtf/ASCIICType.h> +#include <wtf/dtoa.h> + +namespace JSC { + +static inline bool isJSONWhiteSpace(const UChar& c) +{ + // The JSON RFC 4627 defines a list of allowed characters to be considered + // insignificant white space: http://www.ietf.org/rfc/rfc4627.txt (2. JSON Grammar). + return c == ' ' || c == 0x9 || c == 0xA || c == 0xD; +} + +LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token) +{ + while (m_ptr < m_end && isJSONWhiteSpace(*m_ptr)) + ++m_ptr; + + ASSERT(m_ptr <= m_end); + if (m_ptr >= m_end) { + token.type = TokEnd; + token.start = token.end = m_ptr; + return TokEnd; + } + token.type = TokError; + token.start = m_ptr; + switch (*m_ptr) { + case '[': + token.type = TokLBracket; + token.end = ++m_ptr; + return TokLBracket; + case ']': + token.type = TokRBracket; + token.end = ++m_ptr; + return TokRBracket; + case '(': + token.type = TokLParen; + token.end = ++m_ptr; + return TokLBracket; + case ')': + token.type = TokRParen; + token.end = ++m_ptr; + return TokRBracket; + case '{': + token.type = TokLBrace; + token.end = ++m_ptr; + return TokLBrace; + case '}': + token.type = TokRBrace; + token.end = ++m_ptr; + return TokRBrace; + case ',': + token.type = TokComma; + token.end = ++m_ptr; + return TokComma; + case ':': + token.type = TokColon; + token.end = ++m_ptr; + return TokColon; + case '"': + if (m_mode == StrictJSON) + return lexString<StrictJSON>(token); + return lexString<NonStrictJSON>(token); + case 't': + if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') { + m_ptr += 4; + token.type = TokTrue; + token.end = m_ptr; + return TokTrue; + } + break; + case 'f': + if (m_end - m_ptr >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e') { + m_ptr += 5; + token.type = TokFalse; + token.end = m_ptr; + return TokFalse; + } + break; + case 'n': + if (m_end - m_ptr >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l') { + m_ptr += 4; + token.type = TokNull; + token.end = m_ptr; + return TokNull; + } + break; + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + return lexNumber(token); + } + return TokError; +} + +template <LiteralParser::ParserMode mode> static inline bool isSafeStringCharacter(UChar c) +{ + return (c >= ' ' && (mode == LiteralParser::StrictJSON || c <= 0xff) && c != '\\' && c != '"') || c == '\t'; +} + +// "inline" is required here to help WINSCW compiler resolve specialized argument in templated functions. +template <LiteralParser::ParserMode mode> inline LiteralParser::TokenType LiteralParser::Lexer::lexString(LiteralParserToken& token) +{ + ++m_ptr; + const UChar* runStart; + UStringBuilder builder; + do { + runStart = m_ptr; + while (m_ptr < m_end && isSafeStringCharacter<mode>(*m_ptr)) + ++m_ptr; + if (runStart < m_ptr) + builder.append(runStart, m_ptr - runStart); + if ((mode == StrictJSON) && m_ptr < m_end && *m_ptr == '\\') { + ++m_ptr; + if (m_ptr >= m_end) + return TokError; + switch (*m_ptr) { + case '"': + builder.append('"'); + m_ptr++; + break; + case '\\': + builder.append('\\'); + m_ptr++; + break; + case '/': + builder.append('/'); + m_ptr++; + break; + case 'b': + builder.append('\b'); + m_ptr++; + break; + case 'f': + builder.append('\f'); + m_ptr++; + break; + case 'n': + builder.append('\n'); + m_ptr++; + break; + case 'r': + builder.append('\r'); + m_ptr++; + break; + case 't': + builder.append('\t'); + m_ptr++; + break; + + case 'u': + if ((m_end - m_ptr) < 5) // uNNNN == 5 characters + return TokError; + for (int i = 1; i < 5; i++) { + if (!isASCIIHexDigit(m_ptr[i])) + return TokError; + } + builder.append(JSC::Lexer::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4])); + m_ptr += 5; + break; + + default: + return TokError; + } + } + } while ((mode == StrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != '"'); + + if (m_ptr >= m_end || *m_ptr != '"') + return TokError; + + token.stringToken = builder.toUString(); + token.type = TokString; + token.end = ++m_ptr; + return TokString; +} + +LiteralParser::TokenType LiteralParser::Lexer::lexNumber(LiteralParserToken& token) +{ + // ES5 and json.org define numbers as + // number + // int + // int frac? exp? + // + // int + // -? 0 + // -? digit1-9 digits? + // + // digits + // digit digits? + // + // -?(0 | [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)? + + if (m_ptr < m_end && *m_ptr == '-') // -? + ++m_ptr; + + // (0 | [1-9][0-9]*) + if (m_ptr < m_end && *m_ptr == '0') // 0 + ++m_ptr; + else if (m_ptr < m_end && *m_ptr >= '1' && *m_ptr <= '9') { // [1-9] + ++m_ptr; + // [0-9]* + while (m_ptr < m_end && isASCIIDigit(*m_ptr)) + ++m_ptr; + } else + return TokError; + + // ('.' [0-9]+)? + if (m_ptr < m_end && *m_ptr == '.') { + ++m_ptr; + // [0-9]+ + if (m_ptr >= m_end || !isASCIIDigit(*m_ptr)) + return TokError; + + ++m_ptr; + while (m_ptr < m_end && isASCIIDigit(*m_ptr)) + ++m_ptr; + } + + // ([eE][+-]? [0-9]+)? + if (m_ptr < m_end && (*m_ptr == 'e' || *m_ptr == 'E')) { // [eE] + ++m_ptr; + + // [-+]? + if (m_ptr < m_end && (*m_ptr == '-' || *m_ptr == '+')) + ++m_ptr; + + // [0-9]+ + if (m_ptr >= m_end || !isASCIIDigit(*m_ptr)) + return TokError; + + ++m_ptr; + while (m_ptr < m_end && isASCIIDigit(*m_ptr)) + ++m_ptr; + } + + token.type = TokNumber; + token.end = m_ptr; + Vector<char, 64> buffer(token.end - token.start + 1); + int i; + for (i = 0; i < token.end - token.start; i++) { + ASSERT(static_cast<char>(token.start[i]) == token.start[i]); + buffer[i] = static_cast<char>(token.start[i]); + } + buffer[i] = 0; + char* end; + token.numberToken = WTF::strtod(buffer.data(), &end); + ASSERT(buffer.data() + (token.end - token.start) == end); + return TokNumber; +} + +JSValue LiteralParser::parse(ParserState initialState) +{ + ParserState state = initialState; + MarkedArgumentBuffer objectStack; + JSValue lastValue; + Vector<ParserState, 16> stateStack; + Vector<Identifier, 16> identifierStack; + while (1) { + switch(state) { + startParseArray: + case StartParseArray: { + JSArray* array = constructEmptyArray(m_exec); + objectStack.append(array); + // fallthrough + } + doParseArrayStartExpression: + case DoParseArrayStartExpression: { + TokenType lastToken = m_lexer.currentToken().type; + if (m_lexer.next() == TokRBracket) { + if (lastToken == TokComma) + return JSValue(); + m_lexer.next(); + lastValue = objectStack.last(); + objectStack.removeLast(); + break; + } + + stateStack.append(DoParseArrayEndExpression); + goto startParseExpression; + } + case DoParseArrayEndExpression: { + asArray(objectStack.last())->push(m_exec, lastValue); + + if (m_lexer.currentToken().type == TokComma) + goto doParseArrayStartExpression; + + if (m_lexer.currentToken().type != TokRBracket) + return JSValue(); + + m_lexer.next(); + lastValue = objectStack.last(); + objectStack.removeLast(); + break; + } + startParseObject: + case StartParseObject: { + JSObject* object = constructEmptyObject(m_exec); + objectStack.append(object); + + TokenType type = m_lexer.next(); + if (type == TokString) { + Lexer::LiteralParserToken identifierToken = m_lexer.currentToken(); + + // Check for colon + if (m_lexer.next() != TokColon) + return JSValue(); + + m_lexer.next(); + identifierStack.append(Identifier(m_exec, identifierToken.stringToken)); + stateStack.append(DoParseObjectEndExpression); + goto startParseExpression; + } else if (type != TokRBrace) + return JSValue(); + m_lexer.next(); + lastValue = objectStack.last(); + objectStack.removeLast(); + break; + } + doParseObjectStartExpression: + case DoParseObjectStartExpression: { + TokenType type = m_lexer.next(); + if (type != TokString) + return JSValue(); + Lexer::LiteralParserToken identifierToken = m_lexer.currentToken(); + + // Check for colon + if (m_lexer.next() != TokColon) + return JSValue(); + + m_lexer.next(); + identifierStack.append(Identifier(m_exec, identifierToken.stringToken)); + stateStack.append(DoParseObjectEndExpression); + goto startParseExpression; + } + case DoParseObjectEndExpression: + { + asObject(objectStack.last())->putDirect(identifierStack.last(), lastValue); + identifierStack.removeLast(); + if (m_lexer.currentToken().type == TokComma) + goto doParseObjectStartExpression; + if (m_lexer.currentToken().type != TokRBrace) + return JSValue(); + m_lexer.next(); + lastValue = objectStack.last(); + objectStack.removeLast(); + break; + } + startParseExpression: + case StartParseExpression: { + switch (m_lexer.currentToken().type) { + case TokLBracket: + goto startParseArray; + case TokLBrace: + goto startParseObject; + case TokString: { + Lexer::LiteralParserToken stringToken = m_lexer.currentToken(); + m_lexer.next(); + lastValue = jsString(m_exec, stringToken.stringToken); + break; + } + case TokNumber: { + Lexer::LiteralParserToken numberToken = m_lexer.currentToken(); + m_lexer.next(); + lastValue = jsNumber(numberToken.numberToken); + break; + } + case TokNull: + m_lexer.next(); + lastValue = jsNull(); + break; + + case TokTrue: + m_lexer.next(); + lastValue = jsBoolean(true); + break; + + case TokFalse: + m_lexer.next(); + lastValue = jsBoolean(false); + break; + + default: + // Error + return JSValue(); + } + break; + } + case StartParseStatement: { + switch (m_lexer.currentToken().type) { + case TokLBracket: + case TokNumber: + case TokString: + goto startParseExpression; + + case TokLParen: { + m_lexer.next(); + stateStack.append(StartParseStatementEndStatement); + goto startParseExpression; + } + default: + return JSValue(); + } + } + case StartParseStatementEndStatement: { + ASSERT(stateStack.isEmpty()); + if (m_lexer.currentToken().type != TokRParen) + return JSValue(); + if (m_lexer.next() == TokEnd) + return lastValue; + return JSValue(); + } + default: + ASSERT_NOT_REACHED(); + } + if (stateStack.isEmpty()) + return lastValue; + state = stateStack.last(); + stateStack.removeLast(); + continue; + } +} + +} |