summaryrefslogtreecommitdiffstats
path: root/Source/JavaScriptCore/runtime/LiteralParser.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'Source/JavaScriptCore/runtime/LiteralParser.cpp')
-rw-r--r--Source/JavaScriptCore/runtime/LiteralParser.cpp462
1 files changed, 462 insertions, 0 deletions
diff --git a/Source/JavaScriptCore/runtime/LiteralParser.cpp b/Source/JavaScriptCore/runtime/LiteralParser.cpp
new file mode 100644
index 0000000..df87e7f
--- /dev/null
+++ b/Source/JavaScriptCore/runtime/LiteralParser.cpp
@@ -0,0 +1,462 @@
+/*
+ * Copyright (C) 2009 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "LiteralParser.h"
+
+#include "JSArray.h"
+#include "JSString.h"
+#include "Lexer.h"
+#include "UStringBuilder.h"
+#include <wtf/ASCIICType.h>
+#include <wtf/dtoa.h>
+
+namespace JSC {
+
+static inline bool isJSONWhiteSpace(const UChar& c)
+{
+ // The JSON RFC 4627 defines a list of allowed characters to be considered
+ // insignificant white space: http://www.ietf.org/rfc/rfc4627.txt (2. JSON Grammar).
+ return c == ' ' || c == 0x9 || c == 0xA || c == 0xD;
+}
+
+LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token)
+{
+ while (m_ptr < m_end && isJSONWhiteSpace(*m_ptr))
+ ++m_ptr;
+
+ ASSERT(m_ptr <= m_end);
+ if (m_ptr >= m_end) {
+ token.type = TokEnd;
+ token.start = token.end = m_ptr;
+ return TokEnd;
+ }
+ token.type = TokError;
+ token.start = m_ptr;
+ switch (*m_ptr) {
+ case '[':
+ token.type = TokLBracket;
+ token.end = ++m_ptr;
+ return TokLBracket;
+ case ']':
+ token.type = TokRBracket;
+ token.end = ++m_ptr;
+ return TokRBracket;
+ case '(':
+ token.type = TokLParen;
+ token.end = ++m_ptr;
+ return TokLBracket;
+ case ')':
+ token.type = TokRParen;
+ token.end = ++m_ptr;
+ return TokRBracket;
+ case '{':
+ token.type = TokLBrace;
+ token.end = ++m_ptr;
+ return TokLBrace;
+ case '}':
+ token.type = TokRBrace;
+ token.end = ++m_ptr;
+ return TokRBrace;
+ case ',':
+ token.type = TokComma;
+ token.end = ++m_ptr;
+ return TokComma;
+ case ':':
+ token.type = TokColon;
+ token.end = ++m_ptr;
+ return TokColon;
+ case '"':
+ if (m_mode == StrictJSON)
+ return lexString<StrictJSON>(token);
+ return lexString<NonStrictJSON>(token);
+ case 't':
+ if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') {
+ m_ptr += 4;
+ token.type = TokTrue;
+ token.end = m_ptr;
+ return TokTrue;
+ }
+ break;
+ case 'f':
+ if (m_end - m_ptr >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e') {
+ m_ptr += 5;
+ token.type = TokFalse;
+ token.end = m_ptr;
+ return TokFalse;
+ }
+ break;
+ case 'n':
+ if (m_end - m_ptr >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l') {
+ m_ptr += 4;
+ token.type = TokNull;
+ token.end = m_ptr;
+ return TokNull;
+ }
+ break;
+ case '-':
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ return lexNumber(token);
+ }
+ return TokError;
+}
+
+template <LiteralParser::ParserMode mode> static inline bool isSafeStringCharacter(UChar c)
+{
+ return (c >= ' ' && (mode == LiteralParser::StrictJSON || c <= 0xff) && c != '\\' && c != '"') || c == '\t';
+}
+
+// "inline" is required here to help WINSCW compiler resolve specialized argument in templated functions.
+template <LiteralParser::ParserMode mode> inline LiteralParser::TokenType LiteralParser::Lexer::lexString(LiteralParserToken& token)
+{
+ ++m_ptr;
+ const UChar* runStart;
+ UStringBuilder builder;
+ do {
+ runStart = m_ptr;
+ while (m_ptr < m_end && isSafeStringCharacter<mode>(*m_ptr))
+ ++m_ptr;
+ if (runStart < m_ptr)
+ builder.append(runStart, m_ptr - runStart);
+ if ((mode == StrictJSON) && m_ptr < m_end && *m_ptr == '\\') {
+ ++m_ptr;
+ if (m_ptr >= m_end)
+ return TokError;
+ switch (*m_ptr) {
+ case '"':
+ builder.append('"');
+ m_ptr++;
+ break;
+ case '\\':
+ builder.append('\\');
+ m_ptr++;
+ break;
+ case '/':
+ builder.append('/');
+ m_ptr++;
+ break;
+ case 'b':
+ builder.append('\b');
+ m_ptr++;
+ break;
+ case 'f':
+ builder.append('\f');
+ m_ptr++;
+ break;
+ case 'n':
+ builder.append('\n');
+ m_ptr++;
+ break;
+ case 'r':
+ builder.append('\r');
+ m_ptr++;
+ break;
+ case 't':
+ builder.append('\t');
+ m_ptr++;
+ break;
+
+ case 'u':
+ if ((m_end - m_ptr) < 5) // uNNNN == 5 characters
+ return TokError;
+ for (int i = 1; i < 5; i++) {
+ if (!isASCIIHexDigit(m_ptr[i]))
+ return TokError;
+ }
+ builder.append(JSC::Lexer::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4]));
+ m_ptr += 5;
+ break;
+
+ default:
+ return TokError;
+ }
+ }
+ } while ((mode == StrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != '"');
+
+ if (m_ptr >= m_end || *m_ptr != '"')
+ return TokError;
+
+ token.stringToken = builder.toUString();
+ token.type = TokString;
+ token.end = ++m_ptr;
+ return TokString;
+}
+
+LiteralParser::TokenType LiteralParser::Lexer::lexNumber(LiteralParserToken& token)
+{
+ // ES5 and json.org define numbers as
+ // number
+ // int
+ // int frac? exp?
+ //
+ // int
+ // -? 0
+ // -? digit1-9 digits?
+ //
+ // digits
+ // digit digits?
+ //
+ // -?(0 | [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)?
+
+ if (m_ptr < m_end && *m_ptr == '-') // -?
+ ++m_ptr;
+
+ // (0 | [1-9][0-9]*)
+ if (m_ptr < m_end && *m_ptr == '0') // 0
+ ++m_ptr;
+ else if (m_ptr < m_end && *m_ptr >= '1' && *m_ptr <= '9') { // [1-9]
+ ++m_ptr;
+ // [0-9]*
+ while (m_ptr < m_end && isASCIIDigit(*m_ptr))
+ ++m_ptr;
+ } else
+ return TokError;
+
+ // ('.' [0-9]+)?
+ if (m_ptr < m_end && *m_ptr == '.') {
+ ++m_ptr;
+ // [0-9]+
+ if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
+ return TokError;
+
+ ++m_ptr;
+ while (m_ptr < m_end && isASCIIDigit(*m_ptr))
+ ++m_ptr;
+ }
+
+ // ([eE][+-]? [0-9]+)?
+ if (m_ptr < m_end && (*m_ptr == 'e' || *m_ptr == 'E')) { // [eE]
+ ++m_ptr;
+
+ // [-+]?
+ if (m_ptr < m_end && (*m_ptr == '-' || *m_ptr == '+'))
+ ++m_ptr;
+
+ // [0-9]+
+ if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
+ return TokError;
+
+ ++m_ptr;
+ while (m_ptr < m_end && isASCIIDigit(*m_ptr))
+ ++m_ptr;
+ }
+
+ token.type = TokNumber;
+ token.end = m_ptr;
+ Vector<char, 64> buffer(token.end - token.start + 1);
+ int i;
+ for (i = 0; i < token.end - token.start; i++) {
+ ASSERT(static_cast<char>(token.start[i]) == token.start[i]);
+ buffer[i] = static_cast<char>(token.start[i]);
+ }
+ buffer[i] = 0;
+ char* end;
+ token.numberToken = WTF::strtod(buffer.data(), &end);
+ ASSERT(buffer.data() + (token.end - token.start) == end);
+ return TokNumber;
+}
+
+JSValue LiteralParser::parse(ParserState initialState)
+{
+ ParserState state = initialState;
+ MarkedArgumentBuffer objectStack;
+ JSValue lastValue;
+ Vector<ParserState, 16> stateStack;
+ Vector<Identifier, 16> identifierStack;
+ while (1) {
+ switch(state) {
+ startParseArray:
+ case StartParseArray: {
+ JSArray* array = constructEmptyArray(m_exec);
+ objectStack.append(array);
+ // fallthrough
+ }
+ doParseArrayStartExpression:
+ case DoParseArrayStartExpression: {
+ TokenType lastToken = m_lexer.currentToken().type;
+ if (m_lexer.next() == TokRBracket) {
+ if (lastToken == TokComma)
+ return JSValue();
+ m_lexer.next();
+ lastValue = objectStack.last();
+ objectStack.removeLast();
+ break;
+ }
+
+ stateStack.append(DoParseArrayEndExpression);
+ goto startParseExpression;
+ }
+ case DoParseArrayEndExpression: {
+ asArray(objectStack.last())->push(m_exec, lastValue);
+
+ if (m_lexer.currentToken().type == TokComma)
+ goto doParseArrayStartExpression;
+
+ if (m_lexer.currentToken().type != TokRBracket)
+ return JSValue();
+
+ m_lexer.next();
+ lastValue = objectStack.last();
+ objectStack.removeLast();
+ break;
+ }
+ startParseObject:
+ case StartParseObject: {
+ JSObject* object = constructEmptyObject(m_exec);
+ objectStack.append(object);
+
+ TokenType type = m_lexer.next();
+ if (type == TokString) {
+ Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
+
+ // Check for colon
+ if (m_lexer.next() != TokColon)
+ return JSValue();
+
+ m_lexer.next();
+ identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
+ stateStack.append(DoParseObjectEndExpression);
+ goto startParseExpression;
+ } else if (type != TokRBrace)
+ return JSValue();
+ m_lexer.next();
+ lastValue = objectStack.last();
+ objectStack.removeLast();
+ break;
+ }
+ doParseObjectStartExpression:
+ case DoParseObjectStartExpression: {
+ TokenType type = m_lexer.next();
+ if (type != TokString)
+ return JSValue();
+ Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
+
+ // Check for colon
+ if (m_lexer.next() != TokColon)
+ return JSValue();
+
+ m_lexer.next();
+ identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
+ stateStack.append(DoParseObjectEndExpression);
+ goto startParseExpression;
+ }
+ case DoParseObjectEndExpression:
+ {
+ asObject(objectStack.last())->putDirect(identifierStack.last(), lastValue);
+ identifierStack.removeLast();
+ if (m_lexer.currentToken().type == TokComma)
+ goto doParseObjectStartExpression;
+ if (m_lexer.currentToken().type != TokRBrace)
+ return JSValue();
+ m_lexer.next();
+ lastValue = objectStack.last();
+ objectStack.removeLast();
+ break;
+ }
+ startParseExpression:
+ case StartParseExpression: {
+ switch (m_lexer.currentToken().type) {
+ case TokLBracket:
+ goto startParseArray;
+ case TokLBrace:
+ goto startParseObject;
+ case TokString: {
+ Lexer::LiteralParserToken stringToken = m_lexer.currentToken();
+ m_lexer.next();
+ lastValue = jsString(m_exec, stringToken.stringToken);
+ break;
+ }
+ case TokNumber: {
+ Lexer::LiteralParserToken numberToken = m_lexer.currentToken();
+ m_lexer.next();
+ lastValue = jsNumber(numberToken.numberToken);
+ break;
+ }
+ case TokNull:
+ m_lexer.next();
+ lastValue = jsNull();
+ break;
+
+ case TokTrue:
+ m_lexer.next();
+ lastValue = jsBoolean(true);
+ break;
+
+ case TokFalse:
+ m_lexer.next();
+ lastValue = jsBoolean(false);
+ break;
+
+ default:
+ // Error
+ return JSValue();
+ }
+ break;
+ }
+ case StartParseStatement: {
+ switch (m_lexer.currentToken().type) {
+ case TokLBracket:
+ case TokNumber:
+ case TokString:
+ goto startParseExpression;
+
+ case TokLParen: {
+ m_lexer.next();
+ stateStack.append(StartParseStatementEndStatement);
+ goto startParseExpression;
+ }
+ default:
+ return JSValue();
+ }
+ }
+ case StartParseStatementEndStatement: {
+ ASSERT(stateStack.isEmpty());
+ if (m_lexer.currentToken().type != TokRParen)
+ return JSValue();
+ if (m_lexer.next() == TokEnd)
+ return lastValue;
+ return JSValue();
+ }
+ default:
+ ASSERT_NOT_REACHED();
+ }
+ if (stateStack.isEmpty())
+ return lastValue;
+ state = stateStack.last();
+ stateStack.removeLast();
+ continue;
+ }
+}
+
+}