diff options
Diffstat (limited to 'WebCore/xml/XPathParser.cpp')
-rw-r--r-- | WebCore/xml/XPathParser.cpp | 633 |
1 files changed, 633 insertions, 0 deletions
diff --git a/WebCore/xml/XPathParser.cpp b/WebCore/xml/XPathParser.cpp new file mode 100644 index 0000000..77c3011 --- /dev/null +++ b/WebCore/xml/XPathParser.cpp @@ -0,0 +1,633 @@ +/* + * Copyright 2005 Maksim Orlovich <maksim@kde.org> + * Copyright (C) 2006 Apple Computer, Inc. + * Copyright (C) 2007 Alexey Proskuryakov <ap@webkit.org> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "XPathParser.h" + +#if ENABLE(XPATH) + +#include "ExceptionCode.h" +#include "StringHash.h" +#include "XPathEvaluator.h" +#include "XPathException.h" +#include "XPathNSResolver.h" +#include "XPathStep.h" + +int xpathyyparse(void*); + +using namespace WTF; +using namespace Unicode; + +namespace WebCore { +namespace XPath { + +class LocationPath; + +#include "XPathGrammar.h" + +Parser* Parser::currentParser = 0; + +enum XMLCat { NameStart, NameCont, NotPartOfName }; + +static XMLCat charCat(UChar aChar) +{ + //### might need to add some special cases from the XML spec. + + if (aChar == '_') + return NameStart; + + if (aChar == '.' || aChar == '-') + return NameCont; + CharCategory category = Unicode::category(aChar); + if (category & (Letter_Uppercase | Letter_Lowercase | Letter_Other | Letter_Titlecase | Number_Letter)) + return NameStart; + if (category & (Mark_NonSpacing | Mark_SpacingCombining | Mark_Enclosing | Letter_Modifier | Number_DecimalDigit)) + return NameCont; + return NotPartOfName; +} + +static void setUpAxisNamesMap(HashMap<String, Step::Axis>& axisNames) +{ + struct AxisName { + const char* name; + Step::Axis axis; + }; + const AxisName axisNameList[] = { + { "ancestor", Step::AncestorAxis }, + { "ancestor-or-self", Step::AncestorOrSelfAxis }, + { "attribute", Step::AttributeAxis }, + { "child", Step::ChildAxis }, + { "descendant", Step::DescendantAxis }, + { "descendant-or-self", Step::DescendantOrSelfAxis }, + { "following", Step::FollowingAxis }, + { "following-sibling", Step::FollowingSiblingAxis }, + { "namespace", Step::NamespaceAxis }, + { "parent", Step::ParentAxis }, + { "preceding", Step::PrecedingAxis }, + { "preceding-sibling", Step::PrecedingSiblingAxis }, + { "self", Step::SelfAxis } + }; + for (unsigned i = 0; i < sizeof(axisNameList) / sizeof(axisNameList[0]); ++i) + axisNames.set(axisNameList[i].name, axisNameList[i].axis); +} + +static bool isAxisName(const String& name, Step::Axis& type) +{ + static HashMap<String, Step::Axis> axisNames; + + if (axisNames.isEmpty()) + setUpAxisNamesMap(axisNames); + + HashMap<String, Step::Axis>::iterator it = axisNames.find(name); + if (it == axisNames.end()) + return false; + type = it->second; + return true; +} + +static bool isNodeTypeName(const String& name) +{ + static HashSet<String> nodeTypeNames; + if (nodeTypeNames.isEmpty()) { + nodeTypeNames.add("comment"); + nodeTypeNames.add("text"); + nodeTypeNames.add("processing-instruction"); + nodeTypeNames.add("node"); + } + return nodeTypeNames.contains(name); +} + +/* Returns whether the last parsed token matches the [32] Operator rule + * (check http://www.w3.org/TR/xpath#exprlex). Necessary to disambiguate + * the tokens. + */ +bool Parser::isOperatorContext() const +{ + if (m_nextPos == 0) + return false; + + switch (m_lastTokenType) { + case AND: case OR: case MULOP: + case '/': case SLASHSLASH: case '|': case PLUS: case MINUS: + case EQOP: case RELOP: + case '@': case AXISNAME: case '(': case '[': + return false; + default: + return true; + } +} + +void Parser::skipWS() +{ + while (m_nextPos < m_data.length() && isSpaceOrNewline(m_data[m_nextPos])) + ++m_nextPos; +} + +Token Parser::makeTokenAndAdvance(int code, int advance) +{ + m_nextPos += advance; + return Token(code); +} + +Token Parser::makeTokenAndAdvance(int code, NumericOp::Opcode val, int advance) +{ + m_nextPos += advance; + return Token(code, val); +} + +Token Parser::makeTokenAndAdvance(int code, EqTestOp::Opcode val, int advance) +{ + m_nextPos += advance; + return Token(code, val); +} + +// Returns next char if it's there and interesting, 0 otherwise +char Parser::peekAheadHelper() +{ + if (m_nextPos + 1 >= m_data.length()) + return 0; + UChar next = m_data[m_nextPos + 1]; + if (next >= 0xff) + return 0; + return next; +} + +char Parser::peekCurHelper() +{ + if (m_nextPos >= m_data.length()) + return 0; + UChar next = m_data[m_nextPos]; + if (next >= 0xff) + return 0; + return next; +} + +Token Parser::lexString() +{ + UChar delimiter = m_data[m_nextPos]; + int startPos = m_nextPos + 1; + + for (m_nextPos = startPos; m_nextPos < m_data.length(); ++m_nextPos) { + if (m_data[m_nextPos] == delimiter) { + String value = m_data.substring(startPos, m_nextPos - startPos); + if (value.isNull()) + value = ""; + ++m_nextPos; // Consume the char. + return Token(LITERAL, value); + } + } + + // Ouch, went off the end -- report error. + return Token(XPATH_ERROR); +} + +Token Parser::lexNumber() +{ + int startPos = m_nextPos; + bool seenDot = false; + + // Go until end or a non-digits character. + for (; m_nextPos < m_data.length(); ++m_nextPos) { + UChar aChar = m_data[m_nextPos]; + if (aChar >= 0xff) break; + + if (aChar < '0' || aChar > '9') { + if (aChar == '.' && !seenDot) + seenDot = true; + else + break; + } + } + + return Token(NUMBER, m_data.substring(startPos, m_nextPos - startPos)); +} + +bool Parser::lexNCName(String& name) +{ + int startPos = m_nextPos; + if (m_nextPos >= m_data.length()) + return false; + + if (charCat(m_data[m_nextPos]) != NameStart) + return false; + + // Keep going until we get a character that's not good for names. + for (; m_nextPos < m_data.length(); ++m_nextPos) + if (charCat(m_data[m_nextPos]) == NotPartOfName) + break; + + name = m_data.substring(startPos, m_nextPos - startPos); + return true; +} + +bool Parser::lexQName(String& name) +{ + String n1; + if (!lexNCName(n1)) + return false; + + skipWS(); + + // If the next character is :, what we just got it the prefix, if not, + // it's the whole thing. + if (peekAheadHelper() != ':') { + name = n1; + return true; + } + + String n2; + if (!lexNCName(n2)) + return false; + + name = n1 + ":" + n2; + return true; +} + +Token Parser::nextTokenInternal() +{ + skipWS(); + + if (m_nextPos >= m_data.length()) + return Token(0); + + char code = peekCurHelper(); + switch (code) { + case '(': case ')': case '[': case ']': + case '@': case ',': case '|': + return makeTokenAndAdvance(code); + case '\'': + case '\"': + return lexString(); + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return lexNumber(); + case '.': { + char next = peekAheadHelper(); + if (next == '.') + return makeTokenAndAdvance(DOTDOT, 2); + if (next >= '0' && next <= '9') + return lexNumber(); + return makeTokenAndAdvance('.'); + } + case '/': + if (peekAheadHelper() == '/') + return makeTokenAndAdvance(SLASHSLASH, 2); + return makeTokenAndAdvance('/'); + case '+': + return makeTokenAndAdvance(PLUS); + case '-': + return makeTokenAndAdvance(MINUS); + case '=': + return makeTokenAndAdvance(EQOP, EqTestOp::OP_EQ); + case '!': + if (peekAheadHelper() == '=') + return makeTokenAndAdvance(EQOP, EqTestOp::OP_NE, 2); + return Token(XPATH_ERROR); + case '<': + if (peekAheadHelper() == '=') + return makeTokenAndAdvance(RELOP, EqTestOp::OP_LE, 2); + return makeTokenAndAdvance(RELOP, EqTestOp::OP_LT); + case '>': + if (peekAheadHelper() == '=') + return makeTokenAndAdvance(RELOP, EqTestOp::OP_GE, 2); + return makeTokenAndAdvance(RELOP, EqTestOp::OP_GT); + case '*': + if (isOperatorContext()) + return makeTokenAndAdvance(MULOP, NumericOp::OP_Mul); + ++m_nextPos; + return Token(NAMETEST, "*"); + case '$': { // $ QName + m_nextPos++; + String name; + if (!lexQName(name)) + return Token(XPATH_ERROR); + return Token(VARIABLEREFERENCE, name); + } + } + + String name; + if (!lexNCName(name)) + return Token(XPATH_ERROR); + + skipWS(); + // If we're in an operator context, check for any operator names + if (isOperatorContext()) { + if (name == "and") //### hash? + return Token(AND); + if (name == "or") + return Token(OR); + if (name == "mod") + return Token(MULOP, NumericOp::OP_Mod); + if (name == "div") + return Token(MULOP, NumericOp::OP_Div); + } + + // See whether we are at a : + if (peekCurHelper() == ':') { + m_nextPos++; + // Any chance it's an axis name? + if (peekCurHelper() == ':') { + m_nextPos++; + + //It might be an axis name. + Step::Axis axis; + if (isAxisName(name, axis)) + return Token(AXISNAME, axis); + // Ugh, :: is only valid in axis names -> error + return Token(XPATH_ERROR); + } + + // Seems like this is a fully qualified qname, or perhaps the * modified one from NameTest + skipWS(); + if (peekCurHelper() == '*') { + m_nextPos++; + return Token(NAMETEST, name + ":*"); + } + + // Make a full qname. + String n2; + if (!lexNCName(n2)) + return Token(XPATH_ERROR); + + name = name + ":" + n2; + } + + skipWS(); + if (peekCurHelper() == '(') { + //note: we don't swallow the (here! + + //either node type of function name + if (isNodeTypeName(name)) { + if (name == "processing-instruction") + return Token(PI, name); + + return Token(NODETYPE, name); + } + //must be a function name. + return Token(FUNCTIONNAME, name); + } + + // At this point, it must be NAMETEST. + return Token(NAMETEST, name); +} + +Token Parser::nextToken() +{ + Token toRet = nextTokenInternal(); + m_lastTokenType = toRet.type; + return toRet; +} + +Parser::Parser() +{ + reset(String()); +} + +void Parser::reset(const String& data) +{ + m_nextPos = 0; + m_data = data; + m_lastTokenType = 0; + + m_topExpr = 0; + m_gotNamespaceError = false; +} + +int Parser::lex(void* data) +{ + YYSTYPE* yylval = static_cast<YYSTYPE*>(data); + Token tok = nextToken(); + + switch (tok.type) { + case AXISNAME: + yylval->axis = tok.axis; + break; + case MULOP: + yylval->numop = tok.numop; + break; + case RELOP: + case EQOP: + yylval->eqop = tok.eqop; + break; + case NODETYPE: + case PI: + case FUNCTIONNAME: + case LITERAL: + case VARIABLEREFERENCE: + case NUMBER: + case NAMETEST: + yylval->str = new String(tok.str); + registerString(yylval->str); + break; + } + + return tok.type; +} + +bool Parser::expandQName(const String& qName, String& localName, String& namespaceURI) +{ + int colon = qName.find(':'); + if (colon >= 0) { + if (!m_resolver) + return false; + namespaceURI = m_resolver->lookupNamespaceURI(qName.left(colon)); + if (namespaceURI.isNull()) + return false; + localName = qName.substring(colon + 1); + } else + localName = qName; + + return true; +} + +Expression* Parser::parseStatement(const String& statement, PassRefPtr<XPathNSResolver> resolver, ExceptionCode& ec) +{ + reset(statement); + + m_resolver = resolver; + + Parser* oldParser = currentParser; + currentParser = this; + int parseError = xpathyyparse(this); + currentParser = oldParser; + + if (parseError) { + deleteAllValues(m_parseNodes); + m_parseNodes.clear(); + + HashSet<Vector<Predicate*>*>::iterator pend = m_predicateVectors.end(); + for (HashSet<Vector<Predicate*>*>::iterator it = m_predicateVectors.begin(); it != pend; ++it) { + deleteAllValues(**it); + delete *it; + } + m_predicateVectors.clear(); + + HashSet<Vector<Expression*>*>::iterator eend = m_expressionVectors.end(); + for (HashSet<Vector<Expression*>*>::iterator it = m_expressionVectors.begin(); it != eend; ++it) { + deleteAllValues(**it); + delete *it; + } + m_expressionVectors.clear(); + + deleteAllValues(m_strings); + m_strings.clear(); + + deleteAllValues(m_nodeTests); + m_nodeTests.clear(); + + m_topExpr = 0; + + if (m_gotNamespaceError) + ec = NAMESPACE_ERR; + else + ec = XPathException::INVALID_EXPRESSION_ERR; + return 0; + } + + ASSERT(m_parseNodes.size() == 1); + ASSERT(*m_parseNodes.begin() == m_topExpr); + ASSERT(m_expressionVectors.size() == 0); + ASSERT(m_predicateVectors.size() == 0); + ASSERT(m_strings.size() == 0); + ASSERT(m_nodeTests.size() == 0); + + m_parseNodes.clear(); + Expression* result = m_topExpr; + m_topExpr = 0; + + return result; +} + +void Parser::registerParseNode(ParseNode* node) +{ + if (node == 0) + return; + + ASSERT(!m_parseNodes.contains(node)); + + m_parseNodes.add(node); +} + +void Parser::unregisterParseNode(ParseNode* node) +{ + if (node == 0) + return; + + ASSERT(m_parseNodes.contains(node)); + + m_parseNodes.remove(node); +} + +void Parser::registerPredicateVector(Vector<Predicate*>* vector) +{ + if (vector == 0) + return; + + ASSERT(!m_predicateVectors.contains(vector)); + + m_predicateVectors.add(vector); +} + +void Parser::deletePredicateVector(Vector<Predicate*>* vector) +{ + if (vector == 0) + return; + + ASSERT(m_predicateVectors.contains(vector)); + + m_predicateVectors.remove(vector); + delete vector; +} + + +void Parser::registerExpressionVector(Vector<Expression*>* vector) +{ + if (vector == 0) + return; + + ASSERT(!m_expressionVectors.contains(vector)); + + m_expressionVectors.add(vector); +} + +void Parser::deleteExpressionVector(Vector<Expression*>* vector) +{ + if (vector == 0) + return; + + ASSERT(m_expressionVectors.contains(vector)); + + m_expressionVectors.remove(vector); + delete vector; +} + +void Parser::registerString(String* s) +{ + if (s == 0) + return; + + ASSERT(!m_strings.contains(s)); + + m_strings.add(s); +} + +void Parser::deleteString(String* s) +{ + if (s == 0) + return; + + ASSERT(m_strings.contains(s)); + + m_strings.remove(s); + delete s; +} + +void Parser::registerNodeTest(Step::NodeTest* t) +{ + if (t == 0) + return; + + ASSERT(!m_nodeTests.contains(t)); + + m_nodeTests.add(t); +} + +void Parser::deleteNodeTest(Step::NodeTest* t) +{ + if (t == 0) + return; + + ASSERT(m_nodeTests.contains(t)); + + m_nodeTests.remove(t); + delete t; +} + +} +} + +#endif // ENABLE(XPATH) |