summary | refs | log | tree | commit | diff | stats
path: root/WebCore/xml/XPathParser.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'WebCore/xml/XPathParser.cpp')
-rw-r--r-- WebCore/xml/XPathParser.cpp 633
1 files changed, 633 insertions, 0 deletions
diff --git a/WebCore/xml/XPathParser.cpp b/WebCore/xml/XPathParser.cpp
new file mode 100644
index 0000000..77c3011
--- /dev/null
+++ b/WebCore/xml/XPathParser.cpp
@@ -0,0 +1,633 @@
+/*
+ * Copyright 2005 Maksim Orlovich <maksim@kde.org>
+ * Copyright (C) 2006 Apple Computer, Inc.
+ * Copyright (C) 2007 Alexey Proskuryakov <ap@webkit.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "XPathParser.h"
+
+#if ENABLE(XPATH)
+
+#include "ExceptionCode.h"
+#include "StringHash.h"
+#include "XPathEvaluator.h"
+#include "XPathException.h"
+#include "XPathNSResolver.h"
+#include "XPathStep.h"
+
+int xpathyyparse(void*);
+
+using namespace WTF;
+using namespace Unicode;
+
+namespace WebCore {
+namespace XPath {
+
+class LocationPath;
+
+#include "XPathGrammar.h"
+
+// Parser whose parseStatement() is currently running xpathyyparse(); that
+// function points this at itself for the duration of the parse and then
+// restores the previous value.
+Parser* Parser::currentParser = 0;
+
+// Result of charCat(): NameStart characters may begin an NCName, NameCont
+// characters may only follow the first character, and NotPartOfName
+// characters terminate the name (see lexNCName()).
+enum XMLCat { NameStart, NameCont, NotPartOfName };
+
+// Classifies one UTF-16 code unit for NCName lexing: can it start a name,
+// only continue one, or not appear in a name at all.
+static XMLCat charCat(UChar aChar)
+{
+    //### might need to add some special cases from the XML spec.
+
+    // Punctuation that is explicitly allowed in names is handled up front.
+    switch (aChar) {
+    case '_':
+        return NameStart;
+    case '.':
+    case '-':
+        return NameCont;
+    default:
+        break;
+    }
+
+    // Everything else is decided by its Unicode general category.
+    const CharCategory unicodeCategory = Unicode::category(aChar);
+
+    const bool startsName = unicodeCategory & (Letter_Uppercase | Letter_Lowercase | Letter_Other | Letter_Titlecase | Number_Letter);
+    if (startsName)
+        return NameStart;
+
+    const bool continuesName = unicodeCategory & (Mark_NonSpacing | Mark_SpacingCombining | Mark_Enclosing | Letter_Modifier | Number_DecimalDigit);
+    return continuesName ? NameCont : NotPartOfName;
+}
+
+// Fills axisNames with one entry per XPath axis keyword, mapping the keyword
+// text to the corresponding Step::Axis value.
+static void setUpAxisNamesMap(HashMap<String, Step::Axis>& axisNames)
+{
+    struct AxisEntry {
+        const char* keyword;
+        Step::Axis value;
+    };
+    const AxisEntry entries[] = {
+        { "ancestor", Step::AncestorAxis },
+        { "ancestor-or-self", Step::AncestorOrSelfAxis },
+        { "attribute", Step::AttributeAxis },
+        { "child", Step::ChildAxis },
+        { "descendant", Step::DescendantAxis },
+        { "descendant-or-self", Step::DescendantOrSelfAxis },
+        { "following", Step::FollowingAxis },
+        { "following-sibling", Step::FollowingSiblingAxis },
+        { "namespace", Step::NamespaceAxis },
+        { "parent", Step::ParentAxis },
+        { "preceding", Step::PrecedingAxis },
+        { "preceding-sibling", Step::PrecedingSiblingAxis },
+        { "self", Step::SelfAxis }
+    };
+
+    const unsigned entryCount = sizeof(entries) / sizeof(entries[0]);
+    for (const AxisEntry* entry = entries; entry != entries + entryCount; ++entry)
+        axisNames.set(entry->keyword, entry->value);
+}
+
+// Returns true and stores the axis in |type| when |name| is an XPath axis
+// keyword; returns false and leaves |type| untouched otherwise.
+static bool isAxisName(const String& name, Step::Axis& type)
+{
+    typedef HashMap<String, Step::Axis> AxisMap;
+
+    // Built on first use and kept for subsequent calls.
+    static AxisMap knownAxes;
+    if (knownAxes.isEmpty())
+        setUpAxisNamesMap(knownAxes);
+
+    AxisMap::iterator match = knownAxes.find(name);
+    const bool found = match != knownAxes.end();
+    if (found)
+        type = match->second;
+    return found;
+}
+
+// Returns whether |name| is one of the node-type test names: "comment",
+// "text", "processing-instruction" or "node".
+static bool isNodeTypeName(const String& name)
+{
+    // Built on first use and kept for subsequent calls.
+    static HashSet<String> knownNodeTypes;
+    if (knownNodeTypes.isEmpty()) {
+        const char* const typeNames[] = { "comment", "text", "processing-instruction", "node" };
+        for (unsigned i = 0; i < sizeof(typeNames) / sizeof(typeNames[0]); ++i)
+            knownNodeTypes.add(typeNames[i]);
+    }
+    return knownNodeTypes.contains(name);
+}
+
+/* Returns whether the token being lexed must be read as an operator, i.e.
+ * whether the last parsed token allows the [32] Operator rule to apply
+ * (check http://www.w3.org/TR/xpath#exprlex). Necessary to disambiguate
+ * '*' (multiply vs. name test) and the NCNames "and", "or", "mod", "div"
+ * (operator vs. element name).
+ *
+ * Per the spec, an operator is only possible when there IS a preceding token
+ * and that token is not one of '@', '::', '(', '[', ',' or an Operator.
+ */
+bool Parser::isOperatorContext() const
+{
+    if (m_nextPos == 0)
+        return false;
+
+    switch (m_lastTokenType) {
+    // No preceding token. reset() sets m_lastTokenType to 0, and by the time
+    // this is called for a name m_nextPos has already moved past it, so the
+    // position check above cannot detect "first token" on its own.
+    case 0:
+    case AND: case OR: case MULOP:
+    case '/': case SLASHSLASH: case '|': case PLUS: case MINUS:
+    case EQOP: case RELOP:
+    // ',' is in the spec's list too: an argument may start with '*' or with
+    // an element that happens to be called "div", "mod", "and" or "or".
+    case '@': case AXISNAME: case '(': case '[': case ',':
+        return false;
+    default:
+        return true;
+    }
+}
+
+// Advances m_nextPos past any run of whitespace/newline characters; stops at
+// the first other character or at the end of the input.
+void Parser::skipWS()
+{
+    for (; m_nextPos < m_data.length(); ++m_nextPos) {
+        if (!isSpaceOrNewline(m_data[m_nextPos]))
+            break;
+    }
+}
+
+// Builds a value-less token of type |code| and moves the read position
+// forward by |advance| characters.
+Token Parser::makeTokenAndAdvance(int code, int advance)
+{
+    Token token(code);
+    m_nextPos += advance;
+    return token;
+}
+
+// Builds a token of type |code| carrying the numeric operator |val| and moves
+// the read position forward by |advance| characters.
+Token Parser::makeTokenAndAdvance(int code, NumericOp::Opcode val, int advance)
+{
+    Token token(code, val);
+    m_nextPos += advance;
+    return token;
+}
+
+// Builds a token of type |code| carrying the comparison operator |val| and
+// moves the read position forward by |advance| characters.
+Token Parser::makeTokenAndAdvance(int code, EqTestOp::Opcode val, int advance)
+{
+    Token token(code, val);
+    m_nextPos += advance;
+    return token;
+}
+
+// Looks at the character one past the current read position without
+// consuming anything. Yields that character when it exists and is below
+// 0xff; yields 0 otherwise.
+char Parser::peekAheadHelper()
+{
+    if (m_nextPos + 1 < m_data.length()) {
+        const UChar candidate = m_data[m_nextPos + 1];
+        if (candidate < 0xff)
+            return candidate;
+    }
+    return 0;
+}
+
+// Looks at the character at the current read position without consuming it.
+// Yields that character when it exists and is below 0xff; yields 0 otherwise.
+char Parser::peekCurHelper()
+{
+    if (m_nextPos < m_data.length()) {
+        const UChar candidate = m_data[m_nextPos];
+        if (candidate < 0xff)
+            return candidate;
+    }
+    return 0;
+}
+
+// Lexes a quoted literal. On entry m_nextPos is on the opening quote; the
+// same character closes the literal. Returns a LITERAL token holding the text
+// between the quotes, or an XPATH_ERROR token if the input ends first.
+Token Parser::lexString()
+{
+    const UChar quote = m_data[m_nextPos];
+    const int contentStart = m_nextPos + 1;
+
+    m_nextPos = contentStart;
+    while (m_nextPos < m_data.length()) {
+        if (m_data[m_nextPos] != quote) {
+            ++m_nextPos;
+            continue;
+        }
+
+        String literal = m_data.substring(contentStart, m_nextPos - contentStart);
+        // Never hand a null string to the grammar; use an empty one instead.
+        if (literal.isNull())
+            literal = "";
+        ++m_nextPos; // Step over the closing quote.
+        return Token(LITERAL, literal);
+    }
+
+    // Unterminated literal.
+    return Token(XPATH_ERROR);
+}
+
+// Lexes a run of ASCII digits containing at most one '.', starting at the
+// current position, and returns it as a NUMBER token carrying the text.
+Token Parser::lexNumber()
+{
+    const int numberStart = m_nextPos;
+    bool haveDecimalPoint = false;
+
+    while (m_nextPos < m_data.length()) {
+        const UChar current = m_data[m_nextPos];
+        if (current >= 0xff)
+            break;
+
+        const bool isDigit = current >= '0' && current <= '9';
+        if (!isDigit) {
+            // Only the first '.' belongs to the number; anything else ends it.
+            if (current != '.' || haveDecimalPoint)
+                break;
+            haveDecimalPoint = true;
+        }
+        ++m_nextPos;
+    }
+
+    return Token(NUMBER, m_data.substring(numberStart, m_nextPos - numberStart));
+}
+
+// Lexes an NCName at the current position. On success stores it in |name|,
+// leaves m_nextPos on the first character after the name and returns true.
+// Returns false, without moving, when the input is exhausted or the current
+// character cannot start a name.
+bool Parser::lexNCName(String& name)
+{
+    const int nameStart = m_nextPos;
+    if (m_nextPos >= m_data.length() || charCat(m_data[m_nextPos]) != NameStart)
+        return false;
+
+    // Extend the name for as long as characters are allowed inside one.
+    while (m_nextPos < m_data.length() && charCat(m_data[m_nextPos]) != NotPartOfName)
+        ++m_nextPos;
+
+    name = m_data.substring(nameStart, m_nextPos - nameStart);
+    return true;
+}
+
+// Lexes a QName ("local" or "prefix:local") at the current position.
+// On success stores the full name in |name| and returns true; returns false
+// when no NCName starts here or when a ':' is not followed by an NCName.
+bool Parser::lexQName(String& name)
+{
+    String n1;
+    if (!lexNCName(n1))
+        return false;
+
+    skipWS();
+
+    // lexNCName() leaves m_nextPos on the first character after the name, so
+    // a colon, if present, is the *current* character (not the one after it).
+    // If it is ':', what we just got is the prefix; if not, it's the whole thing.
+    if (peekCurHelper() != ':') {
+        name = n1;
+        return true;
+    }
+
+    // Consume the ':' so that the local part can be lexed.
+    ++m_nextPos;
+
+    String n2;
+    if (!lexNCName(n2))
+        return false;
+
+    name = n1 + ":" + n2;
+    return true;
+}
+
+// Lexes and returns the next token starting at m_nextPos, advancing past it.
+// Leading whitespace is skipped. Returns a token of type 0 once the input is
+// exhausted and an XPATH_ERROR token when the input cannot be tokenized.
+// Does not update m_lastTokenType; nextToken() does that.
+Token Parser::nextTokenInternal()
+{
+ skipWS();
+
+ if (m_nextPos >= m_data.length())
+ return Token(0);
+
+ // peekCurHelper() yields 0 for characters >= 0xff, so those fall through the
+ // switch and are handled by the name lexing below.
+ char code = peekCurHelper();
+ switch (code) {
+ // Single-character tokens whose token type is the character itself.
+ case '(': case ')': case '[': case ']':
+ case '@': case ',': case '|':
+ return makeTokenAndAdvance(code);
+ case '\'':
+ case '\"':
+ return lexString();
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ return lexNumber();
+ // '.' may be "..", the start of a number such as ".5", or a lone '.'.
+ case '.': {
+ char next = peekAheadHelper();
+ if (next == '.')
+ return makeTokenAndAdvance(DOTDOT, 2);
+ if (next >= '0' && next <= '9')
+ return lexNumber();
+ return makeTokenAndAdvance('.');
+ }
+ case '/':
+ if (peekAheadHelper() == '/')
+ return makeTokenAndAdvance(SLASHSLASH, 2);
+ return makeTokenAndAdvance('/');
+ case '+':
+ return makeTokenAndAdvance(PLUS);
+ case '-':
+ return makeTokenAndAdvance(MINUS);
+ case '=':
+ return makeTokenAndAdvance(EQOP, EqTestOp::OP_EQ);
+ // '!' is only valid as the first half of "!=".
+ case '!':
+ if (peekAheadHelper() == '=')
+ return makeTokenAndAdvance(EQOP, EqTestOp::OP_NE, 2);
+ return Token(XPATH_ERROR);
+ case '<':
+ if (peekAheadHelper() == '=')
+ return makeTokenAndAdvance(RELOP, EqTestOp::OP_LE, 2);
+ return makeTokenAndAdvance(RELOP, EqTestOp::OP_LT);
+ case '>':
+ if (peekAheadHelper() == '=')
+ return makeTokenAndAdvance(RELOP, EqTestOp::OP_GE, 2);
+ return makeTokenAndAdvance(RELOP, EqTestOp::OP_GT);
+ // '*' is the multiply operator in an operator context, a wildcard name test otherwise.
+ case '*':
+ if (isOperatorContext())
+ return makeTokenAndAdvance(MULOP, NumericOp::OP_Mul);
+ ++m_nextPos;
+ return Token(NAMETEST, "*");
+ case '$': { // $ QName
+ m_nextPos++;
+ String name;
+ if (!lexQName(name))
+ return Token(XPATH_ERROR);
+ return Token(VARIABLEREFERENCE, name);
+ }
+ }
+
+ // Anything not handled above has to start with an NCName.
+ String name;
+ if (!lexNCName(name))
+ return Token(XPATH_ERROR);
+
+ skipWS();
+ // If we're in an operator context, check for any operator names
+ if (isOperatorContext()) {
+ if (name == "and") //### hash?
+ return Token(AND);
+ if (name == "or")
+ return Token(OR);
+ if (name == "mod")
+ return Token(MULOP, NumericOp::OP_Mod);
+ if (name == "div")
+ return Token(MULOP, NumericOp::OP_Div);
+ }
+
+ // See whether we are at a :
+ if (peekCurHelper() == ':') {
+ m_nextPos++;
+ // Any chance it's an axis name?
+ if (peekCurHelper() == ':') {
+ m_nextPos++;
+
+ //It might be an axis name.
+ Step::Axis axis;
+ if (isAxisName(name, axis))
+ return Token(AXISNAME, axis);
+ // Ugh, :: is only valid in axis names -> error
+ return Token(XPATH_ERROR);
+ }
+
+ // Seems like this is a fully qualified qname, or perhaps the * modified one from NameTest
+ skipWS();
+ if (peekCurHelper() == '*') {
+ m_nextPos++;
+ return Token(NAMETEST, name + ":*");
+ }
+
+ // Make a full qname.
+ String n2;
+ if (!lexNCName(n2))
+ return Token(XPATH_ERROR);
+
+ name = name + ":" + n2;
+ }
+
+ // A name followed by '(' is a node type test or a function call.
+ skipWS();
+ if (peekCurHelper() == '(') {
+ // Note: the '(' is not consumed here; it is lexed as its own token next.
+
+ // Either a node type or a function name.
+ if (isNodeTypeName(name)) {
+ if (name == "processing-instruction")
+ return Token(PI, name);
+
+ return Token(NODETYPE, name);
+ }
+ // Must be a function name.
+ return Token(FUNCTIONNAME, name);
+ }
+
+ // At this point, it must be NAMETEST.
+ return Token(NAMETEST, name);
+}
+
+// Lexes the next token and remembers its type in m_lastTokenType, which
+// isOperatorContext() consults when disambiguating the following token.
+Token Parser::nextToken()
+{
+    Token token = nextTokenInternal();
+    m_lastTokenType = token.type;
+    return token;
+}
+
+// Constructs a parser with a null input string and cleared lexer/parse state
+// (everything that reset() initializes).
+Parser::Parser()
+{
+ reset(String());
+}
+
+// Prepares the parser for a fresh run over |data|: rewinds the lexer, forgets
+// the previous token type, and clears the result expression and the
+// namespace-error flag.
+void Parser::reset(const String& data)
+{
+    // Lexer state.
+    m_data = data;
+    m_nextPos = 0;
+    m_lastTokenType = 0;
+
+    // Parse results.
+    m_gotNamespaceError = false;
+    m_topExpr = 0;
+}
+
+// Lexer entry point for the generated grammar: fetches the next token, copies
+// its payload (if any) into the YYSTYPE that |data| points to, and returns
+// the token type. String payloads are heap-allocated and registered with
+// registerString() so parseStatement() can free them if the parse fails.
+int Parser::lex(void* data)
+{
+    Token token = nextToken();
+    YYSTYPE* semanticValue = static_cast<YYSTYPE*>(data);
+    const int type = token.type;
+
+    if (type == AXISNAME)
+        semanticValue->axis = token.axis;
+    else if (type == MULOP)
+        semanticValue->numop = token.numop;
+    else if (type == RELOP || type == EQOP)
+        semanticValue->eqop = token.eqop;
+    else if (type == NODETYPE || type == PI || type == FUNCTIONNAME || type == LITERAL
+        || type == VARIABLEREFERENCE || type == NUMBER || type == NAMETEST) {
+        semanticValue->str = new String(token.str);
+        registerString(semanticValue->str);
+    }
+
+    return type;
+}
+
+// Splits |qName| into |localName| and |namespaceURI|. Without a prefix, only
+// |localName| is written. With a prefix, the namespace is looked up through
+// m_resolver; returns false when there is no resolver or the lookup yields a
+// null URI.
+bool Parser::expandQName(const String& qName, String& localName, String& namespaceURI)
+{
+    const int colonIndex = qName.find(':');
+    if (colonIndex < 0) {
+        localName = qName;
+        return true;
+    }
+
+    if (!m_resolver)
+        return false;
+
+    namespaceURI = m_resolver->lookupNamespaceURI(qName.left(colonIndex));
+    if (namespaceURI.isNull())
+        return false;
+
+    localName = qName.substring(colonIndex + 1);
+    return true;
+}
+
+// Parses |statement| and returns the resulting top-level Expression.
+// |resolver| is stored in m_resolver for prefix lookups (see expandQName()).
+// On a parse error every object still registered with this parser is deleted,
+// |ec| is set to NAMESPACE_ERR if m_gotNamespaceError is set and to
+// XPathException::INVALID_EXPRESSION_ERR otherwise, and 0 is returned.
+// |ec| is not written on success.
+Expression* Parser::parseStatement(const String& statement, PassRefPtr<XPathNSResolver> resolver, ExceptionCode& ec)
+{
+ reset(statement);
+
+ m_resolver = resolver;
+
+ // Make this parser the current one for the duration of the parse, then
+ // restore whichever parser was current before.
+ Parser* oldParser = currentParser;
+ currentParser = this;
+ int parseError = xpathyyparse(this);
+ currentParser = oldParser;
+
+ if (parseError) {
+ // Free everything that was registered during the failed parse.
+ deleteAllValues(m_parseNodes);
+ m_parseNodes.clear();
+
+ // For the vector sets, delete both the pointed-to elements and the vectors.
+ HashSet<Vector<Predicate*>*>::iterator pend = m_predicateVectors.end();
+ for (HashSet<Vector<Predicate*>*>::iterator it = m_predicateVectors.begin(); it != pend; ++it) {
+ deleteAllValues(**it);
+ delete *it;
+ }
+ m_predicateVectors.clear();
+
+ HashSet<Vector<Expression*>*>::iterator eend = m_expressionVectors.end();
+ for (HashSet<Vector<Expression*>*>::iterator it = m_expressionVectors.begin(); it != eend; ++it) {
+ deleteAllValues(**it);
+ delete *it;
+ }
+ m_expressionVectors.clear();
+
+ deleteAllValues(m_strings);
+ m_strings.clear();
+
+ deleteAllValues(m_nodeTests);
+ m_nodeTests.clear();
+
+ m_topExpr = 0;
+
+ if (m_gotNamespaceError)
+ ec = NAMESPACE_ERR;
+ else
+ ec = XPathException::INVALID_EXPRESSION_ERR;
+ return 0;
+ }
+
+ // After a successful parse the only object still registered should be the
+ // top-level expression.
+ ASSERT(m_parseNodes.size() == 1);
+ ASSERT(*m_parseNodes.begin() == m_topExpr);
+ ASSERT(m_expressionVectors.size() == 0);
+ ASSERT(m_predicateVectors.size() == 0);
+ ASSERT(m_strings.size() == 0);
+ ASSERT(m_nodeTests.size() == 0);
+
+ // Stop tracking the result (without deleting it) and return it.
+ m_parseNodes.clear();
+ Expression* result = m_topExpr;
+ m_topExpr = 0;
+
+ return result;
+}
+
+// Starts tracking |node| so that parseStatement() can delete it if the parse
+// fails. Null is ignored; registering the same node twice asserts.
+void Parser::registerParseNode(ParseNode* node)
+{
+    if (node) {
+        ASSERT(!m_parseNodes.contains(node));
+        m_parseNodes.add(node);
+    }
+}
+
+// Stops tracking |node| without deleting it. Null is ignored; the node is
+// expected to have been registered before.
+void Parser::unregisterParseNode(ParseNode* node)
+{
+    if (node) {
+        ASSERT(m_parseNodes.contains(node));
+        m_parseNodes.remove(node);
+    }
+}
+
+// Starts tracking |vector| so that parseStatement() can delete it, and the
+// predicates it holds, if the parse fails. Null is ignored.
+void Parser::registerPredicateVector(Vector<Predicate*>* vector)
+{
+    if (vector) {
+        ASSERT(!m_predicateVectors.contains(vector));
+        m_predicateVectors.add(vector);
+    }
+}
+
+// Stops tracking |vector| and deletes the vector object itself; the
+// predicates it points to are not deleted here. Null is ignored.
+void Parser::deletePredicateVector(Vector<Predicate*>* vector)
+{
+    if (vector) {
+        ASSERT(m_predicateVectors.contains(vector));
+        m_predicateVectors.remove(vector);
+        delete vector;
+    }
+}
+
+
+// Starts tracking |vector| so that parseStatement() can delete it, and the
+// expressions it holds, if the parse fails. Null is ignored.
+void Parser::registerExpressionVector(Vector<Expression*>* vector)
+{
+    if (vector) {
+        ASSERT(!m_expressionVectors.contains(vector));
+        m_expressionVectors.add(vector);
+    }
+}
+
+// Stops tracking |vector| and deletes the vector object itself; the
+// expressions it points to are not deleted here. Null is ignored.
+void Parser::deleteExpressionVector(Vector<Expression*>* vector)
+{
+    if (vector) {
+        ASSERT(m_expressionVectors.contains(vector));
+        m_expressionVectors.remove(vector);
+        delete vector;
+    }
+}
+
+// Starts tracking the heap-allocated string |s| so that parseStatement() can
+// delete it if the parse fails. Null is ignored.
+void Parser::registerString(String* s)
+{
+    if (s) {
+        ASSERT(!m_strings.contains(s));
+        m_strings.add(s);
+    }
+}
+
+// Stops tracking |s| and deletes it. Null is ignored; the string is expected
+// to have been registered before.
+void Parser::deleteString(String* s)
+{
+    if (s) {
+        ASSERT(m_strings.contains(s));
+        m_strings.remove(s);
+        delete s;
+    }
+}
+
+// Starts tracking the node test |t| so that parseStatement() can delete it if
+// the parse fails. Null is ignored.
+void Parser::registerNodeTest(Step::NodeTest* t)
+{
+    if (t) {
+        ASSERT(!m_nodeTests.contains(t));
+        m_nodeTests.add(t);
+    }
+}
+
+// Stops tracking |t| and deletes it. Null is ignored; the node test is
+// expected to have been registered before.
+void Parser::deleteNodeTest(Step::NodeTest* t)
+{
+    if (t) {
+        ASSERT(m_nodeTests.contains(t));
+        m_nodeTests.remove(t);
+        delete t;
+    }
+}
+
+}
+}
+
+#endif // ENABLE(XPATH)