summaryrefslogtreecommitdiffstats
path: root/JavaScriptCore/wrec
diff options
context:
space:
mode:
Diffstat (limited to 'JavaScriptCore/wrec')
-rw-r--r--JavaScriptCore/wrec/CharacterClassConstructor.cpp360
-rw-r--r--JavaScriptCore/wrec/CharacterClassConstructor.h121
-rw-r--r--JavaScriptCore/wrec/WREC.cpp1324
-rw-r--r--JavaScriptCore/wrec/WREC.h258
4 files changed, 2063 insertions, 0 deletions
diff --git a/JavaScriptCore/wrec/CharacterClassConstructor.cpp b/JavaScriptCore/wrec/CharacterClassConstructor.cpp
new file mode 100644
index 0000000..85eccaa
--- /dev/null
+++ b/JavaScriptCore/wrec/CharacterClassConstructor.cpp
@@ -0,0 +1,360 @@
+/*
+ * Copyright (C) 2008 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "CharacterClassConstructor.h"
+
+#if ENABLE(WREC)
+
+#include "pcre_internal.h"
+#include <wtf/ASCIICType.h>
+
+using namespace WTF;
+
+namespace JSC {
+
+static const UChar asciiNewlines[2] = { '\n', '\r' };
+static const UChar unicodeNewlines[2] = { 0x2028, 0x2029 };
+CharacterClass& getCharacterClassNewline() {
+ static CharacterClass charClass = {
+ asciiNewlines, 2,
+ 0, 0,
+ unicodeNewlines, 2,
+ 0, 0,
+ };
+
+ return charClass;
+}
+
+static const CharacterClassRange asciiDigitsRange[1] = { { '0', '9' } };
+CharacterClass& getCharacterClassDigits() {
+ static CharacterClass charClass = {
+ 0, 0,
+ asciiDigitsRange, 1,
+ 0, 0,
+ 0, 0,
+ };
+
+ return charClass;
+}
+
+static const UChar asciiSpaces[1] = { ' ' };
+static const CharacterClassRange asciiSpacesRange[1] = { { '\t', '\r' } };
+static const UChar unicodeSpaces[8] = { 0x00a0, 0x1680, 0x180e, 0x2028, 0x2029, 0x202f, 0x205f, 0x3000 };
+static const CharacterClassRange unicodeSpacesRange[1] = { { 0x2000, 0x200a } };
+CharacterClass& getCharacterClassSpaces() {
+ static CharacterClass charClass = {
+ asciiSpaces, 1,
+ asciiSpacesRange, 1,
+ unicodeSpaces, 8,
+ unicodeSpacesRange, 1,
+ };
+
+ return charClass;
+}
+
+static const UChar asciiWordchar[1] = { '_' };
+static const CharacterClassRange asciiWordcharRange[3] = { { '0', '9' }, { 'A', 'Z' }, { 'a', 'z' } };
+CharacterClass& getCharacterClassWordchar() {
+ static CharacterClass charClass = {
+ asciiWordchar, 1,
+ asciiWordcharRange, 3,
+ 0, 0,
+ 0, 0,
+ };
+
+ return charClass;
+}
+
+static const CharacterClassRange asciiNondigitsRange[2] = { { 0, '0' - 1 }, { '9' + 1, 0x7f } };
+static const CharacterClassRange unicodeNondigitsRange[1] = { { 0x0080, 0xffff } };
+CharacterClass& getCharacterClassNondigits() {
+ static CharacterClass charClass = {
+ 0, 0,
+ asciiNondigitsRange, 2,
+ 0, 0,
+ unicodeNondigitsRange, 1,
+ };
+
+ return charClass;
+}
+
+static const CharacterClassRange asciiNonspacesRange[3] = { { 0, '\t' - 1 }, { '\r' + 1, ' ' - 1 }, { ' ' + 1, 0x7f } };
+static const CharacterClassRange unicodeNonspacesRange[9] = {
+ { 0x0080, 0x009f },
+ { 0x00a1, 0x167f },
+ { 0x1681, 0x180d },
+ { 0x180f, 0x1fff },
+ { 0x200b, 0x2027 },
+ { 0x202a, 0x202e },
+ { 0x2030, 0x205e },
+ { 0x2060, 0x2fff },
+ { 0x3001, 0xffff }
+};
+CharacterClass& getCharacterClassNonspaces() {
+ static CharacterClass charClass = {
+ 0, 0,
+ asciiNonspacesRange, 3,
+ 0, 0,
+ unicodeNonspacesRange, 9,
+ };
+
+ return charClass;
+}
+
+static const UChar asciiNonwordchar[1] = { '`' };
+static const CharacterClassRange asciiNonwordcharRange[4] = { { 0, '0' - 1 }, { '9' + 1, 'A' - 1 }, { 'Z' + 1, '_' - 1 }, { 'z' + 1, 0x7f } };
+static const CharacterClassRange unicodeNonwordcharRange[1] = { { 0x0080, 0xffff } };
+CharacterClass& getCharacterClassNonwordchar() {
+ static CharacterClass charClass = {
+ asciiNonwordchar, 1,
+ asciiNonwordcharRange, 4,
+ 0, 0,
+ unicodeNonwordcharRange, 1,
+ };
+
+ return charClass;
+}
+
+void CharacterClassConstructor::addSorted(Vector<UChar>& matches, UChar ch)
+{
+ unsigned pos = 0;
+ unsigned range = matches.size();
+
+ // binary chop, find position to insert char.
+ while (range) {
+ unsigned index = range >> 1;
+
+ int val = matches[pos+index] - ch;
+ if (!val)
+ return;
+ else if (val > 0)
+ range = index;
+ else {
+ pos += (index+1);
+ range -= (index+1);
+ }
+ }
+
+ if (pos == matches.size())
+ matches.append(ch);
+ else
+ matches.insert(pos, ch);
+}
+
+void CharacterClassConstructor::addSortedRange(Vector<CharacterClassRange>& ranges, UChar lo, UChar hi)
+{
+ unsigned end = ranges.size();
+
+ // Simple linear scan - I doubt there are that many ranges anyway...
+ // feel free to fix this with something faster (eg binary chop).
+ for (unsigned i = 0; i < end; ++i) {
+ // does the new range fall before the current position in the array
+ if (hi < ranges[i].begin) {
+ // optional optimization: concatenate appending ranges? - may not be worthwhile.
+ if (hi == (ranges[i].begin - 1)) {
+ ranges[i].begin = lo;
+ return;
+ }
+ CharacterClassRange r = {lo, hi};
+ ranges.insert(i, r);
+ return;
+ }
+ // Okay, since we didn't hit the last case, the end of the new range is definitely at or after the begining
+ // If the new range start at or before the end of the last range, then the overlap (if it starts one after the
+ // end of the last range they concatenate, which is just as good.
+ if (lo <= (ranges[i].end + 1)) {
+ // found an intersect! we'll replace this entry in the array.
+ ranges[i].begin = std::min(ranges[i].begin, lo);
+ ranges[i].end = std::max(ranges[i].end, hi);
+
+ // now check if the new range can subsume any subsequent ranges.
+ unsigned next = i+1;
+ // each iteration of the loop we will either remove something from the list, or break the loop.
+ while (next < ranges.size()) {
+ if (ranges[next].begin <= (ranges[i].end + 1)) {
+ // the next entry now overlaps / concatenates this one.
+ ranges[i].end = std::max(ranges[i].end, ranges[next].end);
+ ranges.remove(next);
+ } else
+ break;
+ }
+
+ return;
+ }
+ }
+
+ // Range comes after all existing ranges.
+ CharacterClassRange r = {lo, hi};
+ ranges.append(r);
+}
+
+void CharacterClassConstructor::put(UChar ch)
+{
+ // Parsing a regular expression like [a-z], we start in an initial empty state:
+ // ((m_charBuffer == -1) && !m_isPendingDash)
+ // When buffer the 'a' sice it may be (and is in this case) part of a range:
+ // ((m_charBuffer != -1) && !m_isPendingDash)
+ // Having parsed the hyphen we then record that the dash is also pending:
+ // ((m_charBuffer != -1) && m_isPendingDash)
+ // The next change will always take us back to the initial state - either because
+ // a complete range has been parsed (such as [a-z]), or because a flush is forced,
+ // due to an early end in the regexp ([a-]), or a character class escape being added
+ // ([a-\s]). The fourth permutation of m_charBuffer and m_isPendingDash is not permitted.
+ ASSERT(!((m_charBuffer == -1) && m_isPendingDash));
+
+ if (m_charBuffer != -1) {
+ if (m_isPendingDash) {
+ // EXAMPLE: parsing [-a-c], the 'c' reaches this case - we have buffered a previous character and seen a hyphen, so this is a range.
+ UChar lo = m_charBuffer;
+ UChar hi = ch;
+ // Reset back to the inital state.
+ m_charBuffer = -1;
+ m_isPendingDash = false;
+
+ // This is an error, detected lazily. Do not proceed.
+ if (lo > hi) {
+ m_isUpsideDown = true;
+ return;
+ }
+
+ if (lo <= 0x7f) {
+ char asciiLo = lo;
+ char asciiHi = std::min(hi, (UChar)0x7f);
+ addSortedRange(m_ranges, lo, asciiHi);
+
+ if (m_isCaseInsensitive) {
+ if ((asciiLo <= 'Z') && (asciiHi >= 'A'))
+ addSortedRange(m_ranges, std::max(asciiLo, 'A')+('a'-'A'), std::min(asciiHi, 'Z')+('a'-'A'));
+ if ((asciiLo <= 'z') && (asciiHi >= 'a'))
+ addSortedRange(m_ranges, std::max(asciiLo, 'a')+('A'-'a'), std::min(asciiHi, 'z')+('A'-'a'));
+ }
+ }
+ if (hi >= 0x80) {
+ UChar unicodeCurr = std::max(lo, (UChar)0x80);
+ addSortedRange(m_rangesUnicode, unicodeCurr, hi);
+
+ if (m_isCaseInsensitive) {
+ // we're going to scan along, updating the start of the range
+ while (unicodeCurr <= hi) {
+ // Spin forwards over any characters that don't have two cases.
+ for (; kjs_pcre_ucp_othercase(unicodeCurr) == -1; ++unicodeCurr) {
+ // if this was the last character in the range, we're done.
+ if (unicodeCurr == hi)
+ return;
+ }
+ // if we fall through to here, unicodeCurr <= hi & has another case. Get the other case.
+ UChar rangeStart = unicodeCurr;
+ UChar otherCurr = kjs_pcre_ucp_othercase(unicodeCurr);
+
+ // If unicodeCurr is not yet hi, check the next char in the range. If it also has another case,
+ // and if it's other case value is one greater then the othercase value for the current last
+ // character included in the range, we can include next into the range.
+ while ((unicodeCurr < hi) && (kjs_pcre_ucp_othercase(unicodeCurr + 1) == (otherCurr + 1))) {
+ // increment unicodeCurr; it points to the end of the range.
+ // increment otherCurr, due to the check above other for next must be 1 greater than the currrent other value.
+ ++unicodeCurr;
+ ++otherCurr;
+ }
+
+ // otherChar is the last in the range of other case chars, calculate offset to get back to the start.
+ addSortedRange(m_rangesUnicode, otherCurr-(unicodeCurr-rangeStart), otherCurr);
+
+ // unicodeCurr has been added, move on to the next char.
+ ++unicodeCurr;
+ }
+ }
+ }
+ } else if (ch == '-')
+ // EXAMPLE: parsing [-a-c], the second '-' reaches this case - the hyphen is treated as potentially indicating a range.
+ m_isPendingDash = true;
+ else {
+ // EXAMPLE: Parsing [-a-c], the 'a' reaches this case - we repace the previously buffered char with the 'a'.
+ flush();
+ m_charBuffer = ch;
+ }
+ } else
+ // EXAMPLE: Parsing [-a-c], the first hyphen reaches this case - there is no buffered character
+ // (the hyphen not treated as a special character in this case, same handling for any char).
+ m_charBuffer = ch;
+}
+
+// When a character is added to the set we do not immediately add it to the arrays, in case it is actually defining a range.
+// When we have determined the character is not used in specifing a range it is added, in a sorted fashion, to the appropriate
+// array (either ascii or unicode).
+// If the pattern is case insensitive we add entries for both cases.
+void CharacterClassConstructor::flush()
+{
+ if (m_charBuffer != -1) {
+ if (m_charBuffer <= 0x7f) {
+ if (m_isCaseInsensitive && isASCIILower(m_charBuffer))
+ addSorted(m_matches, toASCIIUpper(m_charBuffer));
+ addSorted(m_matches, m_charBuffer);
+ if (m_isCaseInsensitive && isASCIIUpper(m_charBuffer))
+ addSorted(m_matches, toASCIILower(m_charBuffer));
+ } else {
+ addSorted(m_matchesUnicode, m_charBuffer);
+ if (m_isCaseInsensitive) {
+ int other = kjs_pcre_ucp_othercase(m_charBuffer);
+ if (other != -1)
+ addSorted(m_matchesUnicode, other);
+ }
+ }
+ m_charBuffer = -1;
+ }
+
+ if (m_isPendingDash) {
+ addSorted(m_matches, '-');
+ m_isPendingDash = false;
+ }
+}
+
+void CharacterClassConstructor::append(CharacterClass& other)
+{
+ // [x-\s] will add, 'x', '-', and all unicode spaces to new class (same as [x\s-]).
+ // Need to check the spec, really, but think this matches PCRE behaviour.
+ flush();
+
+ if (other.numMatches) {
+ for (size_t i = 0; i < other.numMatches; ++i)
+ addSorted(m_matches, other.matches[i]);
+ }
+ if (other.numRanges) {
+ for (size_t i = 0; i < other.numRanges; ++i)
+ addSortedRange(m_ranges, other.ranges[i].begin, other.ranges[i].end);
+ }
+ if (other.numMatchesUnicode) {
+ for (size_t i = 0; i < other.numMatchesUnicode; ++i)
+ addSorted(m_matchesUnicode, other.matchesUnicode[i]);
+ }
+ if (other.numRangesUnicode) {
+ for (size_t i = 0; i < other.numRangesUnicode; ++i)
+ addSortedRange(m_rangesUnicode, other.rangesUnicode[i].begin, other.rangesUnicode[i].end);
+ }
+}
+
+}
+
+#endif // ENABLE(WREC)
diff --git a/JavaScriptCore/wrec/CharacterClassConstructor.h b/JavaScriptCore/wrec/CharacterClassConstructor.h
new file mode 100644
index 0000000..5accdae
--- /dev/null
+++ b/JavaScriptCore/wrec/CharacterClassConstructor.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2008 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef CharacterClassConstructor_h
+#define CharacterClassConstructor_h
+
+#if ENABLE(WREC)
+
+#include "ustring.h"
+
+namespace JSC {
+
+ struct CharacterClassRange {
+ UChar begin;
+ UChar end;
+ };
+
+ struct CharacterClass {
+ const UChar* matches;
+ unsigned numMatches;
+
+ const CharacterClassRange* ranges;
+ unsigned numRanges;
+
+ const UChar* matchesUnicode;
+ unsigned numMatchesUnicode;
+
+ const CharacterClassRange* rangesUnicode;
+ unsigned numRangesUnicode;
+ };
+
+ CharacterClass& getCharacterClassNewline();
+ CharacterClass& getCharacterClassDigits();
+ CharacterClass& getCharacterClassSpaces();
+ CharacterClass& getCharacterClassWordchar();
+ CharacterClass& getCharacterClassNondigits();
+ CharacterClass& getCharacterClassNonspaces();
+ CharacterClass& getCharacterClassNonwordchar();
+
+ class CharacterClassConstructor {
+ public:
+ CharacterClassConstructor(bool isCaseInsensitive)
+ : m_charBuffer(-1)
+ , m_isPendingDash(false)
+ , m_isCaseInsensitive(isCaseInsensitive)
+ , m_isUpsideDown(false)
+ {
+ }
+
+ void flush();
+
+ // We need to flush prior to an escaped hyphen to prevent it as being treated as indicating
+ // a range, e.g. [a\-c] we flush prior to adding the hyphen so that this is not treated as
+ // [a-c]. However, we do not want to flush if we have already seen a non escaped hyphen -
+ // e.g. [+-\-] should be treated the same as [+--], producing a range that will also match
+ // a comma.
+ void flushBeforeEscapedHyphen()
+ {
+ if (!m_isPendingDash)
+ flush();
+ }
+
+ void put(UChar ch);
+ void append(CharacterClass& other);
+
+ bool isUpsideDown() { return m_isUpsideDown; }
+
+ ALWAYS_INLINE CharacterClass charClass()
+ {
+ CharacterClass newCharClass = {
+ m_matches.begin(), m_matches.size(),
+ m_ranges.begin(), m_ranges.size(),
+ m_matchesUnicode.begin(), m_matchesUnicode.size(),
+ m_rangesUnicode.begin(), m_rangesUnicode.size(),
+ };
+
+ return newCharClass;
+ }
+
+ private:
+ void addSorted(Vector<UChar>& matches, UChar ch);
+ void addSortedRange(Vector<CharacterClassRange>& ranges, UChar lo, UChar hi);
+
+ int m_charBuffer;
+ bool m_isPendingDash;
+ bool m_isCaseInsensitive;
+ bool m_isUpsideDown;
+
+ Vector<UChar> m_matches;
+ Vector<CharacterClassRange> m_ranges;
+ Vector<UChar> m_matchesUnicode;
+ Vector<CharacterClassRange> m_rangesUnicode;
+ };
+
+}
+
+#endif // ENABLE(WREC)
+
+#endif // CharacterClassConstructor_h
diff --git a/JavaScriptCore/wrec/WREC.cpp b/JavaScriptCore/wrec/WREC.cpp
new file mode 100644
index 0000000..6d64a2c
--- /dev/null
+++ b/JavaScriptCore/wrec/WREC.cpp
@@ -0,0 +1,1324 @@
+/*
+ * Copyright (C) 2008 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "WREC.h"
+
+#if ENABLE(WREC)
+
+#include "CharacterClassConstructor.h"
+#include "Machine.h"
+#include "pcre_internal.h"
+
+using namespace WTF;
+
+namespace JSC {
+
+class GenerateAtomFunctor {
+public:
+ virtual ~GenerateAtomFunctor() {}
+
+ virtual void generateAtom(WRECGenerator*, JmpSrcVector&) = 0;
+ virtual void backtrack(WRECGenerator*) = 0;
+};
+
+class GeneratePatternCharacterFunctor : public GenerateAtomFunctor {
+public:
+ GeneratePatternCharacterFunctor(const UChar ch)
+ : m_ch(ch)
+ {
+ }
+
+ virtual void generateAtom(WRECGenerator*, JmpSrcVector&);
+ virtual void backtrack(WRECGenerator*);
+
+private:
+ const UChar m_ch;
+};
+
+class GenerateCharacterClassFunctor : public GenerateAtomFunctor {
+public:
+ GenerateCharacterClassFunctor(CharacterClass* charClass, bool invert)
+ : m_charClass(charClass)
+ , m_invert(invert)
+ {
+ }
+
+ virtual void generateAtom(WRECGenerator*, JmpSrcVector&);
+ virtual void backtrack(WRECGenerator*);
+
+private:
+ CharacterClass* m_charClass;
+ bool m_invert;
+};
+
+class GenerateBackreferenceFunctor : public GenerateAtomFunctor {
+public:
+ GenerateBackreferenceFunctor(unsigned subpatternId)
+ : m_subpatternId(subpatternId)
+ {
+ }
+
+ virtual void generateAtom(WRECGenerator*, JmpSrcVector&);
+ virtual void backtrack(WRECGenerator*);
+
+private:
+ unsigned m_subpatternId;
+};
+
+class GenerateParenthesesNonGreedyFunctor : public GenerateAtomFunctor {
+public:
+ GenerateParenthesesNonGreedyFunctor(WRECGenerator::JmpDst start, WRECGenerator::JmpSrc success, WRECGenerator::JmpSrc fail)
+ : m_start(start)
+ , m_success(success)
+ , m_fail(fail)
+ {
+ }
+
+ virtual void generateAtom(WRECGenerator*, JmpSrcVector&);
+ virtual void backtrack(WRECGenerator*);
+
+private:
+ WRECGenerator::JmpDst m_start;
+ WRECGenerator::JmpSrc m_success;
+ WRECGenerator::JmpSrc m_fail;
+};
+
+void GeneratePatternCharacterFunctor::generateAtom(WRECGenerator* wrec, JmpSrcVector& failures)
+{
+ wrec->generatePatternCharacter(failures, m_ch);
+}
+void GeneratePatternCharacterFunctor::backtrack(WRECGenerator* wrec)
+{
+ wrec->generateBacktrack1();
+}
+
+void GenerateCharacterClassFunctor::generateAtom(WRECGenerator* wrec, JmpSrcVector& failures)
+{
+ wrec->generateCharacterClass(failures, *m_charClass, m_invert);
+}
+void GenerateCharacterClassFunctor::backtrack(WRECGenerator* wrec)
+{
+ wrec->generateBacktrack1();
+}
+
+void GenerateBackreferenceFunctor::generateAtom(WRECGenerator* wrec, JmpSrcVector& failures)
+{
+ wrec->generateBackreference(failures, m_subpatternId);
+}
+void GenerateBackreferenceFunctor::backtrack(WRECGenerator* wrec)
+{
+ wrec->generateBacktrackBackreference(m_subpatternId);
+}
+
+void GenerateParenthesesNonGreedyFunctor::generateAtom(WRECGenerator* wrec, JmpSrcVector& failures)
+{
+ wrec->generateParenthesesNonGreedy(failures, m_start, m_success, m_fail);
+}
+
+void GenerateParenthesesNonGreedyFunctor::backtrack(WRECGenerator*)
+{
+ // FIXME: do something about this.
+ CRASH();
+}
+
+void WRECGenerator::generateBacktrack1()
+{
+ m_jit.subl_i8r(1, currentPositionRegister);
+}
+
+void WRECGenerator::generateBacktrackBackreference(unsigned subpatternId)
+{
+ m_jit.subl_mr((2 * subpatternId + 1) * sizeof(int), outputRegister, currentPositionRegister);
+ m_jit.addl_mr((2 * subpatternId) * sizeof(int), outputRegister, currentPositionRegister);
+}
+
+void WRECGenerator::generateBackreferenceQuantifier(JmpSrcVector& failures, Quantifier::Type quantifierType, unsigned subpatternId, unsigned min, unsigned max)
+{
+ GenerateBackreferenceFunctor functor(subpatternId);
+
+ m_jit.movl_mr((2 * subpatternId) * sizeof(int), outputRegister, currentValueRegister);
+ m_jit.cmpl_rm(currentValueRegister, ((2 * subpatternId) + 1) * sizeof(int), outputRegister);
+ JmpSrc skipIfEmpty = m_jit.emitUnlinkedJe();
+
+ ASSERT(quantifierType == Quantifier::Greedy || quantifierType == Quantifier::NonGreedy);
+ if (quantifierType == Quantifier::Greedy)
+ generateGreedyQuantifier(failures, functor, min, max);
+ else
+ generateNonGreedyQuantifier(failures, functor, min, max);
+
+ m_jit.link(skipIfEmpty, m_jit.label());
+}
+
+void WRECGenerator::generateNonGreedyQuantifier(JmpSrcVector& failures, GenerateAtomFunctor& functor, unsigned min, unsigned max)
+{
+ // comment me better!
+ JmpSrcVector newFailures;
+
+ // (0) Setup:
+ // init quantifierCountRegister
+ m_jit.pushl_r(quantifierCountRegister);
+ m_jit.xorl_rr(quantifierCountRegister, quantifierCountRegister);
+ JmpSrc gotoStart = m_jit.emitUnlinkedJmp();
+
+ // (4) Failure case
+
+ JmpDst quantifierFailed = m_jit.label();
+ // (4.1) Restore original value of quantifierCountRegister from the stack
+ m_jit.popl_r(quantifierCountRegister);
+ failures.append(m_jit.emitUnlinkedJmp());
+
+ // (3) We just tried an alternative, and it failed - check we can try more.
+
+ JmpDst alternativeFailed = m_jit.label();
+ // (3.1) remove the value pushed prior to testing the alternative
+ m_jit.popl_r(currentPositionRegister);
+ // (3.2) if there is a limit, and we have reached it, game over.
+ if (max != Quantifier::noMaxSpecified) {
+ m_jit.cmpl_i32r(max, quantifierCountRegister);
+ m_jit.link(m_jit.emitUnlinkedJe(), quantifierFailed);
+ }
+
+ // (1) Do a check for the atom
+
+ // (1.0) This is where we start, if there is a minimum (then we must read at least one of the atom).
+ JmpDst testQuantifiedAtom = m_jit.label();
+ if (min)
+ m_jit.link(gotoStart, testQuantifiedAtom);
+ // (1.1) Do a check for the atom check.
+ functor.generateAtom(this, newFailures);
+ // (1.2) If we get here, successful match!
+ m_jit.addl_i8r(1, quantifierCountRegister);
+ // (1.3) We needed to read the atom, and we failed - that's terminally bad news.
+ for (unsigned i = 0; i < newFailures.size(); ++i)
+ m_jit.link(newFailures[i], quantifierFailed);
+ newFailures.clear();
+ // (1.4) If there is a minimum, check we have read enough ...
+ if (min) {
+ // ... except if min was 1 no need to keep checking!
+ if (min != 1) {
+ m_jit.cmpl_i32r(min, quantifierCountRegister);
+ m_jit.link(m_jit.emitUnlinkedJl(), testQuantifiedAtom);
+ }
+ } else
+ m_jit.link(gotoStart, m_jit.label());
+ // if there was no minimum, this is where we start.
+
+ // (2) Plant an alternative check for the remainder of the expression
+
+ // (2.1) recursively call to parseAlternative, if it falls through, success!
+ m_jit.pushl_r(currentPositionRegister);
+ m_parser.parseAlternative(newFailures);
+ m_jit.addl_i8r(4, X86::esp);
+ m_jit.popl_r(quantifierCountRegister);
+ // (2.2) link failure cases to jump back up to alternativeFailed.
+ for (unsigned i = 0; i < newFailures.size(); ++i)
+ m_jit.link(newFailures[i], alternativeFailed);
+ newFailures.clear();
+}
+
+void WRECGenerator::generateGreedyQuantifier(JmpSrcVector& failures, GenerateAtomFunctor& functor, unsigned min, unsigned max)
+{
+ if (!max)
+ return;
+
+ // comment me better!
+ JmpSrcVector newFailures;
+
+ // (0) Setup:
+ // init quantifierCountRegister
+ m_jit.pushl_r(quantifierCountRegister);
+ m_jit.xorl_rr(quantifierCountRegister, quantifierCountRegister);
+
+ // (1) Greedily read as many of the atom as possible
+
+ JmpDst readMore = m_jit.label();
+
+ // (1.1) Do a character class check.
+ functor.generateAtom(this, newFailures);
+ // (1.2) If we get here, successful match!
+ m_jit.addl_i8r(1, quantifierCountRegister);
+ // (1.3) loop, unless we've read max limit.
+ if (max != Quantifier::noMaxSpecified) {
+ if (max != 1) {
+ // if there is a limit, only loop while less than limit, otherwise fall throught to...
+ m_jit.cmpl_i32r(max, quantifierCountRegister);
+ m_jit.link(m_jit.emitUnlinkedJne(), readMore);
+ }
+ // ...if there is no min we need jump to the alternative test, if there is we can just fall through to it.
+ if (!min)
+ newFailures.append(m_jit.emitUnlinkedJmp());
+ } else
+ m_jit.link(m_jit.emitUnlinkedJmp(), readMore);
+ // (1.4) check enough matches to bother trying an alternative...
+ if (min) {
+ // We will fall through to here if (min && max), after the max check.
+ // First, also link a
+ JmpDst minimumTest = m_jit.label();
+ for (unsigned i = 0; i < newFailures.size(); ++i)
+ m_jit.link(newFailures[i], minimumTest);
+ newFailures.clear();
+ //
+ m_jit.cmpl_i32r(min, quantifierCountRegister);
+ newFailures.append(m_jit.emitUnlinkedJae());
+ }
+
+ // (4) Failure case
+
+ JmpDst quantifierFailed = m_jit.label();
+ // (4.1) Restore original value of quantifierCountRegister from the stack
+ m_jit.popl_r(quantifierCountRegister);
+ failures.append(m_jit.emitUnlinkedJmp());
+
+ // (3) Backtrack
+
+ JmpDst backtrack = m_jit.label();
+ // (3.1) this was preserved prior to executing the alternative
+ m_jit.popl_r(currentPositionRegister);
+ // (3.2) check we can retry with fewer matches - backtracking fails if already at the minimum
+ m_jit.cmpl_i32r(min, quantifierCountRegister);
+ m_jit.link(m_jit.emitUnlinkedJe(), quantifierFailed);
+ // (3.3) roll off one match, and retry.
+ functor.backtrack(this);
+ m_jit.subl_i8r(1, quantifierCountRegister);
+
+ // (2) Try an alternative.
+
+ // (2.1) point to retry
+ JmpDst tryAlternative = m_jit.label();
+ for (unsigned i = 0; i < newFailures.size(); ++i)
+ m_jit.link(newFailures[i], tryAlternative);
+ newFailures.clear();
+ // (2.2) recursively call to parseAlternative, if it falls through, success!
+ m_jit.pushl_r(currentPositionRegister);
+ m_parser.parseAlternative(newFailures);
+ m_jit.addl_i8r(4, X86::esp);
+ m_jit.popl_r(quantifierCountRegister);
+ // (2.3) link failure cases to here.
+ for (unsigned i = 0; i < newFailures.size(); ++i)
+ m_jit.link(newFailures[i], backtrack);
+ newFailures.clear();
+}
+
+void WRECGenerator::generatePatternCharacter(JmpSrcVector& failures, int ch)
+{
+ // check there is more input, read a char.
+ m_jit.cmpl_rr(lengthRegister, currentPositionRegister);
+ failures.append(m_jit.emitUnlinkedJe());
+ m_jit.movzwl_mr(inputRegister, currentPositionRegister, 2, currentValueRegister);
+
+ // used for unicode case insensitive
+ bool hasUpper = false;
+ JmpSrc isUpper;
+
+ // if case insensitive match
+ if (m_parser.m_ignoreCase) {
+ UChar lower, upper;
+
+ // check for ascii case sensitive characters
+ if (isASCIIAlpha(ch)) {
+ m_jit.orl_i32r(32, currentValueRegister);
+ ch |= 32;
+ } else if ((ch > 0x7f) && ((lower = Unicode::toLower(ch)) != (upper = Unicode::toUpper(ch)))) {
+ // handle unicode case sentitive characters - branch to success on upper
+ m_jit.cmpl_i32r(upper, currentValueRegister);
+ isUpper = m_jit.emitUnlinkedJe();
+ hasUpper = true;
+ ch = lower;
+ }
+ }
+
+ // checks for ch, or lower case version of ch, if insensitive
+ m_jit.cmpl_i32r((unsigned short)ch, currentValueRegister);
+ failures.append(m_jit.emitUnlinkedJne());
+
+ if (m_parser.m_ignoreCase && hasUpper) {
+ // for unicode case insensitive matches, branch here if upper matches.
+ m_jit.link(isUpper, m_jit.label());
+ }
+
+ // on success consume the char
+ m_jit.addl_i8r(1, currentPositionRegister);
+}
+
+void WRECGenerator::generateCharacterClassInvertedRange(JmpSrcVector& failures, JmpSrcVector& matchDest, const CharacterClassRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount)
+{
+ do {
+ // pick which range we're going to generate
+ int which = count >> 1;
+ char lo = ranges[which].begin;
+ char hi = ranges[which].end;
+
+ m_jit.cmpl_i32r((unsigned short)lo, currentValueRegister);
+
+ // check if there are any ranges or matches below lo. If not, just jl to failure -
+ // if there is anything else to check, check that first, if it falls through jmp to failure.
+ if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
+ JmpSrc loOrAbove = m_jit.emitUnlinkedJge();
+
+ // generate code for all ranges before this one
+ if (which)
+ generateCharacterClassInvertedRange(failures, matchDest, ranges, which, matchIndex, matches, matchCount);
+
+ do {
+ m_jit.cmpl_i32r((unsigned short)matches[*matchIndex], currentValueRegister);
+ matchDest.append(m_jit.emitUnlinkedJe());
+ ++*matchIndex;
+ } while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo));
+ failures.append(m_jit.emitUnlinkedJmp());
+
+ m_jit.link(loOrAbove, m_jit.label());
+ } else if (which) {
+ JmpSrc loOrAbove = m_jit.emitUnlinkedJge();
+
+ generateCharacterClassInvertedRange(failures, matchDest, ranges, which, matchIndex, matches, matchCount);
+ failures.append(m_jit.emitUnlinkedJmp());
+
+ m_jit.link(loOrAbove, m_jit.label());
+ } else
+ failures.append(m_jit.emitUnlinkedJl());
+
+ while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi))
+ ++*matchIndex;
+
+ m_jit.cmpl_i32r((unsigned short)hi, currentValueRegister);
+ matchDest.append(m_jit.emitUnlinkedJle());
+ // fall through to here, the value is above hi.
+
+ // shuffle along & loop around if there are any more matches to handle.
+ unsigned next = which + 1;
+ ranges += next;
+ count -= next;
+ } while (count);
+}
+
+void WRECGenerator::generateCharacterClassInverted(JmpSrcVector& matchDest, CharacterClass& charClass)
+{
+ JmpSrc unicodeFail;
+ if (charClass.numMatchesUnicode || charClass.numRangesUnicode) {
+ m_jit.cmpl_i8r(0x7f, currentValueRegister);
+ JmpSrc isAscii = m_jit.emitUnlinkedJle();
+
+ if (charClass.numMatchesUnicode) {
+ for (unsigned i = 0; i < charClass.numMatchesUnicode; ++i) {
+ UChar ch = charClass.matchesUnicode[i];
+ m_jit.cmpl_i32r((unsigned short)ch, currentValueRegister);
+ matchDest.append(m_jit.emitUnlinkedJe());
+ }
+ }
+
+ if (charClass.numRangesUnicode) {
+ for (unsigned i = 0; i < charClass.numRangesUnicode; ++i) {
+ UChar lo = charClass.rangesUnicode[i].begin;
+ UChar hi = charClass.rangesUnicode[i].end;
+
+ m_jit.cmpl_i32r((unsigned short)lo, currentValueRegister);
+ JmpSrc below = m_jit.emitUnlinkedJl();
+ m_jit.cmpl_i32r((unsigned short)hi, currentValueRegister);
+ matchDest.append(m_jit.emitUnlinkedJle());
+ m_jit.link(below, m_jit.label());
+ }
+ }
+
+ unicodeFail = m_jit.emitUnlinkedJmp();
+ m_jit.link(isAscii, m_jit.label());
+ }
+
+ if (charClass.numRanges) {
+ unsigned matchIndex = 0;
+ JmpSrcVector failures;
+ generateCharacterClassInvertedRange(failures, matchDest, charClass.ranges, charClass.numRanges, &matchIndex, charClass.matches, charClass.numMatches);
+ while (matchIndex < charClass.numMatches) {
+ m_jit.cmpl_i32r((unsigned short)charClass.matches[matchIndex], currentValueRegister);
+ matchDest.append(m_jit.emitUnlinkedJe());
+ ++matchIndex;
+ }
+ JmpDst noMatch = m_jit.label();
+ for (unsigned i = 0; i < failures.size(); ++i)
+ m_jit.link(failures[i], noMatch);
+ failures.clear();
+ } else if (charClass.numMatches) {
+ // optimization: gather 'a','A' etc back together, can mask & test once.
+ Vector<char> matchesAZaz;
+
+ for (unsigned i = 0; i < charClass.numMatches; ++i) {
+ char ch = charClass.matches[i];
+ if (m_parser.m_ignoreCase) {
+ if (isASCIILower(ch)) {
+ matchesAZaz.append(ch);
+ continue;
+ }
+ if (isASCIIUpper(ch))
+ continue;
+ }
+ m_jit.cmpl_i32r((unsigned short)ch, currentValueRegister);
+ matchDest.append(m_jit.emitUnlinkedJe());
+ }
+
+ if (unsigned countAZaz = matchesAZaz.size()) {
+ m_jit.orl_i32r(32, currentValueRegister);
+
+ for (unsigned i = 0; i < countAZaz; ++i) {
+ char ch = matchesAZaz[i];
+ m_jit.cmpl_i32r((unsigned short)ch, currentValueRegister);
+ matchDest.append(m_jit.emitUnlinkedJe());
+ }
+ }
+ }
+
+ if (charClass.numMatchesUnicode || charClass.numRangesUnicode)
+ m_jit.link(unicodeFail, m_jit.label());
+}
+
+void WRECGenerator::generateCharacterClass(JmpSrcVector& failures, CharacterClass& charClass, bool invert)
+{
+ m_jit.cmpl_rr(lengthRegister, currentPositionRegister);
+ failures.append(m_jit.emitUnlinkedJe());
+ m_jit.movzwl_mr(inputRegister, currentPositionRegister, 2, currentValueRegister);
+
+ if (invert)
+ generateCharacterClassInverted(failures, charClass);
+ else {
+ JmpSrcVector successes;
+ generateCharacterClassInverted(successes, charClass);
+ failures.append(m_jit.emitUnlinkedJmp());
+ JmpDst here = m_jit.label();
+ for (unsigned i = 0; i < successes.size(); ++i)
+ m_jit.link(successes[i], here);
+ }
+
+ m_jit.addl_i8r(1, currentPositionRegister);
+}
+
+WRECGenerator::JmpSrc WRECGenerator::generateParentheses(ParenthesesType type)
+{
+ unsigned subpatternId = (type == capturing) ? ++m_parser.m_numSubpatterns : m_parser.m_numSubpatterns;
+
+ // push pos, both to preserve for fail + reloaded in parseDisjunction
+ m_jit.pushl_r(currentPositionRegister);
+
+ // Plant a disjunction, wrapped to invert behaviour -
+ JmpSrcVector newFailures;
+ m_parser.parseDisjunction(newFailures);
+
+ if (type == capturing) {
+ m_jit.popl_r(currentValueRegister);
+ m_jit.movl_rm(currentValueRegister, (2 * subpatternId) * sizeof(int), outputRegister);
+ m_jit.movl_rm(currentPositionRegister, (2 * subpatternId + 1) * sizeof(int), outputRegister);
+ } else if (type == non_capturing)
+ m_jit.addl_i8r(4, X86::esp);
+ else
+ m_jit.popl_r(currentPositionRegister);
+
+ // This is a little lame - jump to jump if there is a nested disjunction.
+ // (suggestion to fix: make parseDisjunction populate a JmpSrcVector of
+ // disjunct successes... this is probably not worth the compile cost in
+ // the common case to fix).
+ JmpSrc successfulMatch = m_jit.emitUnlinkedJmp();
+
+ JmpDst originalFailure = m_jit.label();
+ for (unsigned i = 0; i < newFailures.size(); ++i)
+ m_jit.link(newFailures[i], originalFailure);
+ newFailures.clear();
+ // fail: restore currentPositionRegister
+ m_jit.popl_r(currentPositionRegister);
+
+ JmpSrc jumpToFail;
+ // If this was an inverted assert, fail = success! - just let the failure case drop through,
+ // success case goes to failures. Both paths restore curr pos.
+ if (type == inverted_assertion)
+ jumpToFail = successfulMatch;
+ else {
+ // plant a jump so any fail will link off to 'failures',
+ jumpToFail = m_jit.emitUnlinkedJmp();
+ // link successes to jump here
+ m_jit.link(successfulMatch, m_jit.label());
+ }
+ return jumpToFail;
+}
+
+void WRECGenerator::generateParenthesesNonGreedy(JmpSrcVector& failures, JmpDst start, JmpSrc success, JmpSrc fail)
+{
+ m_jit.link(m_jit.emitUnlinkedJmp(), start);
+ m_jit.link(success, m_jit.label());
+
+ failures.append(fail);
+}
+
+WRECGenerator::JmpSrc WRECGenerator::generateParenthesesResetTrampoline(JmpSrcVector& newFailures, unsigned subpatternIdBefore, unsigned subpatternIdAfter)
+{
+ JmpSrc skip = m_jit.emitUnlinkedJmp();
+
+ JmpDst subPatternResetTrampoline = m_jit.label();
+ for (unsigned i = 0; i < newFailures.size(); ++i)
+ m_jit.link(newFailures[i], subPatternResetTrampoline);
+ newFailures.clear();
+ for (unsigned i = subpatternIdBefore + 1; i <= subpatternIdAfter; ++i) {
+ m_jit.movl_i32m(-1, (2 * i) * sizeof(int), outputRegister);
+ m_jit.movl_i32m(-1, (2 * i + 1) * sizeof(int), outputRegister);
+ }
+
+ JmpSrc newFailJump = m_jit.emitUnlinkedJmp();
+ m_jit.link(skip, m_jit.label());
+
+ return newFailJump;
+}
+
+void WRECGenerator::generateAssertionBOL(JmpSrcVector& failures)
+{
+ if (m_parser.m_multiline) {
+ JmpSrcVector previousIsNewline;
+
+ // begin of input == success
+ m_jit.cmpl_i8r(0, currentPositionRegister);
+ previousIsNewline.append(m_jit.emitUnlinkedJe());
+
+ // now check prev char against newline characters.
+ m_jit.movzwl_mr(-2, inputRegister, currentPositionRegister, 2, currentValueRegister);
+ generateCharacterClassInverted(previousIsNewline, getCharacterClassNewline());
+
+ failures.append(m_jit.emitUnlinkedJmp());
+
+ JmpDst success = m_jit.label();
+ for (unsigned i = 0; i < previousIsNewline.size(); ++i)
+ m_jit.link(previousIsNewline[i], success);
+ previousIsNewline.clear();
+ } else {
+ m_jit.cmpl_i8r(0, currentPositionRegister);
+ failures.append(m_jit.emitUnlinkedJne());
+ }
+}
+
+void WRECGenerator::generateAssertionEOL(JmpSrcVector& failures)
+{
+ if (m_parser.m_multiline) {
+ JmpSrcVector nextIsNewline;
+
+ // end of input == success
+ m_jit.cmpl_rr(lengthRegister, currentPositionRegister);
+ nextIsNewline.append(m_jit.emitUnlinkedJe());
+
+ // now check next char against newline characters.
+ m_jit.movzwl_mr(inputRegister, currentPositionRegister, 2, currentValueRegister);
+ generateCharacterClassInverted(nextIsNewline, getCharacterClassNewline());
+
+ failures.append(m_jit.emitUnlinkedJmp());
+
+ JmpDst success = m_jit.label();
+ for (unsigned i = 0; i < nextIsNewline.size(); ++i)
+ m_jit.link(nextIsNewline[i], success);
+ nextIsNewline.clear();
+ } else {
+ m_jit.cmpl_rr(lengthRegister, currentPositionRegister);
+ failures.append(m_jit.emitUnlinkedJne());
+ }
+}
+
+void WRECGenerator::generateAssertionWordBoundary(JmpSrcVector& failures, bool invert)
+{
+ JmpSrcVector wordBoundary;
+ JmpSrcVector notWordBoundary;
+
+ // (1) Check if the previous value was a word char
+
+ // (1.1) check for begin of input
+ m_jit.cmpl_i8r(0, currentPositionRegister);
+ JmpSrc atBegin = m_jit.emitUnlinkedJe();
+ // (1.2) load the last char, and chck if is word character
+ m_jit.movzwl_mr(-2, inputRegister, currentPositionRegister, 2, currentValueRegister);
+ JmpSrcVector previousIsWord;
+ generateCharacterClassInverted(previousIsWord, getCharacterClassWordchar());
+ // (1.3) if we get here, previous is not a word char
+ m_jit.link(atBegin, m_jit.label());
+
+ // (2) Handle situation where previous was NOT a \w
+
+ // (2.1) check for end of input
+ m_jit.cmpl_rr(lengthRegister, currentPositionRegister);
+ notWordBoundary.append(m_jit.emitUnlinkedJe());
+ // (2.2) load the next char, and chck if is word character
+ m_jit.movzwl_mr(inputRegister, currentPositionRegister, 2, currentValueRegister);
+ generateCharacterClassInverted(wordBoundary, getCharacterClassWordchar());
+ // (2.3) If we get here, neither chars are word chars
+ notWordBoundary.append(m_jit.emitUnlinkedJmp());
+
+ // (3) Handle situation where previous was a \w
+
+ // (3.0) link success in first match to here
+ JmpDst section3 = m_jit.label();
+ for (unsigned i = 0; i < previousIsWord.size(); ++i)
+ m_jit.link(previousIsWord[i], section3);
+ previousIsWord.clear();
+ // (3.1) check for end of input
+ m_jit.cmpl_rr(lengthRegister, currentPositionRegister);
+ wordBoundary.append(m_jit.emitUnlinkedJe());
+ // (3.2) load the next char, and chck if is word character
+ m_jit.movzwl_mr(inputRegister, currentPositionRegister, 2, currentValueRegister);
+ generateCharacterClassInverted(notWordBoundary, getCharacterClassWordchar());
+ // (3.3) If we get here, this is an end of a word, within the input.
+
+ // (4) Link everything up
+
+ if (invert) {
+ // handle the fall through case
+ wordBoundary.append(m_jit.emitUnlinkedJmp());
+
+ // looking for non word boundaries, so link boundary fails to here.
+ JmpDst success = m_jit.label();
+ for (unsigned i = 0; i < notWordBoundary.size(); ++i)
+ m_jit.link(notWordBoundary[i], success);
+ notWordBoundary.clear();
+
+ failures.append(wordBoundary.begin(), wordBoundary.size());
+ } else {
+ // looking for word boundaries, so link successes here.
+ JmpDst success = m_jit.label();
+ for (unsigned i = 0; i < wordBoundary.size(); ++i)
+ m_jit.link(wordBoundary[i], success);
+ wordBoundary.clear();
+
+ failures.append(notWordBoundary.begin(), notWordBoundary.size());
+ }
+}
+
+void WRECGenerator::generateBackreference(JmpSrcVector& failures, unsigned subpatternId)
+{
+ m_jit.pushl_r(currentPositionRegister);
+ m_jit.pushl_r(quantifierCountRegister);
+
+ // get the start pos of the backref into quantifierCountRegister (multipurpose!)
+ m_jit.movl_mr((2 * subpatternId) * sizeof(int), outputRegister, quantifierCountRegister);
+
+ JmpSrc skipIncrement = m_jit.emitUnlinkedJmp();
+ JmpDst topOfLoop = m_jit.label();
+ m_jit.addl_i8r(1, currentPositionRegister);
+ m_jit.addl_i8r(1, quantifierCountRegister);
+ m_jit.link(skipIncrement, m_jit.label());
+
+ // check if we're at the end of backref (if we are, success!)
+ m_jit.cmpl_rm(quantifierCountRegister, ((2 * subpatternId) + 1) * sizeof(int), outputRegister);
+ JmpSrc endOfBackRef = m_jit.emitUnlinkedJe();
+
+ m_jit.movzwl_mr(inputRegister, quantifierCountRegister, 2, currentValueRegister);
+
+ // check if we've run out of input (this would be a can o'fail)
+ m_jit.cmpl_rr(lengthRegister, currentPositionRegister);
+ JmpSrc endOfInput = m_jit.emitUnlinkedJe();
+
+ m_jit.cmpw_rm(currentValueRegister, inputRegister, currentPositionRegister, 2);
+ m_jit.link(m_jit.emitUnlinkedJe(), topOfLoop);
+
+ m_jit.link(endOfInput, m_jit.label());
+ // Failure
+ m_jit.popl_r(quantifierCountRegister);
+ m_jit.popl_r(currentPositionRegister);
+ failures.append(m_jit.emitUnlinkedJmp());
+
+ // Success
+ m_jit.link(endOfBackRef, m_jit.label());
+ m_jit.popl_r(quantifierCountRegister);
+ m_jit.addl_i8r(4, X86::esp);
+}
+
+void WRECGenerator::generateDisjunction(JmpSrcVector& successes, JmpSrcVector& failures)
+{
+ successes.append(m_jit.emitUnlinkedJmp());
+
+ JmpDst here = m_jit.label();
+
+ for (unsigned i = 0; i < failures.size(); ++i)
+ m_jit.link(failures[i], here);
+ failures.clear();
+
+ m_jit.movl_mr(X86::esp, currentPositionRegister);
+}
+
+void WRECGenerator::terminateDisjunction(JmpSrcVector& successes)
+{
+ JmpDst here = m_jit.label();
+ for (unsigned i = 0; i < successes.size(); ++i)
+ m_jit.link(successes[i], here);
+ successes.clear();
+}
+
+ALWAYS_INLINE Quantifier WRECParser::parseGreedyQuantifier()
+{
+ switch (peek()) {
+ case '?':
+ consume();
+ return Quantifier(Quantifier::Greedy, 0, 1);
+
+ case '*':
+ consume();
+ return Quantifier(Quantifier::Greedy, 0);
+
+ case '+':
+ consume();
+ return Quantifier(Quantifier::Greedy, 1);
+
+ case '{': {
+ consume();
+ // a numeric quantifier should always have a lower bound
+ if (!peekIsDigit()) {
+ m_err = Error_malformedQuantifier;
+ return Quantifier(Quantifier::Error);
+ }
+ int min = consumeNumber();
+
+ // this should either be a , or a }
+ switch (peek()) {
+ case '}':
+ // {n} - exactly n times. (technically I think a '?' is valid in the bnf - bit meaningless).
+ consume();
+ return Quantifier(Quantifier::Greedy, min, min);
+
+ case ',':
+ consume();
+ switch (peek()) {
+ case '}':
+ // {n,} - n to inf times.
+ consume();
+ return Quantifier(Quantifier::Greedy, min);
+
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9': {
+ // {n,m} - n to m times.
+ int max = consumeNumber();
+
+ if (peek() != '}') {
+ m_err = Error_malformedQuantifier;
+ return Quantifier(Quantifier::Error);
+ }
+ consume();
+
+ return Quantifier(Quantifier::Greedy, min, max);
+ }
+
+ default:
+ m_err = Error_malformedQuantifier;
+ return Quantifier(Quantifier::Error);
+ }
+
+ default:
+ m_err = Error_malformedQuantifier;
+ return Quantifier(Quantifier::Error);
+ }
+ }
+ // None of the above - no quantifier
+ default:
+ return Quantifier();
+ }
+}
+
+Quantifier WRECParser::parseQuantifier()
+{
+ Quantifier q = parseGreedyQuantifier();
+
+ if ((q.type == Quantifier::Greedy) && (peek() == '?')) {
+ consume();
+ q.type = Quantifier::NonGreedy;
+ }
+
+ return q;
+}
+
+bool WRECParser::parsePatternCharacterQualifier(JmpSrcVector& failures, int ch)
+{
+ Quantifier q = parseQuantifier();
+
+ switch (q.type) {
+ case Quantifier::None: {
+ m_generator.generatePatternCharacter(failures, ch);
+ break;
+ }
+
+ case Quantifier::Greedy: {
+ GeneratePatternCharacterFunctor functor(ch);
+ m_generator.generateGreedyQuantifier(failures, functor, q.min, q.max);
+ break;
+ }
+
+ case Quantifier::NonGreedy: {
+ GeneratePatternCharacterFunctor functor(ch);
+ m_generator.generateNonGreedyQuantifier(failures, functor, q.min, q.max);
+ break;
+ }
+
+ case Quantifier::Error:
+ return false;
+ }
+
+ return true;
+}
+
+bool WRECParser::parseCharacterClassQuantifier(JmpSrcVector& failures, CharacterClass& charClass, bool invert)
+{
+ Quantifier q = parseQuantifier();
+
+ switch (q.type) {
+ case Quantifier::None: {
+ m_generator.generateCharacterClass(failures, charClass, invert);
+ break;
+ }
+
+ case Quantifier::Greedy: {
+ GenerateCharacterClassFunctor functor(&charClass, invert);
+ m_generator.generateGreedyQuantifier(failures, functor, q.min, q.max);
+ break;
+ }
+
+ case Quantifier::NonGreedy: {
+ GenerateCharacterClassFunctor functor(&charClass, invert);
+ m_generator.generateNonGreedyQuantifier(failures, functor, q.min, q.max);
+ break;
+ }
+
+ case Quantifier::Error:
+ return false;
+ }
+
+ return true;
+}
+
+bool WRECParser::parseBackreferenceQuantifier(JmpSrcVector& failures, unsigned subpatternId)
+{
+ Quantifier q = parseQuantifier();
+
+ switch (q.type) {
+ case Quantifier::None: {
+ m_generator.generateBackreference(failures, subpatternId);
+ break;
+ }
+
+ case Quantifier::Greedy:
+ case Quantifier::NonGreedy:
+ m_generator.generateBackreferenceQuantifier(failures, q.type, subpatternId, q.min, q.max);
+ return true;
+
+ case Quantifier::Error:
+ return false;
+ }
+
+ return true;
+}
+
+bool WRECParser::parseParentheses(JmpSrcVector&)
+{
+ // FIXME: We don't currently backtrack correctly within parentheses in cases such as
+ // "c".match(/(.*)c/) so we fall back to PCRE for any regexp containing parentheses.
+
+ m_err = TempError_unsupportedParentheses;
+ return false;
+}
+
+bool WRECParser::parseCharacterClass(JmpSrcVector& failures)
+{
+ bool invert = false;
+ if (peek() == '^') {
+ consume();
+ invert = true;
+ }
+
+ CharacterClassConstructor charClassConstructor(m_ignoreCase);
+
+ UChar ch;
+ while ((ch = peek()) != ']') {
+ switch (ch) {
+ case EndOfPattern:
+ m_err = Error_malformedCharacterClass;
+ return false;
+
+ case '\\':
+ consume();
+ switch (ch = peek()) {
+ case EndOfPattern:
+ m_err = Error_malformedEscape;
+ return false;
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ charClassConstructor.put(consumeOctal());
+ break;
+
+ // ControlEscape
+ case 'b':
+ consume();
+ charClassConstructor.put('\b');
+ break;
+ case 'f':
+ consume();
+ charClassConstructor.put('\f');
+ break;
+ case 'n':
+ consume();
+ charClassConstructor.put('\n');
+ break;
+ case 'r':
+ consume();
+ charClassConstructor.put('\r');
+ break;
+ case 't':
+ consume();
+ charClassConstructor.put('\t');
+ break;
+ case 'v':
+ consume();
+ charClassConstructor.put('\v');
+ break;
+
+ // ControlLetter
+ case 'c': {
+ consume();
+ int control = consume();
+ if (!isASCIIAlpha(control)) {
+ m_err = Error_malformedEscape;
+ return false;
+ }
+ charClassConstructor.put(control&31);
+ break;
+ }
+
+ // HexEscape
+ case 'x': {
+ consume();
+ int x = consumeHex(2);
+ if (x == -1) {
+ m_err = Error_malformedEscape;
+ return false;
+ }
+ charClassConstructor.put(x);
+ break;
+ }
+
+ // UnicodeEscape
+ case 'u': {
+ consume();
+ int x = consumeHex(4);
+ if (x == -1) {
+ m_err = Error_malformedEscape;
+ return false;
+ }
+ charClassConstructor.put(x);
+ break;
+ }
+
+ // CharacterClassEscape
+ case 'd':
+ consume();
+ charClassConstructor.append(getCharacterClassDigits());
+ break;
+ case 's':
+ consume();
+ charClassConstructor.append(getCharacterClassSpaces());
+ break;
+ case 'w':
+ consume();
+ charClassConstructor.append(getCharacterClassWordchar());
+ break;
+ case 'D':
+ consume();
+ charClassConstructor.append(getCharacterClassNondigits());
+ break;
+ case 'S':
+ consume();
+ charClassConstructor.append(getCharacterClassNonspaces());
+ break;
+ case 'W':
+ consume();
+ charClassConstructor.append(getCharacterClassNonwordchar());
+ break;
+
+ case '-':
+ consume();
+ charClassConstructor.flushBeforeEscapedHyphen();
+ charClassConstructor.put(ch);
+ break;
+
+ // IdentityEscape
+ default: {
+ // TODO: check this test for IdentifierPart.
+ int ch = consume();
+ if (isASCIIAlphanumeric(ch) || (ch == '_')) {
+ m_err = Error_malformedEscape;
+ return false;
+ }
+ charClassConstructor.put(ch);
+ break;
+ }
+ }
+ break;
+
+ default:
+ consume();
+ charClassConstructor.put(ch);
+ }
+ }
+ consume();
+
+ // lazily catch reversed ranges ([z-a])in character classes
+ if (charClassConstructor.isUpsideDown()) {
+ m_err = Error_malformedCharacterClass;
+ return false;
+ }
+
+ charClassConstructor.flush();
+ CharacterClass charClass = charClassConstructor.charClass();
+ return parseCharacterClassQuantifier(failures, charClass, invert);
+}
+
+bool WRECParser::parseOctalEscape(JmpSrcVector& failures)
+{
+ return parsePatternCharacterQualifier(failures, consumeOctal());
+}
+
+bool WRECParser::parseEscape(JmpSrcVector& failures)
+{
+ switch (peek()) {
+ case EndOfPattern:
+ m_err = Error_malformedEscape;
+ return false;
+
+ // Assertions
+ case 'b':
+ consume();
+ m_generator.generateAssertionWordBoundary(failures, false);
+ return true;
+
+ case 'B':
+ consume();
+ m_generator.generateAssertionWordBoundary(failures, true);
+ return true;
+
+ // Octal escape
+ case '0':
+ consume();
+ return parseOctalEscape(failures);
+
+ // DecimalEscape
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7': {
+ // FIXME: This does not support forward references. It's not clear whether they
+ // should be valid.
+ unsigned value = peekDigit();
+ if (value > m_numSubpatterns)
+ return parseOctalEscape(failures);
+ }
+ case '8':
+ case '9': {
+ unsigned value = peekDigit();
+ if (value > m_numSubpatterns) {
+ consume();
+ m_err = Error_malformedEscape;
+ return false;
+ }
+ consume();
+
+ while (peekIsDigit()) {
+ unsigned newValue = value * 10 + peekDigit();
+ if (newValue > m_numSubpatterns)
+ break;
+ value = newValue;
+ consume();
+ }
+
+ parseBackreferenceQuantifier(failures, value);
+ return true;
+ }
+
+ // ControlEscape
+ case 'f':
+ consume();
+ return parsePatternCharacterQualifier(failures, '\f');
+ case 'n':
+ consume();
+ return parsePatternCharacterQualifier(failures, '\n');
+ case 'r':
+ consume();
+ return parsePatternCharacterQualifier(failures, '\r');
+ case 't':
+ consume();
+ return parsePatternCharacterQualifier(failures, '\t');
+ case 'v':
+ consume();
+ return parsePatternCharacterQualifier(failures, '\v');
+
+ // ControlLetter
+ case 'c': {
+ consume();
+ int control = consume();
+ if (!isASCIIAlpha(control)) {
+ m_err = Error_malformedEscape;
+ return false;
+ }
+ return parsePatternCharacterQualifier(failures, control&31);
+ }
+
+ // HexEscape
+ case 'x': {
+ consume();
+ int x = consumeHex(2);
+ if (x == -1) {
+ m_err = Error_malformedEscape;
+ return false;
+ }
+ return parsePatternCharacterQualifier(failures, x);
+ }
+
+ // UnicodeEscape
+ case 'u': {
+ consume();
+ int x = consumeHex(4);
+ if (x == -1) {
+ m_err = Error_malformedEscape;
+ return false;
+ }
+ return parsePatternCharacterQualifier(failures, x);
+ }
+
+ // CharacterClassEscape
+ case 'd':
+ consume();
+ return parseCharacterClassQuantifier(failures, getCharacterClassDigits(), false);
+ case 's':
+ consume();
+ return parseCharacterClassQuantifier(failures, getCharacterClassSpaces(), false);
+ case 'w':
+ consume();
+ return parseCharacterClassQuantifier(failures, getCharacterClassWordchar(), false);
+ case 'D':
+ consume();
+ return parseCharacterClassQuantifier(failures, getCharacterClassDigits(), true);
+ case 'S':
+ consume();
+ return parseCharacterClassQuantifier(failures, getCharacterClassSpaces(), true);
+ case 'W':
+ consume();
+ return parseCharacterClassQuantifier(failures, getCharacterClassWordchar(), true);
+
+ // IdentityEscape
+ default: {
+ // TODO: check this test for IdentifierPart.
+ int ch = consume();
+ if (isASCIIAlphanumeric(ch) || (ch == '_')) {
+ m_err = Error_malformedEscape;
+ return false;
+ }
+ return parsePatternCharacterQualifier(failures, ch);
+ }
+ }
+}
+
+bool WRECParser::parseTerm(JmpSrcVector& failures)
+{
+ switch (peek()) {
+ case EndOfPattern:
+ case '*':
+ case '+':
+ case '?':
+ case ')':
+ case ']':
+ case '{':
+ case '}':
+ case '|':
+ // Not allowed in a Term!
+ return false;
+
+ case '^':
+ consume();
+ m_generator.generateAssertionBOL(failures);
+ return true;
+
+ case '$':
+ consume();
+ m_generator.generateAssertionEOL(failures);
+ return true;
+
+ case '\\':
+ // b & B are also assertions...
+ consume();
+ return parseEscape(failures);
+
+ case '.':
+ consume();
+ return parseCharacterClassQuantifier(failures, getCharacterClassNewline(), true);
+
+ case '[':
+ // CharacterClass
+ consume();
+ return parseCharacterClass(failures);
+
+ case '(':
+ consume();
+ return parseParentheses(failures);
+
+ default:
+ // Anything else is a PatternCharacter
+ return parsePatternCharacterQualifier(failures, consume());
+ }
+}
+
+/*
+ interface req: CURR_POS is on stack (can be reloaded).
+*/
+void WRECParser::parseDisjunction(JmpSrcVector& failures)
+{
+ parseAlternative(failures);
+
+ if (peek() == '|') {
+ JmpSrcVector successes;
+
+ do {
+ consume();
+
+ m_generator.generateDisjunction(successes, failures);
+
+ parseAlternative(failures);
+ } while (peek() == '|');
+
+ m_generator.terminateDisjunction(successes);
+ }
+}
+
+} // namespace JSC
+
+#endif // ENABLE(WREC)
diff --git a/JavaScriptCore/wrec/WREC.h b/JavaScriptCore/wrec/WREC.h
new file mode 100644
index 0000000..301bd3b
--- /dev/null
+++ b/JavaScriptCore/wrec/WREC.h
@@ -0,0 +1,258 @@
+/*
+ * Copyright (C) 2008 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef WREC_h
+#define WREC_h
+
+#if ENABLE(WREC)
+
+#include "ustring.h"
+#include <masm/X86Assembler.h>
+#include <wtf/ASCIICType.h>
+#include <wtf/Vector.h>
+
+#if COMPILER(GCC)
+#define WREC_CALL __attribute__ ((regparm (3)))
+#else
+#define WREC_CALL
+#endif
+
+namespace JSC {
+
+ typedef int (*WRECFunction)(const UChar* input, unsigned start, unsigned length, int* output) WREC_CALL;
+
+ class GenerateAtomFunctor;
+ struct CharacterClassRange;
+ struct CharacterClass;
+
+ struct Quantifier {
+ enum Type {
+ None,
+ Greedy,
+ NonGreedy,
+ Error,
+ };
+
+ Quantifier()
+ : type(None)
+ {
+ }
+
+ Quantifier(Type type, unsigned min = 0, unsigned max = noMaxSpecified)
+ : type(type)
+ , min(min)
+ , max(max)
+ {
+ }
+
+ Type type;
+
+ unsigned min;
+ unsigned max;
+
+ static const unsigned noMaxSpecified = UINT_MAX;
+ };
+
+ class WRECParser;
+
+ typedef Vector<X86Assembler::JmpSrc> JmpSrcVector;
+
+ class WRECGenerator {
+ public:
+ WRECGenerator(WRECParser& parser, X86Assembler& jit)
+ : m_parser(parser)
+ , m_jit(jit)
+ {
+ }
+
+ typedef X86Assembler::JmpSrc JmpSrc;
+ typedef X86Assembler::JmpDst JmpDst;
+
+ // these regs setup by the params
+ static const X86Assembler::RegisterID inputRegister = X86::eax;
+ static const X86Assembler::RegisterID currentPositionRegister = X86::edx;
+ static const X86Assembler::RegisterID lengthRegister = X86::ecx;
+ static const X86Assembler::RegisterID currentValueRegister = X86::esi;
+ static const X86Assembler::RegisterID outputRegister = X86::edi;
+ static const X86Assembler::RegisterID quantifierCountRegister = X86::ebx;
+
+ friend class GenerateAtomFunctor;
+ friend class GeneratePatternCharacterFunctor;
+ friend class GenerateCharacterClassFunctor;
+ friend class GenerateBackreferenceFunctor;
+ friend class GenerateParenthesesNonGreedyFunctor;
+
+ void generateGreedyQuantifier(JmpSrcVector& failures, GenerateAtomFunctor& functor, unsigned min, unsigned max);
+ void generateNonGreedyQuantifier(JmpSrcVector& failures, GenerateAtomFunctor& functor, unsigned min, unsigned max);
+ void generateBacktrack1();
+ void generateBacktrackBackreference(unsigned subpatternId);
+ void generateCharacterClass(JmpSrcVector& failures, CharacterClass& charClass, bool invert);
+ void generateCharacterClassInverted(JmpSrcVector& failures, CharacterClass& charClass);
+ void generateCharacterClassInvertedRange(JmpSrcVector& failures, JmpSrcVector& matchDest, const CharacterClassRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount);
+ void generatePatternCharacter(JmpSrcVector& failures, int ch);
+ void generateAssertionWordBoundary(JmpSrcVector& failures, bool invert);
+ void generateAssertionBOL(JmpSrcVector& failures);
+ void generateAssertionEOL(JmpSrcVector& failures);
+ void generateBackreference(JmpSrcVector& failures, unsigned subpatternID);
+ void generateBackreferenceQuantifier(JmpSrcVector& failures, Quantifier::Type quantifierType, unsigned subpatternId, unsigned min, unsigned max);
+ enum ParenthesesType { capturing, non_capturing, assertion, inverted_assertion }; // order is relied on in generateParentheses()
+ JmpSrc generateParentheses(ParenthesesType type);
+ JmpSrc generateParenthesesResetTrampoline(JmpSrcVector& newFailures, unsigned subpatternIdBefore, unsigned subpatternIdAfter);
+ void generateParenthesesNonGreedy(JmpSrcVector& failures, JmpDst start, JmpSrc success, JmpSrc fail);
+
+ void generateDisjunction(JmpSrcVector& successes, JmpSrcVector& failures);
+ void terminateDisjunction(JmpSrcVector& successes);
+
+ private:
+ WRECParser& m_parser;
+ X86Assembler& m_jit;
+ };
+
+ class WRECParser {
+ public:
+ bool m_ignoreCase;
+ bool m_multiline;
+ unsigned m_numSubpatterns;
+ enum WRECError {
+ NoError,
+ Error_malformedCharacterClass,
+ Error_malformedParentheses,
+ Error_malformedPattern,
+ Error_malformedQuantifier,
+ Error_malformedEscape,
+ TempError_unsupportedQuantifier,
+ TempError_unsupportedParentheses,
+ } m_err;
+
+ WRECParser(const UString& pattern, bool ignoreCase, bool multiline, X86Assembler& jit)
+ : m_ignoreCase(ignoreCase)
+ , m_multiline(multiline)
+ , m_numSubpatterns(0)
+ , m_err(NoError)
+ , m_generator(*this, jit)
+ , m_data(pattern.data())
+ , m_size(pattern.size())
+ , m_index(0)
+ {
+ }
+
+ void parseAlternative(JmpSrcVector& failures)
+ {
+ while (parseTerm(failures)) { }
+ }
+
+ void parseDisjunction(JmpSrcVector& failures);
+
+ bool parseTerm(JmpSrcVector& failures);
+ bool parseEscape(JmpSrcVector& failures);
+ bool parseOctalEscape(JmpSrcVector& failures);
+ bool parseParentheses(JmpSrcVector& failures);
+ bool parseCharacterClass(JmpSrcVector& failures);
+ bool parseCharacterClassQuantifier(JmpSrcVector& failures, CharacterClass& charClass, bool invert);
+ bool parsePatternCharacterQualifier(JmpSrcVector& failures, int ch);
+ bool parseBackreferenceQuantifier(JmpSrcVector& failures, unsigned subpatternId);
+
+ ALWAYS_INLINE Quantifier parseGreedyQuantifier();
+ Quantifier parseQuantifier();
+
+ static const int EndOfPattern = -1;
+
+ int peek()
+ {
+ if (m_index >= m_size)
+ return EndOfPattern;
+ return m_data[m_index];
+ }
+
+ int consume()
+ {
+ if (m_index >= m_size)
+ return EndOfPattern;
+ return m_data[m_index++];
+ }
+
+ bool peekIsDigit()
+ {
+ return WTF::isASCIIDigit(peek());
+ }
+
+ unsigned peekDigit()
+ {
+ ASSERT(peekIsDigit());
+ return peek() - '0';
+ }
+
+ unsigned consumeDigit()
+ {
+ ASSERT(peekIsDigit());
+ return consume() - '0';
+ }
+
+ unsigned consumeNumber()
+ {
+ int n = consumeDigit();
+ while (peekIsDigit()) {
+ n *= 10;
+ n += consumeDigit();
+ }
+ return n;
+ }
+
+ int consumeHex(int count)
+ {
+ int n = 0;
+ while (count--) {
+ if (!WTF::isASCIIHexDigit(peek()))
+ return -1;
+ n = (n<<4) | WTF::toASCIIHexValue(consume());
+ }
+ return n;
+ }
+
+ unsigned consumeOctal()
+ {
+ unsigned n = 0;
+ while (n < 32 && WTF::isASCIIOctalDigit(peek()))
+ n = n * 8 + (consume() - '0');
+ return n;
+ }
+
+ bool isEndOfPattern()
+ {
+ return peek() != EndOfPattern;
+ }
+
+ private:
+ WRECGenerator m_generator;
+ const UChar* m_data;
+ unsigned m_size;
+ unsigned m_index;
+ };
+
+} // namespace JSC
+
+#endif // ENABLE(WREC)
+
+#endif // WREC_h