diff options
Diffstat (limited to 'JavaScriptCore/wrec')
-rw-r--r-- | JavaScriptCore/wrec/CharacterClass.cpp | 140 | ||||
-rw-r--r-- | JavaScriptCore/wrec/CharacterClass.h | 68 | ||||
-rw-r--r-- | JavaScriptCore/wrec/CharacterClassConstructor.cpp | 257 | ||||
-rw-r--r-- | JavaScriptCore/wrec/CharacterClassConstructor.h | 99 | ||||
-rw-r--r-- | JavaScriptCore/wrec/Escapes.h | 150 | ||||
-rw-r--r-- | JavaScriptCore/wrec/Quantifier.h | 66 | ||||
-rw-r--r-- | JavaScriptCore/wrec/WREC.cpp | 86 | ||||
-rw-r--r-- | JavaScriptCore/wrec/WREC.h | 54 | ||||
-rw-r--r-- | JavaScriptCore/wrec/WRECFunctors.cpp | 80 | ||||
-rw-r--r-- | JavaScriptCore/wrec/WRECFunctors.h | 109 | ||||
-rw-r--r-- | JavaScriptCore/wrec/WRECGenerator.cpp | 653 | ||||
-rw-r--r-- | JavaScriptCore/wrec/WRECGenerator.h | 128 | ||||
-rw-r--r-- | JavaScriptCore/wrec/WRECParser.cpp | 643 | ||||
-rw-r--r-- | JavaScriptCore/wrec/WRECParser.h | 214 |
14 files changed, 0 insertions, 2747 deletions
diff --git a/JavaScriptCore/wrec/CharacterClass.cpp b/JavaScriptCore/wrec/CharacterClass.cpp deleted file mode 100644 index e3f12f2..0000000 --- a/JavaScriptCore/wrec/CharacterClass.cpp +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright (C) 2008 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "CharacterClass.h" - -#if ENABLE(WREC) - -using namespace WTF; - -namespace JSC { namespace WREC { - -const CharacterClass& CharacterClass::newline() { - static const UChar asciiNewlines[2] = { '\n', '\r' }; - static const UChar unicodeNewlines[2] = { 0x2028, 0x2029 }; - static const CharacterClass charClass = { - asciiNewlines, 2, - 0, 0, - unicodeNewlines, 2, - 0, 0, - }; - - return charClass; -} - -const CharacterClass& CharacterClass::digits() { - static const CharacterRange asciiDigitsRange[1] = { { '0', '9' } }; - static const CharacterClass charClass = { - 0, 0, - asciiDigitsRange, 1, - 0, 0, - 0, 0, - }; - - return charClass; -} - -const CharacterClass& CharacterClass::spaces() { - static const UChar asciiSpaces[1] = { ' ' }; - static const CharacterRange asciiSpacesRange[1] = { { '\t', '\r' } }; - static const UChar unicodeSpaces[8] = { 0x00a0, 0x1680, 0x180e, 0x2028, 0x2029, 0x202f, 0x205f, 0x3000 }; - static const CharacterRange unicodeSpacesRange[1] = { { 0x2000, 0x200a } }; - static const CharacterClass charClass = { - asciiSpaces, 1, - asciiSpacesRange, 1, - unicodeSpaces, 8, - unicodeSpacesRange, 1, - }; - - return charClass; -} - -const CharacterClass& CharacterClass::wordchar() { - static const UChar asciiWordchar[1] = { '_' }; - static const CharacterRange asciiWordcharRange[3] = { { '0', '9' }, { 'A', 'Z' }, { 'a', 'z' } }; - static const CharacterClass charClass = { - asciiWordchar, 1, - asciiWordcharRange, 3, - 0, 0, - 0, 0, - }; - - return charClass; -} - -const CharacterClass& CharacterClass::nondigits() { - static const CharacterRange asciiNondigitsRange[2] = { { 0, '0' - 1 }, { '9' + 1, 0x7f } }; - static const CharacterRange unicodeNondigitsRange[1] = { { 0x0080, 0xffff } }; - static const CharacterClass charClass = { - 0, 0, - asciiNondigitsRange, 2, - 0, 0, - unicodeNondigitsRange, 1, - }; - - return charClass; -} - -const CharacterClass& CharacterClass::nonspaces() { - static const CharacterRange asciiNonspacesRange[3] = { { 0, '\t' - 1 }, { '\r' + 1, ' ' - 1 }, { ' ' + 1, 0x7f } }; - static const CharacterRange unicodeNonspacesRange[9] = { - { 0x0080, 0x009f }, - { 0x00a1, 0x167f }, - { 0x1681, 0x180d }, - { 0x180f, 0x1fff }, - { 0x200b, 0x2027 }, - { 0x202a, 0x202e }, - { 0x2030, 0x205e }, - { 0x2060, 0x2fff }, - { 0x3001, 0xffff } - }; - static const CharacterClass charClass = { - 0, 0, - asciiNonspacesRange, 3, - 0, 0, - unicodeNonspacesRange, 9, - }; - - return charClass; -} - -const CharacterClass& CharacterClass::nonwordchar() { - static const UChar asciiNonwordchar[1] = { '`' }; - static const CharacterRange asciiNonwordcharRange[4] = { { 0, '0' - 1 }, { '9' + 1, 'A' - 1 }, { 'Z' + 1, '_' - 1 }, { 'z' + 1, 0x7f } }; - static const CharacterRange unicodeNonwordcharRange[1] = { { 0x0080, 0xffff } }; - static const CharacterClass charClass = { - asciiNonwordchar, 1, - asciiNonwordcharRange, 4, - 0, 0, - unicodeNonwordcharRange, 1, - }; - - return charClass; -} - -} } // namespace JSC::WREC - -#endif // ENABLE(WREC) diff --git a/JavaScriptCore/wrec/CharacterClass.h b/JavaScriptCore/wrec/CharacterClass.h deleted file mode 100644 index 8a9d2fc..0000000 --- a/JavaScriptCore/wrec/CharacterClass.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (C) 2008 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef CharacterClass_h -#define CharacterClass_h - -#include <wtf/Platform.h> - -#if ENABLE(WREC) - -#include <wtf/unicode/Unicode.h> - -namespace JSC { namespace WREC { - - struct CharacterRange { - UChar begin; - UChar end; - }; - - struct CharacterClass { - static const CharacterClass& newline(); - static const CharacterClass& digits(); - static const CharacterClass& spaces(); - static const CharacterClass& wordchar(); - static const CharacterClass& nondigits(); - static const CharacterClass& nonspaces(); - static const CharacterClass& nonwordchar(); - - const UChar* matches; - unsigned numMatches; - - const CharacterRange* ranges; - unsigned numRanges; - - const UChar* matchesUnicode; - unsigned numMatchesUnicode; - - const CharacterRange* rangesUnicode; - unsigned numRangesUnicode; - }; - -} } // namespace JSC::WREC - -#endif // ENABLE(WREC) - -#endif // CharacterClass_h diff --git a/JavaScriptCore/wrec/CharacterClassConstructor.cpp b/JavaScriptCore/wrec/CharacterClassConstructor.cpp deleted file mode 100644 index 06f4262..0000000 --- a/JavaScriptCore/wrec/CharacterClassConstructor.cpp +++ /dev/null @@ -1,257 +0,0 @@ -/* - * Copyright (C) 2008, 2009 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "CharacterClassConstructor.h" - -#if ENABLE(WREC) - -#include "pcre_internal.h" -#include <wtf/ASCIICType.h> - -using namespace WTF; - -namespace JSC { namespace WREC { - -void CharacterClassConstructor::addSorted(Vector<UChar>& matches, UChar ch) -{ - unsigned pos = 0; - unsigned range = matches.size(); - - // binary chop, find position to insert char. - while (range) { - unsigned index = range >> 1; - - int val = matches[pos+index] - ch; - if (!val) - return; - else if (val > 0) - range = index; - else { - pos += (index+1); - range -= (index+1); - } - } - - if (pos == matches.size()) - matches.append(ch); - else - matches.insert(pos, ch); -} - -void CharacterClassConstructor::addSortedRange(Vector<CharacterRange>& ranges, UChar lo, UChar hi) -{ - unsigned end = ranges.size(); - - // Simple linear scan - I doubt there are that many ranges anyway... - // feel free to fix this with something faster (eg binary chop). - for (unsigned i = 0; i < end; ++i) { - // does the new range fall before the current position in the array - if (hi < ranges[i].begin) { - // optional optimization: concatenate appending ranges? - may not be worthwhile. - if (hi == (ranges[i].begin - 1)) { - ranges[i].begin = lo; - return; - } - CharacterRange r = {lo, hi}; - ranges.insert(i, r); - return; - } - // Okay, since we didn't hit the last case, the end of the new range is definitely at or after the begining - // If the new range start at or before the end of the last range, then the overlap (if it starts one after the - // end of the last range they concatenate, which is just as good. - if (lo <= (ranges[i].end + 1)) { - // found an intersect! we'll replace this entry in the array. - ranges[i].begin = std::min(ranges[i].begin, lo); - ranges[i].end = std::max(ranges[i].end, hi); - - // now check if the new range can subsume any subsequent ranges. - unsigned next = i+1; - // each iteration of the loop we will either remove something from the list, or break the loop. - while (next < ranges.size()) { - if (ranges[next].begin <= (ranges[i].end + 1)) { - // the next entry now overlaps / concatenates this one. - ranges[i].end = std::max(ranges[i].end, ranges[next].end); - ranges.remove(next); - } else - break; - } - - return; - } - } - - // CharacterRange comes after all existing ranges. - CharacterRange r = {lo, hi}; - ranges.append(r); -} - -void CharacterClassConstructor::put(UChar ch) -{ - // Parsing a regular expression like [a-z], we start in an initial empty state: - // ((m_charBuffer == -1) && !m_isPendingDash) - // When buffer the 'a' sice it may be (and is in this case) part of a range: - // ((m_charBuffer != -1) && !m_isPendingDash) - // Having parsed the hyphen we then record that the dash is also pending: - // ((m_charBuffer != -1) && m_isPendingDash) - // The next change will always take us back to the initial state - either because - // a complete range has been parsed (such as [a-z]), or because a flush is forced, - // due to an early end in the regexp ([a-]), or a character class escape being added - // ([a-\s]). The fourth permutation of m_charBuffer and m_isPendingDash is not permitted. - ASSERT(!((m_charBuffer == -1) && m_isPendingDash)); - - if (m_charBuffer != -1) { - if (m_isPendingDash) { - // EXAMPLE: parsing [-a-c], the 'c' reaches this case - we have buffered a previous character and seen a hyphen, so this is a range. - UChar lo = m_charBuffer; - UChar hi = ch; - // Reset back to the inital state. - m_charBuffer = -1; - m_isPendingDash = false; - - // This is an error, detected lazily. Do not proceed. - if (lo > hi) { - m_isUpsideDown = true; - return; - } - - if (lo <= 0x7f) { - char asciiLo = lo; - char asciiHi = std::min(hi, (UChar)0x7f); - addSortedRange(m_ranges, lo, asciiHi); - - if (m_isCaseInsensitive) { - if ((asciiLo <= 'Z') && (asciiHi >= 'A')) - addSortedRange(m_ranges, std::max(asciiLo, 'A')+('a'-'A'), std::min(asciiHi, 'Z')+('a'-'A')); - if ((asciiLo <= 'z') && (asciiHi >= 'a')) - addSortedRange(m_ranges, std::max(asciiLo, 'a')+('A'-'a'), std::min(asciiHi, 'z')+('A'-'a')); - } - } - if (hi >= 0x80) { - UChar unicodeCurr = std::max(lo, (UChar)0x80); - addSortedRange(m_rangesUnicode, unicodeCurr, hi); - - if (m_isCaseInsensitive) { - // we're going to scan along, updating the start of the range - while (unicodeCurr <= hi) { - // Spin forwards over any characters that don't have two cases. - for (; jsc_pcre_ucp_othercase(unicodeCurr) == -1; ++unicodeCurr) { - // if this was the last character in the range, we're done. - if (unicodeCurr == hi) - return; - } - // if we fall through to here, unicodeCurr <= hi & has another case. Get the other case. - UChar rangeStart = unicodeCurr; - UChar otherCurr = jsc_pcre_ucp_othercase(unicodeCurr); - - // If unicodeCurr is not yet hi, check the next char in the range. If it also has another case, - // and if it's other case value is one greater then the othercase value for the current last - // character included in the range, we can include next into the range. - while ((unicodeCurr < hi) && (jsc_pcre_ucp_othercase(unicodeCurr + 1) == (otherCurr + 1))) { - // increment unicodeCurr; it points to the end of the range. - // increment otherCurr, due to the check above other for next must be 1 greater than the currrent other value. - ++unicodeCurr; - ++otherCurr; - } - - // otherChar is the last in the range of other case chars, calculate offset to get back to the start. - addSortedRange(m_rangesUnicode, otherCurr-(unicodeCurr-rangeStart), otherCurr); - - // unicodeCurr has been added, move on to the next char. - ++unicodeCurr; - } - } - } - } else if (ch == '-') - // EXAMPLE: parsing [-a-c], the second '-' reaches this case - the hyphen is treated as potentially indicating a range. - m_isPendingDash = true; - else { - // EXAMPLE: Parsing [-a-c], the 'a' reaches this case - we repace the previously buffered char with the 'a'. - flush(); - m_charBuffer = ch; - } - } else - // EXAMPLE: Parsing [-a-c], the first hyphen reaches this case - there is no buffered character - // (the hyphen not treated as a special character in this case, same handling for any char). - m_charBuffer = ch; -} - -// When a character is added to the set we do not immediately add it to the arrays, in case it is actually defining a range. -// When we have determined the character is not used in specifing a range it is added, in a sorted fashion, to the appropriate -// array (either ascii or unicode). -// If the pattern is case insensitive we add entries for both cases. -void CharacterClassConstructor::flush() -{ - if (m_charBuffer != -1) { - if (m_charBuffer <= 0x7f) { - if (m_isCaseInsensitive && isASCIILower(m_charBuffer)) - addSorted(m_matches, toASCIIUpper(m_charBuffer)); - addSorted(m_matches, m_charBuffer); - if (m_isCaseInsensitive && isASCIIUpper(m_charBuffer)) - addSorted(m_matches, toASCIILower(m_charBuffer)); - } else { - addSorted(m_matchesUnicode, m_charBuffer); - if (m_isCaseInsensitive) { - int other = jsc_pcre_ucp_othercase(m_charBuffer); - if (other != -1) - addSorted(m_matchesUnicode, other); - } - } - m_charBuffer = -1; - } - - if (m_isPendingDash) { - addSorted(m_matches, '-'); - m_isPendingDash = false; - } -} - -void CharacterClassConstructor::append(const CharacterClass& other) -{ - // [x-\s] will add, 'x', '-', and all unicode spaces to new class (same as [x\s-]). - // Need to check the spec, really, but think this matches PCRE behaviour. - flush(); - - if (other.numMatches) { - for (size_t i = 0; i < other.numMatches; ++i) - addSorted(m_matches, other.matches[i]); - } - if (other.numRanges) { - for (size_t i = 0; i < other.numRanges; ++i) - addSortedRange(m_ranges, other.ranges[i].begin, other.ranges[i].end); - } - if (other.numMatchesUnicode) { - for (size_t i = 0; i < other.numMatchesUnicode; ++i) - addSorted(m_matchesUnicode, other.matchesUnicode[i]); - } - if (other.numRangesUnicode) { - for (size_t i = 0; i < other.numRangesUnicode; ++i) - addSortedRange(m_rangesUnicode, other.rangesUnicode[i].begin, other.rangesUnicode[i].end); - } -} - -} } // namespace JSC::WREC - -#endif // ENABLE(WREC) diff --git a/JavaScriptCore/wrec/CharacterClassConstructor.h b/JavaScriptCore/wrec/CharacterClassConstructor.h deleted file mode 100644 index 581733d..0000000 --- a/JavaScriptCore/wrec/CharacterClassConstructor.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (C) 2008 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef CharacterClassConstructor_h -#define CharacterClassConstructor_h - -#include <wtf/Platform.h> - -#if ENABLE(WREC) - -#include "CharacterClass.h" -#include <wtf/AlwaysInline.h> -#include <wtf/Vector.h> -#include <wtf/unicode/Unicode.h> - -namespace JSC { namespace WREC { - - class CharacterClassConstructor { - public: - CharacterClassConstructor(bool isCaseInsensitive) - : m_charBuffer(-1) - , m_isPendingDash(false) - , m_isCaseInsensitive(isCaseInsensitive) - , m_isUpsideDown(false) - { - } - - void flush(); - - // We need to flush prior to an escaped hyphen to prevent it as being treated as indicating - // a range, e.g. [a\-c] we flush prior to adding the hyphen so that this is not treated as - // [a-c]. However, we do not want to flush if we have already seen a non escaped hyphen - - // e.g. [+-\-] should be treated the same as [+--], producing a range that will also match - // a comma. - void flushBeforeEscapedHyphen() - { - if (!m_isPendingDash) - flush(); - } - - void put(UChar ch); - void append(const CharacterClass& other); - - bool isUpsideDown() { return m_isUpsideDown; } - - ALWAYS_INLINE CharacterClass charClass() - { - CharacterClass newCharClass = { - m_matches.begin(), m_matches.size(), - m_ranges.begin(), m_ranges.size(), - m_matchesUnicode.begin(), m_matchesUnicode.size(), - m_rangesUnicode.begin(), m_rangesUnicode.size(), - }; - - return newCharClass; - } - - private: - void addSorted(Vector<UChar>& matches, UChar ch); - void addSortedRange(Vector<CharacterRange>& ranges, UChar lo, UChar hi); - - int m_charBuffer; - bool m_isPendingDash; - bool m_isCaseInsensitive; - bool m_isUpsideDown; - - Vector<UChar> m_matches; - Vector<CharacterRange> m_ranges; - Vector<UChar> m_matchesUnicode; - Vector<CharacterRange> m_rangesUnicode; - }; - -} } // namespace JSC::WREC - -#endif // ENABLE(WREC) - -#endif // CharacterClassConstructor_h diff --git a/JavaScriptCore/wrec/Escapes.h b/JavaScriptCore/wrec/Escapes.h deleted file mode 100644 index 16c1d6f..0000000 --- a/JavaScriptCore/wrec/Escapes.h +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Copyright (C) 2008 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef Escapes_h -#define Escapes_h - -#include <wtf/Platform.h> - -#if ENABLE(WREC) - -#include <wtf/Assertions.h> - -namespace JSC { namespace WREC { - - class CharacterClass; - - class Escape { - public: - enum Type { - PatternCharacter, - CharacterClass, - Backreference, - WordBoundaryAssertion, - Error, - }; - - Escape(Type type) - : m_type(type) - { - } - - Type type() const { return m_type; } - - private: - Type m_type; - - protected: - // Used by subclasses to store data. - union { - int i; - const WREC::CharacterClass* c; - } m_u; - bool m_invert; - }; - - class PatternCharacterEscape : public Escape { - public: - static const PatternCharacterEscape& cast(const Escape& escape) - { - ASSERT(escape.type() == PatternCharacter); - return static_cast<const PatternCharacterEscape&>(escape); - } - - PatternCharacterEscape(int character) - : Escape(PatternCharacter) - { - m_u.i = character; - } - - operator Escape() const { return *this; } - - int character() const { return m_u.i; } - }; - - class CharacterClassEscape : public Escape { - public: - static const CharacterClassEscape& cast(const Escape& escape) - { - ASSERT(escape.type() == CharacterClass); - return static_cast<const CharacterClassEscape&>(escape); - } - - CharacterClassEscape(const WREC::CharacterClass& characterClass, bool invert) - : Escape(CharacterClass) - { - m_u.c = &characterClass; - m_invert = invert; - } - - operator Escape() { return *this; } - - const WREC::CharacterClass& characterClass() const { return *m_u.c; } - bool invert() const { return m_invert; } - }; - - class BackreferenceEscape : public Escape { - public: - static const BackreferenceEscape& cast(const Escape& escape) - { - ASSERT(escape.type() == Backreference); - return static_cast<const BackreferenceEscape&>(escape); - } - - BackreferenceEscape(int subpatternId) - : Escape(Backreference) - { - m_u.i = subpatternId; - } - - operator Escape() const { return *this; } - - int subpatternId() const { return m_u.i; } - }; - - class WordBoundaryAssertionEscape : public Escape { - public: - static const WordBoundaryAssertionEscape& cast(const Escape& escape) - { - ASSERT(escape.type() == WordBoundaryAssertion); - return static_cast<const WordBoundaryAssertionEscape&>(escape); - } - - WordBoundaryAssertionEscape(bool invert) - : Escape(WordBoundaryAssertion) - { - m_invert = invert; - } - - operator Escape() const { return *this; } - - bool invert() const { return m_invert; } - }; - -} } // namespace JSC::WREC - -#endif // ENABLE(WREC) - -#endif // Escapes_h diff --git a/JavaScriptCore/wrec/Quantifier.h b/JavaScriptCore/wrec/Quantifier.h deleted file mode 100644 index 3da74cd..0000000 --- a/JavaScriptCore/wrec/Quantifier.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (C) 2008 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef Quantifier_h -#define Quantifier_h - -#include <wtf/Platform.h> - -#if ENABLE(WREC) - -#include <wtf/Assertions.h> -#include <limits.h> - -namespace JSC { namespace WREC { - - struct Quantifier { - enum Type { - None, - Greedy, - NonGreedy, - Error, - }; - - Quantifier(Type type = None, unsigned min = 0, unsigned max = Infinity) - : type(type) - , min(min) - , max(max) - { - ASSERT(min <= max); - } - - Type type; - - unsigned min; - unsigned max; - - static const unsigned Infinity = UINT_MAX; - }; - -} } // namespace JSC::WREC - -#endif // ENABLE(WREC) - -#endif // Quantifier_h diff --git a/JavaScriptCore/wrec/WREC.cpp b/JavaScriptCore/wrec/WREC.cpp deleted file mode 100644 index 145a1ce..0000000 --- a/JavaScriptCore/wrec/WREC.cpp +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (C) 2008 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "WREC.h" - -#if ENABLE(WREC) - -#include "CharacterClassConstructor.h" -#include "Interpreter.h" -#include "JSGlobalObject.h" -#include "RegisterFile.h" -#include "WRECFunctors.h" -#include "WRECParser.h" -#include "pcre_internal.h" - -using namespace WTF; - -namespace JSC { namespace WREC { - -CompiledRegExp Generator::compileRegExp(JSGlobalData* globalData, const UString& pattern, unsigned* numSubpatterns_ptr, const char** error_ptr, RefPtr<ExecutablePool>& pool, bool ignoreCase, bool multiline) -{ - if (pattern.size() > MAX_PATTERN_SIZE) { - *error_ptr = "regular expression too large"; - return 0; - } - - Parser parser(pattern, ignoreCase, multiline); - Generator& generator = parser.generator(); - MacroAssembler::JumpList failures; - MacroAssembler::Jump endOfInput; - - generator.generateEnter(); - generator.generateSaveIndex(); - - Label beginPattern(&generator); - parser.parsePattern(failures); - generator.generateReturnSuccess(); - - failures.link(&generator); - generator.generateIncrementIndex(&endOfInput); - parser.parsePattern(failures); - generator.generateReturnSuccess(); - - failures.link(&generator); - generator.generateIncrementIndex(); - generator.generateJumpIfNotEndOfInput(beginPattern); - - endOfInput.link(&generator); - generator.generateReturnFailure(); - - if (parser.error()) { - *error_ptr = parser.syntaxError(); // NULL in the case of patterns that WREC doesn't support yet. - return 0; - } - - *numSubpatterns_ptr = parser.numSubpatterns(); - pool = globalData->executableAllocator.poolForSize(generator.size()); - return reinterpret_cast<CompiledRegExp>(generator.copyCode(pool.get())); -} - -} } // namespace JSC::WREC - -#endif // ENABLE(WREC) diff --git a/JavaScriptCore/wrec/WREC.h b/JavaScriptCore/wrec/WREC.h deleted file mode 100644 index 13324e7..0000000 --- a/JavaScriptCore/wrec/WREC.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (C) 2008 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef WREC_h -#define WREC_h - -#include <wtf/Platform.h> - -#if ENABLE(WREC) - -#include <wtf/unicode/Unicode.h> - -#if COMPILER(GCC) && CPU(X86) -#define WREC_CALL __attribute__ ((regparm (3))) -#else -#define WREC_CALL -#endif - -namespace JSC { - class Interpreter; - class UString; -} - -namespace JSC { namespace WREC { - - typedef int (*CompiledRegExp)(const UChar* input, unsigned start, unsigned length, int* output) WREC_CALL; - -} } // namespace JSC::WREC - -#endif // ENABLE(WREC) - -#endif // WREC_h diff --git a/JavaScriptCore/wrec/WRECFunctors.cpp b/JavaScriptCore/wrec/WRECFunctors.cpp deleted file mode 100644 index 5f1674e..0000000 --- a/JavaScriptCore/wrec/WRECFunctors.cpp +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (C) 2008 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "WRECFunctors.h" - -#if ENABLE(WREC) - -#include "WRECGenerator.h" - -using namespace WTF; - -namespace JSC { namespace WREC { - -void GeneratePatternCharacterFunctor::generateAtom(Generator* generator, Generator::JumpList& failures) -{ - generator->generatePatternCharacter(failures, m_ch); -} - -void GeneratePatternCharacterFunctor::backtrack(Generator* generator) -{ - generator->generateBacktrack1(); -} - -void GenerateCharacterClassFunctor::generateAtom(Generator* generator, Generator::JumpList& failures) -{ - generator->generateCharacterClass(failures, *m_charClass, m_invert); -} - -void GenerateCharacterClassFunctor::backtrack(Generator* generator) -{ - generator->generateBacktrack1(); -} - -void GenerateBackreferenceFunctor::generateAtom(Generator* generator, Generator::JumpList& failures) -{ - generator->generateBackreference(failures, m_subpatternId); -} - -void GenerateBackreferenceFunctor::backtrack(Generator* generator) -{ - generator->generateBacktrackBackreference(m_subpatternId); -} - -void GenerateParenthesesNonGreedyFunctor::generateAtom(Generator* generator, Generator::JumpList& failures) -{ - generator->generateParenthesesNonGreedy(failures, m_start, m_success, m_fail); -} - -void GenerateParenthesesNonGreedyFunctor::backtrack(Generator*) -{ - // FIXME: do something about this. - CRASH(); -} - -} } // namespace JSC::WREC - -#endif // ENABLE(WREC) diff --git a/JavaScriptCore/wrec/WRECFunctors.h b/JavaScriptCore/wrec/WRECFunctors.h deleted file mode 100644 index 610ce55..0000000 --- a/JavaScriptCore/wrec/WRECFunctors.h +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (C) 2008 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include <wtf/Platform.h> - -#if ENABLE(WREC) - -#include "WRECGenerator.h" -#include <wtf/unicode/Unicode.h> - -namespace JSC { namespace WREC { - - struct CharacterClass; - - class GenerateAtomFunctor { - public: - virtual ~GenerateAtomFunctor() {} - - virtual void generateAtom(Generator*, Generator::JumpList&) = 0; - virtual void backtrack(Generator*) = 0; - }; - - class GeneratePatternCharacterFunctor : public GenerateAtomFunctor { - public: - GeneratePatternCharacterFunctor(const UChar ch) - : m_ch(ch) - { - } - - virtual void generateAtom(Generator*, Generator::JumpList&); - virtual void backtrack(Generator*); - - private: - const UChar m_ch; - }; - - class GenerateCharacterClassFunctor : public GenerateAtomFunctor { - public: - GenerateCharacterClassFunctor(const CharacterClass* charClass, bool invert) - : m_charClass(charClass) - , m_invert(invert) - { - } - - virtual void generateAtom(Generator*, Generator::JumpList&); - virtual void backtrack(Generator*); - - private: - const CharacterClass* m_charClass; - bool m_invert; - }; - - class GenerateBackreferenceFunctor : public GenerateAtomFunctor { - public: - GenerateBackreferenceFunctor(unsigned subpatternId) - : m_subpatternId(subpatternId) - { - } - - virtual void generateAtom(Generator*, Generator::JumpList&); - virtual void backtrack(Generator*); - - private: - unsigned m_subpatternId; - }; - - class GenerateParenthesesNonGreedyFunctor : public GenerateAtomFunctor { - public: - GenerateParenthesesNonGreedyFunctor(Generator::Label start, Generator::Jump success, Generator::Jump fail) - : m_start(start) - , m_success(success) - , m_fail(fail) - { - } - - virtual void generateAtom(Generator*, Generator::JumpList&); - virtual void backtrack(Generator*); - - private: - Generator::Label m_start; - Generator::Jump m_success; - Generator::Jump m_fail; - }; - -} } // namespace JSC::WREC - -#endif // ENABLE(WREC) diff --git a/JavaScriptCore/wrec/WRECGenerator.cpp b/JavaScriptCore/wrec/WRECGenerator.cpp deleted file mode 100644 index 7105984..0000000 --- a/JavaScriptCore/wrec/WRECGenerator.cpp +++ /dev/null @@ -1,653 +0,0 @@ -/* - * Copyright (C) 2008 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "WREC.h" - -#if ENABLE(WREC) - -#include "CharacterClassConstructor.h" -#include "Interpreter.h" -#include "WRECFunctors.h" -#include "WRECParser.h" -#include "pcre_internal.h" - -using namespace WTF; - -namespace JSC { namespace WREC { - -void Generator::generateEnter() -{ -#if CPU(X86) - // On x86 edi & esi are callee preserved registers. - push(X86Registers::edi); - push(X86Registers::esi); - -#if COMPILER(MSVC) - // Move the arguments into registers. - peek(input, 3); - peek(index, 4); - peek(length, 5); - peek(output, 6); -#else - // On gcc the function is regparm(3), so the input, index, and length registers - // (eax, edx, and ecx respectively) already contain the appropriate values. - // Just load the fourth argument (output) into edi - peek(output, 3); -#endif -#endif -} - -void Generator::generateReturnSuccess() -{ - ASSERT(returnRegister != index); - ASSERT(returnRegister != output); - - // Set return value. - pop(returnRegister); // match begin - store32(returnRegister, output); - store32(index, Address(output, 4)); // match end - - // Restore callee save registers. -#if CPU(X86) - pop(X86Registers::esi); - pop(X86Registers::edi); -#endif - ret(); -} - -void Generator::generateSaveIndex() -{ - push(index); -} - -void Generator::generateIncrementIndex(Jump* failure) -{ - peek(index); - if (failure) - *failure = branch32(Equal, length, index); - add32(Imm32(1), index); - poke(index); -} - -void Generator::generateLoadCharacter(JumpList& failures) -{ - failures.append(branch32(Equal, length, index)); - load16(BaseIndex(input, index, TimesTwo), character); -} - -// For the sake of end-of-line assertions, we treat one-past-the-end as if it -// were part of the input string. -void Generator::generateJumpIfNotEndOfInput(Label target) -{ - branch32(LessThanOrEqual, index, length, target); -} - -void Generator::generateReturnFailure() -{ - pop(); - move(Imm32(-1), returnRegister); - -#if CPU(X86) - pop(X86Registers::esi); - pop(X86Registers::edi); -#endif - ret(); -} - -void Generator::generateBacktrack1() -{ - sub32(Imm32(1), index); -} - -void Generator::generateBacktrackBackreference(unsigned subpatternId) -{ - sub32(Address(output, (2 * subpatternId + 1) * sizeof(int)), index); - add32(Address(output, (2 * subpatternId) * sizeof(int)), index); -} - -void Generator::generateBackreferenceQuantifier(JumpList& failures, Quantifier::Type quantifierType, unsigned subpatternId, unsigned min, unsigned max) -{ - GenerateBackreferenceFunctor functor(subpatternId); - - load32(Address(output, (2 * subpatternId) * sizeof(int)), character); - Jump skipIfEmpty = branch32(Equal, Address(output, ((2 * subpatternId) + 1) * sizeof(int)), character); - - ASSERT(quantifierType == Quantifier::Greedy || quantifierType == Quantifier::NonGreedy); - if (quantifierType == Quantifier::Greedy) - generateGreedyQuantifier(failures, functor, min, max); - else - generateNonGreedyQuantifier(failures, functor, min, max); - - skipIfEmpty.link(this); -} - -void Generator::generateNonGreedyQuantifier(JumpList& failures, GenerateAtomFunctor& functor, unsigned min, unsigned max) -{ - JumpList atomFailedList; - JumpList alternativeFailedList; - - // (0) Setup: Save, then init repeatCount. - push(repeatCount); - move(Imm32(0), repeatCount); - Jump start = jump(); - - // (4) Quantifier failed: No more atom reading possible. - Label quantifierFailed(this); - pop(repeatCount); - failures.append(jump()); - - // (3) Alternative failed: If we can, read another atom, then fall through to (2) to try again. - Label alternativeFailed(this); - pop(index); - if (max != Quantifier::Infinity) - branch32(Equal, repeatCount, Imm32(max), quantifierFailed); - - // (1) Read an atom. - if (min) - start.link(this); - Label readAtom(this); - functor.generateAtom(this, atomFailedList); - atomFailedList.linkTo(quantifierFailed, this); - add32(Imm32(1), repeatCount); - - // (2) Keep reading if we're under the minimum. - if (min > 1) - branch32(LessThan, repeatCount, Imm32(min), readAtom); - - // (3) Test the rest of the alternative. - if (!min) - start.link(this); - push(index); - m_parser.parseAlternative(alternativeFailedList); - alternativeFailedList.linkTo(alternativeFailed, this); - - pop(); - pop(repeatCount); -} - -void Generator::generateGreedyQuantifier(JumpList& failures, GenerateAtomFunctor& functor, unsigned min, unsigned max) -{ - if (!max) - return; - - JumpList doneReadingAtomsList; - JumpList alternativeFailedList; - - // (0) Setup: Save, then init repeatCount. - push(repeatCount); - move(Imm32(0), repeatCount); - - // (1) Greedily read as many copies of the atom as possible, then jump to (2). - Label readAtom(this); - functor.generateAtom(this, doneReadingAtomsList); - add32(Imm32(1), repeatCount); - if (max == Quantifier::Infinity) - jump(readAtom); - else if (max == 1) - doneReadingAtomsList.append(jump()); - else { - branch32(NotEqual, repeatCount, Imm32(max), readAtom); - doneReadingAtomsList.append(jump()); - } - - // (5) Quantifier failed: No more backtracking possible. - Label quantifierFailed(this); - pop(repeatCount); - failures.append(jump()); - - // (4) Alternative failed: Backtrack, then fall through to (2) to try again. - Label alternativeFailed(this); - pop(index); - functor.backtrack(this); - sub32(Imm32(1), repeatCount); - - // (2) Verify that we have enough atoms. - doneReadingAtomsList.link(this); - branch32(LessThan, repeatCount, Imm32(min), quantifierFailed); - - // (3) Test the rest of the alternative. - push(index); - m_parser.parseAlternative(alternativeFailedList); - alternativeFailedList.linkTo(alternativeFailed, this); - - pop(); - pop(repeatCount); -} - -void Generator::generatePatternCharacterSequence(JumpList& failures, int* sequence, size_t count) -{ - for (size_t i = 0; i < count;) { - if (i < count - 1) { - if (generatePatternCharacterPair(failures, sequence[i], sequence[i + 1])) { - i += 2; - continue; - } - } - - generatePatternCharacter(failures, sequence[i]); - ++i; - } -} - -bool Generator::generatePatternCharacterPair(JumpList& failures, int ch1, int ch2) -{ - if (m_parser.ignoreCase()) { - // Non-trivial case folding requires more than one test, so we can't - // test as a pair with an adjacent character. - if (!isASCII(ch1) && Unicode::toLower(ch1) != Unicode::toUpper(ch1)) - return false; - if (!isASCII(ch2) && Unicode::toLower(ch2) != Unicode::toUpper(ch2)) - return false; - } - - // Optimistically consume 2 characters. - add32(Imm32(2), index); - failures.append(branch32(GreaterThan, index, length)); - - // Load the characters we just consumed, offset -2 characters from index. - load32(BaseIndex(input, index, TimesTwo, -2 * 2), character); - - if (m_parser.ignoreCase()) { - // Convert ASCII alphabet characters to upper case before testing for - // equality. (ASCII non-alphabet characters don't require upper-casing - // because they have no uppercase equivalents. Unicode characters don't - // require upper-casing because we only handle Unicode characters whose - // upper and lower cases are equal.) - int ch1Mask = 0; - if (isASCIIAlpha(ch1)) { - ch1 |= 32; - ch1Mask = 32; - } - - int ch2Mask = 0; - if (isASCIIAlpha(ch2)) { - ch2 |= 32; - ch2Mask = 32; - } - - int mask = ch1Mask | (ch2Mask << 16); - if (mask) - or32(Imm32(mask), character); - } - int pair = ch1 | (ch2 << 16); - - failures.append(branch32(NotEqual, character, Imm32(pair))); - return true; -} - -void Generator::generatePatternCharacter(JumpList& failures, int ch) -{ - generateLoadCharacter(failures); - - // used for unicode case insensitive - bool hasUpper = false; - Jump isUpper; - - // if case insensitive match - if (m_parser.ignoreCase()) { - UChar lower, upper; - - // check for ascii case sensitive characters - if (isASCIIAlpha(ch)) { - or32(Imm32(32), character); - ch |= 32; - } else if (!isASCII(ch) && ((lower = Unicode::toLower(ch)) != (upper = Unicode::toUpper(ch)))) { - // handle unicode case sentitive characters - branch to success on upper - isUpper = branch32(Equal, character, Imm32(upper)); - hasUpper = true; - ch = lower; - } - } - - // checks for ch, or lower case version of ch, if insensitive - failures.append(branch32(NotEqual, character, Imm32((unsigned short)ch))); - - if (m_parser.ignoreCase() && hasUpper) { - // for unicode case insensitive matches, branch here if upper matches. - isUpper.link(this); - } - - // on success consume the char - add32(Imm32(1), index); -} - -void Generator::generateCharacterClassInvertedRange(JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount) -{ - do { - // pick which range we're going to generate - int which = count >> 1; - char lo = ranges[which].begin; - char hi = ranges[which].end; - - // check if there are any ranges or matches below lo. If not, just jl to failure - - // if there is anything else to check, check that first, if it falls through jmp to failure. - if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) { - Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo)); - - // generate code for all ranges before this one - if (which) - generateCharacterClassInvertedRange(failures, matchDest, ranges, which, matchIndex, matches, matchCount); - - while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) { - matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex]))); - ++*matchIndex; - } - failures.append(jump()); - - loOrAbove.link(this); - } else if (which) { - Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo)); - - generateCharacterClassInvertedRange(failures, matchDest, ranges, which, matchIndex, matches, matchCount); - failures.append(jump()); - - loOrAbove.link(this); - } else - failures.append(branch32(LessThan, character, Imm32((unsigned short)lo))); - - while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi)) - ++*matchIndex; - - matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi))); - // fall through to here, the value is above hi. - - // shuffle along & loop around if there are any more matches to handle. - unsigned next = which + 1; - ranges += next; - count -= next; - } while (count); -} - -void Generator::generateCharacterClassInverted(JumpList& matchDest, const CharacterClass& charClass) -{ - Jump unicodeFail; - if (charClass.numMatchesUnicode || charClass.numRangesUnicode) { - Jump isAscii = branch32(LessThanOrEqual, character, Imm32(0x7f)); - - if (charClass.numMatchesUnicode) { - for (unsigned i = 0; i < charClass.numMatchesUnicode; ++i) { - UChar ch = charClass.matchesUnicode[i]; - matchDest.append(branch32(Equal, character, Imm32(ch))); - } - } - - if (charClass.numRangesUnicode) { - for (unsigned i = 0; i < charClass.numRangesUnicode; ++i) { - UChar lo = charClass.rangesUnicode[i].begin; - UChar hi = charClass.rangesUnicode[i].end; - - Jump below = branch32(LessThan, character, Imm32(lo)); - matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi))); - below.link(this); - } - } - - unicodeFail = jump(); - isAscii.link(this); - } - - if (charClass.numRanges) { - unsigned matchIndex = 0; - JumpList failures; - generateCharacterClassInvertedRange(failures, matchDest, charClass.ranges, charClass.numRanges, &matchIndex, charClass.matches, charClass.numMatches); - while (matchIndex < charClass.numMatches) - matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass.matches[matchIndex++]))); - - failures.link(this); - } else if (charClass.numMatches) { - // optimization: gather 'a','A' etc back together, can mask & test once. - Vector<char> matchesAZaz; - - for (unsigned i = 0; i < charClass.numMatches; ++i) { - char ch = charClass.matches[i]; - if (m_parser.ignoreCase()) { - if (isASCIILower(ch)) { - matchesAZaz.append(ch); - continue; - } - if (isASCIIUpper(ch)) - continue; - } - matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch))); - } - - if (unsigned countAZaz = matchesAZaz.size()) { - or32(Imm32(32), character); - for (unsigned i = 0; i < countAZaz; ++i) - matchDest.append(branch32(Equal, character, Imm32(matchesAZaz[i]))); - } - } - - if (charClass.numMatchesUnicode || charClass.numRangesUnicode) - unicodeFail.link(this); -} - -void Generator::generateCharacterClass(JumpList& failures, const CharacterClass& charClass, bool invert) -{ - generateLoadCharacter(failures); - - if (invert) - generateCharacterClassInverted(failures, charClass); - else { - JumpList successes; - generateCharacterClassInverted(successes, charClass); - failures.append(jump()); - successes.link(this); - } - - add32(Imm32(1), index); -} - -void Generator::generateParenthesesAssertion(JumpList& failures) -{ - JumpList disjunctionFailed; - - push(index); - m_parser.parseDisjunction(disjunctionFailed); - Jump success = jump(); - - disjunctionFailed.link(this); - pop(index); - failures.append(jump()); - - success.link(this); - pop(index); -} - -void Generator::generateParenthesesInvertedAssertion(JumpList& failures) -{ - JumpList disjunctionFailed; - - push(index); - m_parser.parseDisjunction(disjunctionFailed); - - // If the disjunction succeeded, the inverted assertion failed. - pop(index); - failures.append(jump()); - - // If the disjunction failed, the inverted assertion succeeded. - disjunctionFailed.link(this); - pop(index); -} - -void Generator::generateParenthesesNonGreedy(JumpList& failures, Label start, Jump success, Jump fail) -{ - jump(start); - success.link(this); - failures.append(fail); -} - -Generator::Jump Generator::generateParenthesesResetTrampoline(JumpList& newFailures, unsigned subpatternIdBefore, unsigned subpatternIdAfter) -{ - Jump skip = jump(); - newFailures.link(this); - for (unsigned i = subpatternIdBefore + 1; i <= subpatternIdAfter; ++i) { - store32(Imm32(-1), Address(output, (2 * i) * sizeof(int))); - store32(Imm32(-1), Address(output, (2 * i + 1) * sizeof(int))); - } - - Jump newFailJump = jump(); - skip.link(this); - - return newFailJump; -} - -void Generator::generateAssertionBOL(JumpList& failures) -{ - if (m_parser.multiline()) { - JumpList previousIsNewline; - - // begin of input == success - previousIsNewline.append(branch32(Equal, index, Imm32(0))); - - // now check prev char against newline characters. - load16(BaseIndex(input, index, TimesTwo, -2), character); - generateCharacterClassInverted(previousIsNewline, CharacterClass::newline()); - - failures.append(jump()); - - previousIsNewline.link(this); - } else - failures.append(branch32(NotEqual, index, Imm32(0))); -} - -void Generator::generateAssertionEOL(JumpList& failures) -{ - if (m_parser.multiline()) { - JumpList nextIsNewline; - - generateLoadCharacter(nextIsNewline); // end of input == success - generateCharacterClassInverted(nextIsNewline, CharacterClass::newline()); - failures.append(jump()); - nextIsNewline.link(this); - } else { - failures.append(branch32(NotEqual, length, index)); - } -} - -void Generator::generateAssertionWordBoundary(JumpList& failures, bool invert) -{ - JumpList wordBoundary; - JumpList notWordBoundary; - - // (1) Check if the previous value was a word char - - // (1.1) check for begin of input - Jump atBegin = branch32(Equal, index, Imm32(0)); - // (1.2) load the last char, and chck if is word character - load16(BaseIndex(input, index, TimesTwo, -2), character); - JumpList previousIsWord; - generateCharacterClassInverted(previousIsWord, CharacterClass::wordchar()); - // (1.3) if we get here, previous is not a word char - atBegin.link(this); - - // (2) Handle situation where previous was NOT a \w - - generateLoadCharacter(notWordBoundary); - generateCharacterClassInverted(wordBoundary, CharacterClass::wordchar()); - // (2.1) If we get here, neither chars are word chars - notWordBoundary.append(jump()); - - // (3) Handle situation where previous was a \w - - // (3.0) link success in first match to here - previousIsWord.link(this); - generateLoadCharacter(wordBoundary); - generateCharacterClassInverted(notWordBoundary, CharacterClass::wordchar()); - // (3.1) If we get here, this is an end of a word, within the input. - - // (4) Link everything up - - if (invert) { - // handle the fall through case - wordBoundary.append(jump()); - - // looking for non word boundaries, so link boundary fails to here. - notWordBoundary.link(this); - - failures.append(wordBoundary); - } else { - // looking for word boundaries, so link successes here. - wordBoundary.link(this); - - failures.append(notWordBoundary); - } -} - -void Generator::generateBackreference(JumpList& failures, unsigned subpatternId) -{ - push(index); - push(repeatCount); - - // get the start pos of the backref into repeatCount (multipurpose!) - load32(Address(output, (2 * subpatternId) * sizeof(int)), repeatCount); - - Jump skipIncrement = jump(); - Label topOfLoop(this); - - add32(Imm32(1), index); - add32(Imm32(1), repeatCount); - skipIncrement.link(this); - - // check if we're at the end of backref (if we are, success!) - Jump endOfBackRef = branch32(Equal, Address(output, ((2 * subpatternId) + 1) * sizeof(int)), repeatCount); - - load16(BaseIndex(input, repeatCount, MacroAssembler::TimesTwo), character); - - // check if we've run out of input (this would be a can o'fail) - Jump endOfInput = branch32(Equal, length, index); - - branch16(Equal, BaseIndex(input, index, TimesTwo), character, topOfLoop); - - endOfInput.link(this); - - // Failure - pop(repeatCount); - pop(index); - failures.append(jump()); - - // Success - endOfBackRef.link(this); - pop(repeatCount); - pop(); -} - -void Generator::terminateAlternative(JumpList& successes, JumpList& failures) -{ - successes.append(jump()); - - failures.link(this); - peek(index); -} - -void Generator::terminateDisjunction(JumpList& successes) -{ - successes.link(this); -} - -} } // namespace JSC::WREC - -#endif // ENABLE(WREC) diff --git a/JavaScriptCore/wrec/WRECGenerator.h b/JavaScriptCore/wrec/WRECGenerator.h deleted file mode 100644 index d707a6e..0000000 --- a/JavaScriptCore/wrec/WRECGenerator.h +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (C) 2008 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef WRECGenerator_h -#define WRECGenerator_h - -#include <wtf/Platform.h> - -#if ENABLE(WREC) - -#include "Quantifier.h" -#include "MacroAssembler.h" -#include <wtf/ASCIICType.h> -#include <wtf/unicode/Unicode.h> -#include "WREC.h" - -namespace JSC { - - class JSGlobalData; - - namespace WREC { - - class CharacterRange; - class GenerateAtomFunctor; - class Parser; - struct CharacterClass; - - class Generator : private MacroAssembler { - public: - using MacroAssembler::Jump; - using MacroAssembler::JumpList; - using MacroAssembler::Label; - - enum ParenthesesType { Capturing, NonCapturing, Assertion, InvertedAssertion, Error }; - - static CompiledRegExp compileRegExp(JSGlobalData*, const UString& pattern, unsigned* numSubpatterns_ptr, const char** error_ptr, RefPtr<ExecutablePool>& pool, bool ignoreCase = false, bool multiline = false); - - Generator(Parser& parser) - : m_parser(parser) - { - } - -#if CPU(X86) - static const RegisterID input = X86Registers::eax; - static const RegisterID index = X86Registers::edx; - static const RegisterID length = X86Registers::ecx; - static const RegisterID output = X86Registers::edi; - - static const RegisterID character = X86Registers::esi; - static const RegisterID repeatCount = X86Registers::ebx; // How many times the current atom repeats in the current match. - - static const RegisterID returnRegister = X86Registers::eax; -#endif -#if CPU(X86_64) - static const RegisterID input = X86Registers::edi; - static const RegisterID index = X86Registers::esi; - static const RegisterID length = X86Registers::edx; - static const RegisterID output = X86Registers::ecx; - - static const RegisterID character = X86Registers::eax; - static const RegisterID repeatCount = X86Registers::ebx; // How many times the current atom repeats in the current match. - - static const RegisterID returnRegister = X86Registers::eax; -#endif - - void generateEnter(); - void generateSaveIndex(); - void generateIncrementIndex(Jump* failure = 0); - void generateLoadCharacter(JumpList& failures); - void generateJumpIfNotEndOfInput(Label); - void generateReturnSuccess(); - void generateReturnFailure(); - - void generateGreedyQuantifier(JumpList& failures, GenerateAtomFunctor& functor, unsigned min, unsigned max); - void generateNonGreedyQuantifier(JumpList& failures, GenerateAtomFunctor& functor, unsigned min, unsigned max); - void generateBacktrack1(); - void generateBacktrackBackreference(unsigned subpatternId); - void generateCharacterClass(JumpList& failures, const CharacterClass& charClass, bool invert); - void generateCharacterClassInverted(JumpList& failures, const CharacterClass& charClass); - void generateCharacterClassInvertedRange(JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount); - void generatePatternCharacter(JumpList& failures, int ch); - void generatePatternCharacterSequence(JumpList& failures, int* sequence, size_t count); - void generateAssertionWordBoundary(JumpList& failures, bool invert); - void generateAssertionBOL(JumpList& failures); - void generateAssertionEOL(JumpList& failures); - void generateBackreference(JumpList& failures, unsigned subpatternID); - void generateBackreferenceQuantifier(JumpList& failures, Quantifier::Type quantifierType, unsigned subpatternId, unsigned min, unsigned max); - void generateParenthesesAssertion(JumpList& failures); - void generateParenthesesInvertedAssertion(JumpList& failures); - Jump generateParenthesesResetTrampoline(JumpList& newFailures, unsigned subpatternIdBefore, unsigned subpatternIdAfter); - void generateParenthesesNonGreedy(JumpList& failures, Label start, Jump success, Jump fail); - - void terminateAlternative(JumpList& successes, JumpList& failures); - void terminateDisjunction(JumpList& successes); - - private: - bool generatePatternCharacterPair(JumpList& failures, int ch1, int ch2); - - Parser& m_parser; - }; - -} } // namespace JSC::WREC - -#endif // ENABLE(WREC) - -#endif // WRECGenerator_h diff --git a/JavaScriptCore/wrec/WRECParser.cpp b/JavaScriptCore/wrec/WRECParser.cpp deleted file mode 100644 index 1709bf9..0000000 --- a/JavaScriptCore/wrec/WRECParser.cpp +++ /dev/null @@ -1,643 +0,0 @@ -/* - * Copyright (C) 2008 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "WRECParser.h" - -#if ENABLE(WREC) - -#include "CharacterClassConstructor.h" -#include "WRECFunctors.h" - -using namespace WTF; - -namespace JSC { namespace WREC { - -// These error messages match the error messages used by PCRE. -const char* Parser::QuantifierOutOfOrder = "numbers out of order in {} quantifier"; -const char* Parser::QuantifierWithoutAtom = "nothing to repeat"; -const char* Parser::ParenthesesUnmatched = "unmatched parentheses"; -const char* Parser::ParenthesesTypeInvalid = "unrecognized character after (?"; -const char* Parser::ParenthesesNotSupported = ""; // Not a user-visible syntax error -- just signals a syntax that WREC doesn't support yet. -const char* Parser::CharacterClassUnmatched = "missing terminating ] for character class"; -const char* Parser::CharacterClassOutOfOrder = "range out of order in character class"; -const char* Parser::EscapeUnterminated = "\\ at end of pattern"; - -class PatternCharacterSequence { -typedef Generator::JumpList JumpList; - -public: - PatternCharacterSequence(Generator& generator, JumpList& failures) - : m_generator(generator) - , m_failures(failures) - { - } - - size_t size() { return m_sequence.size(); } - - void append(int ch) - { - m_sequence.append(ch); - } - - void flush() - { - if (!m_sequence.size()) - return; - - m_generator.generatePatternCharacterSequence(m_failures, m_sequence.begin(), m_sequence.size()); - m_sequence.clear(); - } - - void flush(const Quantifier& quantifier) - { - if (!m_sequence.size()) - return; - - m_generator.generatePatternCharacterSequence(m_failures, m_sequence.begin(), m_sequence.size() - 1); - - switch (quantifier.type) { - case Quantifier::None: - case Quantifier::Error: - ASSERT_NOT_REACHED(); - break; - - case Quantifier::Greedy: { - GeneratePatternCharacterFunctor functor(m_sequence.last()); - m_generator.generateGreedyQuantifier(m_failures, functor, quantifier.min, quantifier.max); - break; - } - - case Quantifier::NonGreedy: { - GeneratePatternCharacterFunctor functor(m_sequence.last()); - m_generator.generateNonGreedyQuantifier(m_failures, functor, quantifier.min, quantifier.max); - break; - } - } - - m_sequence.clear(); - } - -private: - Generator& m_generator; - JumpList& m_failures; - Vector<int, 8> m_sequence; -}; - -ALWAYS_INLINE Quantifier Parser::consumeGreedyQuantifier() -{ - switch (peek()) { - case '?': - consume(); - return Quantifier(Quantifier::Greedy, 0, 1); - - case '*': - consume(); - return Quantifier(Quantifier::Greedy, 0); - - case '+': - consume(); - return Quantifier(Quantifier::Greedy, 1); - - case '{': { - SavedState state(*this); - consume(); - - // Accept: {n}, {n,}, {n,m}. - // Reject: {n,m} where n > m. - // Ignore: Anything else, such as {n, m}. - - if (!peekIsDigit()) { - state.restore(); - return Quantifier(); - } - - unsigned min = consumeNumber(); - unsigned max = min; - - if (peek() == ',') { - consume(); - max = peekIsDigit() ? consumeNumber() : Quantifier::Infinity; - } - - if (peek() != '}') { - state.restore(); - return Quantifier(); - } - consume(); - - if (min > max) { - setError(QuantifierOutOfOrder); - return Quantifier(Quantifier::Error); - } - - return Quantifier(Quantifier::Greedy, min, max); - } - - default: - return Quantifier(); // No quantifier. - } -} - -Quantifier Parser::consumeQuantifier() -{ - Quantifier q = consumeGreedyQuantifier(); - - if ((q.type == Quantifier::Greedy) && (peek() == '?')) { - consume(); - q.type = Quantifier::NonGreedy; - } - - return q; -} - -bool Parser::parseCharacterClassQuantifier(JumpList& failures, const CharacterClass& charClass, bool invert) -{ - Quantifier q = consumeQuantifier(); - - switch (q.type) { - case Quantifier::None: { - m_generator.generateCharacterClass(failures, charClass, invert); - break; - } - - case Quantifier::Greedy: { - GenerateCharacterClassFunctor functor(&charClass, invert); - m_generator.generateGreedyQuantifier(failures, functor, q.min, q.max); - break; - } - - case Quantifier::NonGreedy: { - GenerateCharacterClassFunctor functor(&charClass, invert); - m_generator.generateNonGreedyQuantifier(failures, functor, q.min, q.max); - break; - } - - case Quantifier::Error: - return false; - } - - return true; -} - -bool Parser::parseBackreferenceQuantifier(JumpList& failures, unsigned subpatternId) -{ - Quantifier q = consumeQuantifier(); - - switch (q.type) { - case Quantifier::None: { - m_generator.generateBackreference(failures, subpatternId); - break; - } - - case Quantifier::Greedy: - case Quantifier::NonGreedy: - m_generator.generateBackreferenceQuantifier(failures, q.type, subpatternId, q.min, q.max); - return true; - - case Quantifier::Error: - return false; - } - - return true; -} - -bool Parser::parseParentheses(JumpList& failures) -{ - ParenthesesType type = consumeParenthesesType(); - - // FIXME: WREC originally failed to backtrack correctly in cases such as - // "c".match(/(.*)c/). Now, most parentheses handling is disabled. For - // unsupported parentheses, we fall back on PCRE. - - switch (type) { - case Generator::Assertion: { - m_generator.generateParenthesesAssertion(failures); - - if (consume() != ')') { - setError(ParenthesesUnmatched); - return false; - } - - Quantifier quantifier = consumeQuantifier(); - if (quantifier.type != Quantifier::None && quantifier.min == 0) { - setError(ParenthesesNotSupported); - return false; - } - - return true; - } - case Generator::InvertedAssertion: { - m_generator.generateParenthesesInvertedAssertion(failures); - - if (consume() != ')') { - setError(ParenthesesUnmatched); - return false; - } - - Quantifier quantifier = consumeQuantifier(); - if (quantifier.type != Quantifier::None && quantifier.min == 0) { - setError(ParenthesesNotSupported); - return false; - } - - return true; - } - default: - setError(ParenthesesNotSupported); - return false; - } -} - -bool Parser::parseCharacterClass(JumpList& failures) -{ - bool invert = false; - if (peek() == '^') { - consume(); - invert = true; - } - - CharacterClassConstructor constructor(m_ignoreCase); - - int ch; - while ((ch = peek()) != ']') { - switch (ch) { - case EndOfPattern: - setError(CharacterClassUnmatched); - return false; - - case '\\': { - consume(); - Escape escape = consumeEscape(true); - - switch (escape.type()) { - case Escape::PatternCharacter: { - int character = PatternCharacterEscape::cast(escape).character(); - if (character == '-') - constructor.flushBeforeEscapedHyphen(); - constructor.put(character); - break; - } - case Escape::CharacterClass: { - const CharacterClassEscape& characterClassEscape = CharacterClassEscape::cast(escape); - ASSERT(!characterClassEscape.invert()); - constructor.append(characterClassEscape.characterClass()); - break; - } - case Escape::Error: - return false; - case Escape::Backreference: - case Escape::WordBoundaryAssertion: { - ASSERT_NOT_REACHED(); - break; - } - } - break; - } - - default: - consume(); - constructor.put(ch); - } - } - consume(); - - // lazily catch reversed ranges ([z-a])in character classes - if (constructor.isUpsideDown()) { - setError(CharacterClassOutOfOrder); - return false; - } - - constructor.flush(); - CharacterClass charClass = constructor.charClass(); - return parseCharacterClassQuantifier(failures, charClass, invert); -} - -bool Parser::parseNonCharacterEscape(JumpList& failures, const Escape& escape) -{ - switch (escape.type()) { - case Escape::PatternCharacter: - ASSERT_NOT_REACHED(); - return false; - - case Escape::CharacterClass: - return parseCharacterClassQuantifier(failures, CharacterClassEscape::cast(escape).characterClass(), CharacterClassEscape::cast(escape).invert()); - - case Escape::Backreference: - return parseBackreferenceQuantifier(failures, BackreferenceEscape::cast(escape).subpatternId()); - - case Escape::WordBoundaryAssertion: - m_generator.generateAssertionWordBoundary(failures, WordBoundaryAssertionEscape::cast(escape).invert()); - return true; - - case Escape::Error: - return false; - } - - ASSERT_NOT_REACHED(); - return false; -} - -Escape Parser::consumeEscape(bool inCharacterClass) -{ - switch (peek()) { - case EndOfPattern: - setError(EscapeUnterminated); - return Escape(Escape::Error); - - // Assertions - case 'b': - consume(); - if (inCharacterClass) - return PatternCharacterEscape('\b'); - return WordBoundaryAssertionEscape(false); // do not invert - case 'B': - consume(); - if (inCharacterClass) - return PatternCharacterEscape('B'); - return WordBoundaryAssertionEscape(true); // invert - - // CharacterClassEscape - case 'd': - consume(); - return CharacterClassEscape(CharacterClass::digits(), false); - case 's': - consume(); - return CharacterClassEscape(CharacterClass::spaces(), false); - case 'w': - consume(); - return CharacterClassEscape(CharacterClass::wordchar(), false); - case 'D': - consume(); - return inCharacterClass - ? CharacterClassEscape(CharacterClass::nondigits(), false) - : CharacterClassEscape(CharacterClass::digits(), true); - case 'S': - consume(); - return inCharacterClass - ? CharacterClassEscape(CharacterClass::nonspaces(), false) - : CharacterClassEscape(CharacterClass::spaces(), true); - case 'W': - consume(); - return inCharacterClass - ? CharacterClassEscape(CharacterClass::nonwordchar(), false) - : CharacterClassEscape(CharacterClass::wordchar(), true); - - // DecimalEscape - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': { - if (peekDigit() > m_numSubpatterns || inCharacterClass) { - // To match Firefox, we parse an invalid backreference in the range [1-7] - // as an octal escape. - return peekDigit() > 7 ? PatternCharacterEscape('\\') : PatternCharacterEscape(consumeOctal()); - } - - int value = 0; - do { - unsigned newValue = value * 10 + peekDigit(); - if (newValue > m_numSubpatterns) - break; - value = newValue; - consume(); - } while (peekIsDigit()); - - return BackreferenceEscape(value); - } - - // Octal escape - case '0': - consume(); - return PatternCharacterEscape(consumeOctal()); - - // ControlEscape - case 'f': - consume(); - return PatternCharacterEscape('\f'); - case 'n': - consume(); - return PatternCharacterEscape('\n'); - case 'r': - consume(); - return PatternCharacterEscape('\r'); - case 't': - consume(); - return PatternCharacterEscape('\t'); - case 'v': - consume(); - return PatternCharacterEscape('\v'); - - // ControlLetter - case 'c': { - SavedState state(*this); - consume(); - - int control = consume(); - // To match Firefox, inside a character class, we also accept numbers - // and '_' as control characters. - if ((!inCharacterClass && !isASCIIAlpha(control)) || (!isASCIIAlphanumeric(control) && control != '_')) { - state.restore(); - return PatternCharacterEscape('\\'); - } - return PatternCharacterEscape(control & 31); - } - - // HexEscape - case 'x': { - consume(); - - SavedState state(*this); - int x = consumeHex(2); - if (x == -1) { - state.restore(); - return PatternCharacterEscape('x'); - } - return PatternCharacterEscape(x); - } - - // UnicodeEscape - case 'u': { - consume(); - - SavedState state(*this); - int x = consumeHex(4); - if (x == -1) { - state.restore(); - return PatternCharacterEscape('u'); - } - return PatternCharacterEscape(x); - } - - // IdentityEscape - default: - return PatternCharacterEscape(consume()); - } -} - -void Parser::parseAlternative(JumpList& failures) -{ - PatternCharacterSequence sequence(m_generator, failures); - - while (1) { - switch (peek()) { - case EndOfPattern: - case '|': - case ')': - sequence.flush(); - return; - - case '*': - case '+': - case '?': - case '{': { - Quantifier q = consumeQuantifier(); - - if (q.type == Quantifier::None) { - sequence.append(consume()); - continue; - } - - if (q.type == Quantifier::Error) - return; - - if (!sequence.size()) { - setError(QuantifierWithoutAtom); - return; - } - - sequence.flush(q); - continue; - } - - case '^': - consume(); - - sequence.flush(); - m_generator.generateAssertionBOL(failures); - continue; - - case '$': - consume(); - - sequence.flush(); - m_generator.generateAssertionEOL(failures); - continue; - - case '.': - consume(); - - sequence.flush(); - if (!parseCharacterClassQuantifier(failures, CharacterClass::newline(), true)) - return; - continue; - - case '[': - consume(); - - sequence.flush(); - if (!parseCharacterClass(failures)) - return; - continue; - - case '(': - consume(); - - sequence.flush(); - if (!parseParentheses(failures)) - return; - continue; - - case '\\': { - consume(); - - Escape escape = consumeEscape(false); - if (escape.type() == Escape::PatternCharacter) { - sequence.append(PatternCharacterEscape::cast(escape).character()); - continue; - } - - sequence.flush(); - if (!parseNonCharacterEscape(failures, escape)) - return; - continue; - } - - default: - sequence.append(consume()); - continue; - } - } -} - -/* - TOS holds index. -*/ -void Parser::parseDisjunction(JumpList& failures) -{ - parseAlternative(failures); - if (peek() != '|') - return; - - JumpList successes; - do { - consume(); - m_generator.terminateAlternative(successes, failures); - parseAlternative(failures); - } while (peek() == '|'); - - m_generator.terminateDisjunction(successes); -} - -Generator::ParenthesesType Parser::consumeParenthesesType() -{ - if (peek() != '?') - return Generator::Capturing; - consume(); - - switch (consume()) { - case ':': - return Generator::NonCapturing; - - case '=': - return Generator::Assertion; - - case '!': - return Generator::InvertedAssertion; - - default: - setError(ParenthesesTypeInvalid); - return Generator::Error; - } -} - -} } // namespace JSC::WREC - -#endif // ENABLE(WREC) diff --git a/JavaScriptCore/wrec/WRECParser.h b/JavaScriptCore/wrec/WRECParser.h deleted file mode 100644 index a3e151b..0000000 --- a/JavaScriptCore/wrec/WRECParser.h +++ /dev/null @@ -1,214 +0,0 @@ -/* - * Copyright (C) 2008 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef Parser_h -#define Parser_h - -#include <wtf/Platform.h> - -#if ENABLE(WREC) - -#include "Escapes.h" -#include "Quantifier.h" -#include "UString.h" -#include "WRECGenerator.h" -#include <wtf/ASCIICType.h> - -namespace JSC { namespace WREC { - - struct CharacterClass; - - class Parser { - typedef Generator::JumpList JumpList; - typedef Generator::ParenthesesType ParenthesesType; - - friend class SavedState; - - public: - Parser(const UString& pattern, bool ignoreCase, bool multiline) - : m_generator(*this) - , m_data(pattern.data()) - , m_size(pattern.size()) - , m_ignoreCase(ignoreCase) - , m_multiline(multiline) - { - reset(); - } - - Generator& generator() { return m_generator; } - - bool ignoreCase() const { return m_ignoreCase; } - bool multiline() const { return m_multiline; } - - void recordSubpattern() { ++m_numSubpatterns; } - unsigned numSubpatterns() const { return m_numSubpatterns; } - - const char* error() const { return m_error; } - const char* syntaxError() const { return m_error == ParenthesesNotSupported ? 0 : m_error; } - - void parsePattern(JumpList& failures) - { - reset(); - - parseDisjunction(failures); - - if (peek() != EndOfPattern) - setError(ParenthesesUnmatched); // Parsing the pattern should fully consume it. - } - - void parseDisjunction(JumpList& failures); - void parseAlternative(JumpList& failures); - bool parseTerm(JumpList& failures); - bool parseNonCharacterEscape(JumpList& failures, const Escape&); - bool parseParentheses(JumpList& failures); - bool parseCharacterClass(JumpList& failures); - bool parseCharacterClassQuantifier(JumpList& failures, const CharacterClass& charClass, bool invert); - bool parseBackreferenceQuantifier(JumpList& failures, unsigned subpatternId); - - private: - class SavedState { - public: - SavedState(Parser& parser) - : m_parser(parser) - , m_index(parser.m_index) - { - } - - void restore() - { - m_parser.m_index = m_index; - } - - private: - Parser& m_parser; - unsigned m_index; - }; - - void reset() - { - m_index = 0; - m_numSubpatterns = 0; - m_error = 0; - } - - void setError(const char* error) - { - if (m_error) - return; - m_error = error; - } - - int peek() - { - if (m_index >= m_size) - return EndOfPattern; - return m_data[m_index]; - } - - int consume() - { - if (m_index >= m_size) - return EndOfPattern; - return m_data[m_index++]; - } - - bool peekIsDigit() - { - return WTF::isASCIIDigit(peek()); - } - - unsigned peekDigit() - { - ASSERT(peekIsDigit()); - return peek() - '0'; - } - - unsigned consumeDigit() - { - ASSERT(peekIsDigit()); - return consume() - '0'; - } - - unsigned consumeNumber() - { - int n = consumeDigit(); - while (peekIsDigit()) { - n *= 10; - n += consumeDigit(); - } - return n; - } - - int consumeHex(int count) - { - int n = 0; - while (count--) { - if (!WTF::isASCIIHexDigit(peek())) - return -1; - n = (n << 4) | WTF::toASCIIHexValue(consume()); - } - return n; - } - - unsigned consumeOctal() - { - unsigned n = 0; - while (n < 32 && WTF::isASCIIOctalDigit(peek())) - n = n * 8 + consumeDigit(); - return n; - } - - ALWAYS_INLINE Quantifier consumeGreedyQuantifier(); - Quantifier consumeQuantifier(); - Escape consumeEscape(bool inCharacterClass); - ParenthesesType consumeParenthesesType(); - - static const int EndOfPattern = -1; - - // Error messages. - static const char* QuantifierOutOfOrder; - static const char* QuantifierWithoutAtom; - static const char* ParenthesesUnmatched; - static const char* ParenthesesTypeInvalid; - static const char* ParenthesesNotSupported; - static const char* CharacterClassUnmatched; - static const char* CharacterClassOutOfOrder; - static const char* EscapeUnterminated; - - Generator m_generator; - const UChar* m_data; - unsigned m_size; - unsigned m_index; - bool m_ignoreCase; - bool m_multiline; - unsigned m_numSubpatterns; - const char* m_error; - }; - -} } // namespace JSC::WREC - -#endif // ENABLE(WREC) - -#endif // Parser_h |