diff options
author | Ben Murdoch <benm@google.com> | 2011-05-13 16:23:25 +0100 |
---|---|---|
committer | Ben Murdoch <benm@google.com> | 2011-05-16 11:35:02 +0100 |
commit | 65f03d4f644ce73618e5f4f50dd694b26f55ae12 (patch) | |
tree | f478babb801e720de7bfaee23443ffe029f58731 /Source/JavaScriptCore/yarr/YarrParser.h | |
parent | 47de4a2fb7262c7ebdb9cd133ad2c54c187454d0 (diff) | |
download | external_webkit-65f03d4f644ce73618e5f4f50dd694b26f55ae12.zip external_webkit-65f03d4f644ce73618e5f4f50dd694b26f55ae12.tar.gz external_webkit-65f03d4f644ce73618e5f4f50dd694b26f55ae12.tar.bz2 |
Merge WebKit at r75993: Initial merge by git.
Change-Id: I602bbdc3974787a3b0450456a30a7868286921c3
Diffstat (limited to 'Source/JavaScriptCore/yarr/YarrParser.h')
-rw-r--r-- | Source/JavaScriptCore/yarr/YarrParser.h | 871 |
1 files changed, 871 insertions, 0 deletions
diff --git a/Source/JavaScriptCore/yarr/YarrParser.h b/Source/JavaScriptCore/yarr/YarrParser.h new file mode 100644 index 0000000..8c5741a --- /dev/null +++ b/Source/JavaScriptCore/yarr/YarrParser.h @@ -0,0 +1,871 @@ +/* + * Copyright (C) 2009 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef YarrParser_h +#define YarrParser_h + +#include "UString.h" +#include "Yarr.h" +#include <wtf/ASCIICType.h> +#include <wtf/unicode/Unicode.h> + +namespace JSC { namespace Yarr { + +#define REGEXP_ERROR_PREFIX "Invalid regular expression: " + +enum BuiltInCharacterClassID { + DigitClassID, + SpaceClassID, + WordClassID, + NewlineClassID, +}; + +// The Parser class should not be used directly - only via the Yarr::parse() method. +template<class Delegate> +class Parser { +private: + template<class FriendDelegate> + friend const char* parse(FriendDelegate& delegate, const UString& pattern, unsigned backReferenceLimit); + + enum ErrorCode { + NoError, + PatternTooLarge, + QuantifierOutOfOrder, + QuantifierWithoutAtom, + MissingParentheses, + ParenthesesUnmatched, + ParenthesesTypeInvalid, + CharacterClassUnmatched, + CharacterClassOutOfOrder, + EscapeUnterminated, + NumberOfErrorCodes + }; + + /* + * CharacterClassParserDelegate: + * + * The class CharacterClassParserDelegate is used in the parsing of character + * classes. This class handles detection of character ranges. This class + * implements enough of the delegate interface such that it can be passed to + * parseEscape() as an EscapeDelegate. This allows parseEscape() to be reused + * to perform the parsing of escape characters in character sets. + */ + class CharacterClassParserDelegate { + public: + CharacterClassParserDelegate(Delegate& delegate, ErrorCode& err) + : m_delegate(delegate) + , m_err(err) + , m_state(Empty) + { + } + + /* + * begin(): + * + * Called at beginning of construction. + */ + void begin(bool invert) + { + m_delegate.atomCharacterClassBegin(invert); + } + + /* + * atomPatternCharacter(): + * + * This method is called either from parseCharacterClass() (for an unescaped + * character in a character class), or from parseEscape(). In the former case + * the value true will be passed for the argument 'hyphenIsRange', and in this + * mode we will allow a hypen to be treated as indicating a range (i.e. /[a-z]/ + * is different to /[a\-z]/). + */ + void atomPatternCharacter(UChar ch, bool hyphenIsRange = false) + { + switch (m_state) { + case AfterCharacterClass: + // Following a builtin character class we need look out for a hyphen. + // We're looking for invalid ranges, such as /[\d-x]/ or /[\d-\d]/. + // If we see a hyphen following a charater class then unlike usual + // we'll report it to the delegate immediately, and put ourself into + // a poisoned state. Any following calls to add another character or + // character class will result in an error. (A hypen following a + // character-class is itself valid, but only at the end of a regex). + if (hyphenIsRange && ch == '-') { + m_delegate.atomCharacterClassAtom('-'); + m_state = AfterCharacterClassHyphen; + return; + } + // Otherwise just fall through - cached character so treat this as Empty. + + case Empty: + m_character = ch; + m_state = CachedCharacter; + return; + + case CachedCharacter: + if (hyphenIsRange && ch == '-') + m_state = CachedCharacterHyphen; + else { + m_delegate.atomCharacterClassAtom(m_character); + m_character = ch; + } + return; + + case CachedCharacterHyphen: + if (ch < m_character) { + m_err = CharacterClassOutOfOrder; + return; + } + m_delegate.atomCharacterClassRange(m_character, ch); + m_state = Empty; + return; + + // See coment in atomBuiltInCharacterClass below. + // This too is technically an error, per ECMA-262, and again we + // we chose to allow this. Note a subtlely here that while we + // diverge from the spec's definition of CharacterRange we do + // remain in compliance with the grammar. For example, consider + // the expression /[\d-a-z]/. We comply with the grammar in + // this case by not allowing a-z to be matched as a range. + case AfterCharacterClassHyphen: + m_delegate.atomCharacterClassAtom(ch); + m_state = Empty; + return; + } + } + + /* + * atomBuiltInCharacterClass(): + * + * Adds a built-in character class, called by parseEscape(). + */ + void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert) + { + switch (m_state) { + case CachedCharacter: + // Flush the currently cached character, then fall through. + m_delegate.atomCharacterClassAtom(m_character); + + case Empty: + case AfterCharacterClass: + m_state = AfterCharacterClass; + m_delegate.atomCharacterClassBuiltIn(classID, invert); + return; + + // If we hit either of these cases, we have an invalid range that + // looks something like /[x-\d]/ or /[\d-\d]/. + // According to ECMA-262 this should be a syntax error, but + // empirical testing shows this to break teh webz. Instead we + // comply with to the ECMA-262 grammar, and assume the grammar to + // have matched the range correctly, but tweak our interpretation + // of CharacterRange. Effectively we implicitly handle the hyphen + // as if it were escaped, e.g. /[\w-_]/ is treated as /[\w\-_]/. + case CachedCharacterHyphen: + m_delegate.atomCharacterClassAtom(m_character); + m_delegate.atomCharacterClassAtom('-'); + // fall through + case AfterCharacterClassHyphen: + m_delegate.atomCharacterClassBuiltIn(classID, invert); + m_state = Empty; + return; + } + } + + /* + * end(): + * + * Called at end of construction. + */ + void end() + { + if (m_state == CachedCharacter) + m_delegate.atomCharacterClassAtom(m_character); + else if (m_state == CachedCharacterHyphen) { + m_delegate.atomCharacterClassAtom(m_character); + m_delegate.atomCharacterClassAtom('-'); + } + m_delegate.atomCharacterClassEnd(); + } + + // parseEscape() should never call these delegate methods when + // invoked with inCharacterClass set. + void assertionWordBoundary(bool) { ASSERT_NOT_REACHED(); } + void atomBackReference(unsigned) { ASSERT_NOT_REACHED(); } + + private: + Delegate& m_delegate; + ErrorCode& m_err; + enum CharacterClassConstructionState { + Empty, + CachedCharacter, + CachedCharacterHyphen, + AfterCharacterClass, + AfterCharacterClassHyphen, + } m_state; + UChar m_character; + }; + + Parser(Delegate& delegate, const UString& pattern, unsigned backReferenceLimit) + : m_delegate(delegate) + , m_backReferenceLimit(backReferenceLimit) + , m_err(NoError) + , m_data(pattern.characters()) + , m_size(pattern.length()) + , m_index(0) + , m_parenthesesNestingDepth(0) + { + } + + /* + * parseEscape(): + * + * Helper for parseTokens() AND parseCharacterClass(). + * Unlike the other parser methods, this function does not report tokens + * directly to the member delegate (m_delegate), instead tokens are + * emitted to the delegate provided as an argument. In the case of atom + * escapes, parseTokens() will call parseEscape() passing m_delegate as + * an argument, and as such the escape will be reported to the delegate. + * + * However this method may also be used by parseCharacterClass(), in which + * case a CharacterClassParserDelegate will be passed as the delegate that + * tokens should be added to. A boolean flag is also provided to indicate + * whether that an escape in a CharacterClass is being parsed (some parsing + * rules change in this context). + * + * The boolean value returned by this method indicates whether the token + * parsed was an atom (outside of a characted class \b and \B will be + * interpreted as assertions). + */ + template<bool inCharacterClass, class EscapeDelegate> + bool parseEscape(EscapeDelegate& delegate) + { + ASSERT(!m_err); + ASSERT(peek() == '\\'); + consume(); + + if (atEndOfPattern()) { + m_err = EscapeUnterminated; + return false; + } + + switch (peek()) { + // Assertions + case 'b': + consume(); + if (inCharacterClass) + delegate.atomPatternCharacter('\b'); + else { + delegate.assertionWordBoundary(false); + return false; + } + break; + case 'B': + consume(); + if (inCharacterClass) + delegate.atomPatternCharacter('B'); + else { + delegate.assertionWordBoundary(true); + return false; + } + break; + + // CharacterClassEscape + case 'd': + consume(); + delegate.atomBuiltInCharacterClass(DigitClassID, false); + break; + case 's': + consume(); + delegate.atomBuiltInCharacterClass(SpaceClassID, false); + break; + case 'w': + consume(); + delegate.atomBuiltInCharacterClass(WordClassID, false); + break; + case 'D': + consume(); + delegate.atomBuiltInCharacterClass(DigitClassID, true); + break; + case 'S': + consume(); + delegate.atomBuiltInCharacterClass(SpaceClassID, true); + break; + case 'W': + consume(); + delegate.atomBuiltInCharacterClass(WordClassID, true); + break; + + // DecimalEscape + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + // To match Firefox, we parse an invalid backreference in the range [1-7] as an octal escape. + // First, try to parse this as backreference. + if (!inCharacterClass) { + ParseState state = saveState(); + + unsigned backReference = consumeNumber(); + if (backReference <= m_backReferenceLimit) { + delegate.atomBackReference(backReference); + break; + } + + restoreState(state); + } + + // Not a backreference, and not octal. + if (peek() >= '8') { + delegate.atomPatternCharacter('\\'); + break; + } + + // Fall-through to handle this as an octal escape. + } + + // Octal escape + case '0': + delegate.atomPatternCharacter(consumeOctal()); + break; + + // ControlEscape + case 'f': + consume(); + delegate.atomPatternCharacter('\f'); + break; + case 'n': + consume(); + delegate.atomPatternCharacter('\n'); + break; + case 'r': + consume(); + delegate.atomPatternCharacter('\r'); + break; + case 't': + consume(); + delegate.atomPatternCharacter('\t'); + break; + case 'v': + consume(); + delegate.atomPatternCharacter('\v'); + break; + + // ControlLetter + case 'c': { + ParseState state = saveState(); + consume(); + if (!atEndOfPattern()) { + int control = consume(); + + // To match Firefox, inside a character class, we also accept numbers and '_' as control characters. + if (inCharacterClass ? WTF::isASCIIAlphanumeric(control) || (control == '_') : WTF::isASCIIAlpha(control)) { + delegate.atomPatternCharacter(control & 0x1f); + break; + } + } + restoreState(state); + delegate.atomPatternCharacter('\\'); + break; + } + + // HexEscape + case 'x': { + consume(); + int x = tryConsumeHex(2); + if (x == -1) + delegate.atomPatternCharacter('x'); + else + delegate.atomPatternCharacter(x); + break; + } + + // UnicodeEscape + case 'u': { + consume(); + int u = tryConsumeHex(4); + if (u == -1) + delegate.atomPatternCharacter('u'); + else + delegate.atomPatternCharacter(u); + break; + } + + // IdentityEscape + default: + delegate.atomPatternCharacter(consume()); + } + + return true; + } + + /* + * parseAtomEscape(), parseCharacterClassEscape(): + * + * These methods alias to parseEscape(). + */ + bool parseAtomEscape() + { + return parseEscape<false>(m_delegate); + } + void parseCharacterClassEscape(CharacterClassParserDelegate& delegate) + { + parseEscape<true>(delegate); + } + + /* + * parseCharacterClass(): + * + * Helper for parseTokens(); calls dirctly and indirectly (via parseCharacterClassEscape) + * to an instance of CharacterClassParserDelegate, to describe the character class to the + * delegate. + */ + void parseCharacterClass() + { + ASSERT(!m_err); + ASSERT(peek() == '['); + consume(); + + CharacterClassParserDelegate characterClassConstructor(m_delegate, m_err); + + characterClassConstructor.begin(tryConsume('^')); + + while (!atEndOfPattern()) { + switch (peek()) { + case ']': + consume(); + characterClassConstructor.end(); + return; + + case '\\': + parseCharacterClassEscape(characterClassConstructor); + break; + + default: + characterClassConstructor.atomPatternCharacter(consume(), true); + } + + if (m_err) + return; + } + + m_err = CharacterClassUnmatched; + } + + /* + * parseParenthesesBegin(): + * + * Helper for parseTokens(); checks for parentheses types other than regular capturing subpatterns. + */ + void parseParenthesesBegin() + { + ASSERT(!m_err); + ASSERT(peek() == '('); + consume(); + + if (tryConsume('?')) { + if (atEndOfPattern()) { + m_err = ParenthesesTypeInvalid; + return; + } + + switch (consume()) { + case ':': + m_delegate.atomParenthesesSubpatternBegin(false); + break; + + case '=': + m_delegate.atomParentheticalAssertionBegin(); + break; + + case '!': + m_delegate.atomParentheticalAssertionBegin(true); + break; + + default: + m_err = ParenthesesTypeInvalid; + } + } else + m_delegate.atomParenthesesSubpatternBegin(); + + ++m_parenthesesNestingDepth; + } + + /* + * parseParenthesesEnd(): + * + * Helper for parseTokens(); checks for parse errors (due to unmatched parentheses). + */ + void parseParenthesesEnd() + { + ASSERT(!m_err); + ASSERT(peek() == ')'); + consume(); + + if (m_parenthesesNestingDepth > 0) + m_delegate.atomParenthesesEnd(); + else + m_err = ParenthesesUnmatched; + + --m_parenthesesNestingDepth; + } + + /* + * parseQuantifier(): + * + * Helper for parseTokens(); checks for parse errors and non-greedy quantifiers. + */ + void parseQuantifier(bool lastTokenWasAnAtom, unsigned min, unsigned max) + { + ASSERT(!m_err); + ASSERT(min <= max); + + if (lastTokenWasAnAtom) + m_delegate.quantifyAtom(min, max, !tryConsume('?')); + else + m_err = QuantifierWithoutAtom; + } + + /* + * parseTokens(): + * + * This method loops over the input pattern reporting tokens to the delegate. + * The method returns when a parse error is detected, or the end of the pattern + * is reached. One piece of state is tracked around the loop, which is whether + * the last token passed to the delegate was an atom (this is necessary to detect + * a parse error when a quantifier provided without an atom to quantify). + */ + void parseTokens() + { + bool lastTokenWasAnAtom = false; + + while (!atEndOfPattern()) { + switch (peek()) { + case '|': + consume(); + m_delegate.disjunction(); + lastTokenWasAnAtom = false; + break; + + case '(': + parseParenthesesBegin(); + lastTokenWasAnAtom = false; + break; + + case ')': + parseParenthesesEnd(); + lastTokenWasAnAtom = true; + break; + + case '^': + consume(); + m_delegate.assertionBOL(); + lastTokenWasAnAtom = false; + break; + + case '$': + consume(); + m_delegate.assertionEOL(); + lastTokenWasAnAtom = false; + break; + + case '.': + consume(); + m_delegate.atomBuiltInCharacterClass(NewlineClassID, true); + lastTokenWasAnAtom = true; + break; + + case '[': + parseCharacterClass(); + lastTokenWasAnAtom = true; + break; + + case '\\': + lastTokenWasAnAtom = parseAtomEscape(); + break; + + case '*': + consume(); + parseQuantifier(lastTokenWasAnAtom, 0, quantifyInfinite); + lastTokenWasAnAtom = false; + break; + + case '+': + consume(); + parseQuantifier(lastTokenWasAnAtom, 1, quantifyInfinite); + lastTokenWasAnAtom = false; + break; + + case '?': + consume(); + parseQuantifier(lastTokenWasAnAtom, 0, 1); + lastTokenWasAnAtom = false; + break; + + case '{': { + ParseState state = saveState(); + + consume(); + if (peekIsDigit()) { + unsigned min = consumeNumber(); + unsigned max = min; + + if (tryConsume(',')) + max = peekIsDigit() ? consumeNumber() : quantifyInfinite; + + if (tryConsume('}')) { + if (min <= max) + parseQuantifier(lastTokenWasAnAtom, min, max); + else + m_err = QuantifierOutOfOrder; + lastTokenWasAnAtom = false; + break; + } + } + + restoreState(state); + } // if we did not find a complete quantifer, fall through to the default case. + + default: + m_delegate.atomPatternCharacter(consume()); + lastTokenWasAnAtom = true; + } + + if (m_err) + return; + } + + if (m_parenthesesNestingDepth > 0) + m_err = MissingParentheses; + } + + /* + * parse(): + * + * This method calls parseTokens() to parse over the input and converts any + * error code to a const char* for a result. + */ + const char* parse() + { + if (m_size > MAX_PATTERN_SIZE) + m_err = PatternTooLarge; + else + parseTokens(); + ASSERT(atEndOfPattern() || m_err); + + // The order of this array must match the ErrorCode enum. + static const char* errorMessages[NumberOfErrorCodes] = { + 0, // NoError + REGEXP_ERROR_PREFIX "regular expression too large", + REGEXP_ERROR_PREFIX "numbers out of order in {} quantifier", + REGEXP_ERROR_PREFIX "nothing to repeat", + REGEXP_ERROR_PREFIX "missing )", + REGEXP_ERROR_PREFIX "unmatched parentheses", + REGEXP_ERROR_PREFIX "unrecognized character after (?", + REGEXP_ERROR_PREFIX "missing terminating ] for character class", + REGEXP_ERROR_PREFIX "range out of order in character class", + REGEXP_ERROR_PREFIX "\\ at end of pattern" + }; + + return errorMessages[m_err]; + } + + + // Misc helper functions: + + typedef unsigned ParseState; + + ParseState saveState() + { + return m_index; + } + + void restoreState(ParseState state) + { + m_index = state; + } + + bool atEndOfPattern() + { + ASSERT(m_index <= m_size); + return m_index == m_size; + } + + int peek() + { + ASSERT(m_index < m_size); + return m_data[m_index]; + } + + bool peekIsDigit() + { + return !atEndOfPattern() && WTF::isASCIIDigit(peek()); + } + + unsigned peekDigit() + { + ASSERT(peekIsDigit()); + return peek() - '0'; + } + + int consume() + { + ASSERT(m_index < m_size); + return m_data[m_index++]; + } + + unsigned consumeDigit() + { + ASSERT(peekIsDigit()); + return consume() - '0'; + } + + unsigned consumeNumber() + { + unsigned n = consumeDigit(); + // check for overflow. + for (unsigned newValue; peekIsDigit() && ((newValue = n * 10 + peekDigit()) >= n); ) { + n = newValue; + consume(); + } + return n; + } + + unsigned consumeOctal() + { + ASSERT(WTF::isASCIIOctalDigit(peek())); + + unsigned n = consumeDigit(); + while (n < 32 && !atEndOfPattern() && WTF::isASCIIOctalDigit(peek())) + n = n * 8 + consumeDigit(); + return n; + } + + bool tryConsume(UChar ch) + { + if (atEndOfPattern() || (m_data[m_index] != ch)) + return false; + ++m_index; + return true; + } + + int tryConsumeHex(int count) + { + ParseState state = saveState(); + + int n = 0; + while (count--) { + if (atEndOfPattern() || !WTF::isASCIIHexDigit(peek())) { + restoreState(state); + return -1; + } + n = (n << 4) | WTF::toASCIIHexValue(consume()); + } + return n; + } + + Delegate& m_delegate; + unsigned m_backReferenceLimit; + ErrorCode m_err; + const UChar* m_data; + unsigned m_size; + unsigned m_index; + unsigned m_parenthesesNestingDepth; + + // Derived by empirical testing of compile time in PCRE and WREC. + static const unsigned MAX_PATTERN_SIZE = 1024 * 1024; +}; + +/* + * Yarr::parse(): + * + * The parse method is passed a pattern to be parsed and a delegate upon which + * callbacks will be made to record the parsed tokens forming the regex. + * Yarr::parse() returns null on success, or a const C string providing an error + * message where a parse error occurs. + * + * The Delegate must implement the following interface: + * + * void assertionBOL(); + * void assertionEOL(); + * void assertionWordBoundary(bool invert); + * + * void atomPatternCharacter(UChar ch); + * void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert); + * void atomCharacterClassBegin(bool invert) + * void atomCharacterClassAtom(UChar ch) + * void atomCharacterClassRange(UChar begin, UChar end) + * void atomCharacterClassBuiltIn(BuiltInCharacterClassID classID, bool invert) + * void atomCharacterClassEnd() + * void atomParenthesesSubpatternBegin(bool capture = true); + * void atomParentheticalAssertionBegin(bool invert = false); + * void atomParenthesesEnd(); + * void atomBackReference(unsigned subpatternId); + * + * void quantifyAtom(unsigned min, unsigned max, bool greedy); + * + * void disjunction(); + * + * The regular expression is described by a sequence of assertion*() and atom*() + * callbacks to the delegate, describing the terms in the regular expression. + * Following an atom a quantifyAtom() call may occur to indicate that the previous + * atom should be quantified. In the case of atoms described across multiple + * calls (parentheses and character classes) the call to quantifyAtom() will come + * after the call to the atom*End() method, never after atom*Begin(). + * + * Character classes may either be described by a single call to + * atomBuiltInCharacterClass(), or by a sequence of atomCharacterClass*() calls. + * In the latter case, ...Begin() will be called, followed by a sequence of + * calls to ...Atom(), ...Range(), and ...BuiltIn(), followed by a call to ...End(). + * + * Sequences of atoms and assertions are broken into alternatives via calls to + * disjunction(). Assertions, atoms, and disjunctions emitted between calls to + * atomParenthesesBegin() and atomParenthesesEnd() form the body of a subpattern. + * atomParenthesesBegin() is passed a subpatternId. In the case of a regular + * capturing subpattern, this will be the subpatternId associated with these + * parentheses, and will also by definition be the lowest subpatternId of these + * parentheses and of any nested paretheses. The atomParenthesesEnd() method + * is passed the subpatternId of the last capturing subexpression nested within + * these paretheses. In the case of a capturing subpattern with no nested + * capturing subpatterns, the same subpatternId will be passed to the begin and + * end functions. In the case of non-capturing subpatterns the subpatternId + * passed to the begin method is also the first possible subpatternId that might + * be nested within these paretheses. If a set of non-capturing parentheses does + * not contain any capturing subpatterns, then the subpatternId passed to begin + * will be greater than the subpatternId passed to end. + */ + +template<class Delegate> +const char* parse(Delegate& delegate, const UString& pattern, unsigned backReferenceLimit = quantifyInfinite) +{ + return Parser<Delegate>(delegate, pattern, backReferenceLimit).parse(); +} + +} } // namespace JSC::Yarr + +#endif // YarrParser_h |