5 files changed, 75 insertions, 99 deletions
diff --git a/JavaScriptCore/yarr/RegexCompiler.cpp b/JavaScriptCore/yarr/RegexCompiler.cpp
index e40c791..06ecbad 100644
--- a/JavaScriptCore/yarr/RegexCompiler.cpp
+++ b/JavaScriptCore/yarr/RegexCompiler.cpp
@@ -520,7 +520,7 @@ public:
             PatternTerm& term = currentAlternative->lastTerm();
             ASSERT((term.type == PatternTerm::TypeParenthesesSubpattern) || (term.type == PatternTerm::TypeParentheticalAssertion));
 
-            if ((term.type == PatternTerm::TypeParenthesesSubpattern) && term.invertOrCapture && (subpatternId == term.parentheses.subpatternId)) {
+            if ((term.type == PatternTerm::TypeParenthesesSubpattern) && term.invertOrCapture && (subpatternId == term.subpatternId)) {
                 m_alternative->m_terms.append(PatternTerm::ForwardReference());
                 return;
             }
@@ -595,7 +595,7 @@ public:
             term.quantify(min, QuantifierFixedCount);
             m_alternative->m_terms.append(copyTerm(term));
             // NOTE: this term is interesting from an analysis perspective, in that it can be ignored.....
-            m_alternative->lastTerm().quantify((max == quantifyInfinite) ? max : max - min, greedy ? QuantifierGreedy : QuantifierNonGreedy);
+            m_alternative->lastTerm().quantify((max == UINT_MAX) ? max : max - min, greedy ? QuantifierGreedy : QuantifierNonGreedy);
             if (m_alternative->lastTerm().type == PatternTerm::TypeParenthesesSubpattern)
                 m_alternative->lastTerm().parentheses.isCopy = true;
         }
@@ -734,8 +734,7 @@ public:
     // This optimization identifies sets of parentheses that we will never need to backtrack.
     // In these cases we do not need to store state from prior iterations.
     // We can presently avoid backtracking for:
-    //   * where the parens are at the end of the regular expression (last term in any of the
-    //     alternatives of the main body disjunction).
+    //   * a set of parens at the end of the regular expression (last term in any of the alternatives of the main body disjunction).
     //   * where the parens are non-capturing, and quantified unbounded greedy (*).
     //   * where the parens do not contain any capturing subpatterns.
     void checkForTerminalParentheses()
@@ -746,13 +745,13 @@ public:
             return;
 
         Vector<PatternAlternative*>& alternatives = m_pattern.m_body->m_alternatives;
-        for (size_t i = 0; i < alternatives.size(); ++i) {
+        for (unsigned i =0; i < alternatives.size(); ++i) {
             Vector<PatternTerm>& terms = alternatives[i]->m_terms;
             if (terms.size()) {
                 PatternTerm& term = terms.last();
                 if (term.type == PatternTerm::TypeParenthesesSubpattern
                     && term.quantityType == QuantifierGreedy
-                    && term.quantityCount == quantifyInfinite
+                    && term.quantityCount == UINT_MAX
                     && !term.capture())
                     term.parentheses.isTerminal = true;
             }
diff --git a/JavaScriptCore/yarr/RegexInterpreter.cpp b/JavaScriptCore/yarr/RegexInterpreter.cpp
index a51cd25..164158e 100644
--- a/JavaScriptCore/yarr/RegexInterpreter.cpp
+++ b/JavaScriptCore/yarr/RegexInterpreter.cpp
@@ -515,7 +515,8 @@ public:
         if (matchEnd == -1)
             return true;
 
-        ASSERT((matchBegin == -1) || (matchBegin <= matchEnd));
+        ASSERT((matchBegin == -1) == (matchEnd == -1));
+        ASSERT(matchBegin <= matchEnd);
 
         if (matchBegin == matchEnd)
             return true;
@@ -557,7 +558,8 @@ public:
 
         int matchBegin = output[(term.atom.subpatternId << 1)];
         int matchEnd = output[(term.atom.subpatternId << 1) + 1];
-        ASSERT((matchBegin == -1) || (matchBegin <= matchEnd));
+        ASSERT((matchBegin == -1) == (matchEnd == -1));
+        ASSERT(matchBegin <= matchEnd);
 
         if (matchBegin == matchEnd)
             return false;
@@ -717,7 +719,7 @@ public:
                 if (term.capture()) {
                     // Technically this access to inputPosition should be accessing the begin term's
                     // inputPosition, but for repeats other than fixed these values should be
-                    // the same anyway! (We don't pre-check for greedy or non-greedy matches.)
+                    // the same anyway! (we don't pre-check for greedy or non-greedy matches.)
                     ASSERT((&term - term.atom.parenthesesWidth)->type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
                     ASSERT((&term - term.atom.parenthesesWidth)->inputPosition == term.inputPosition);
                     unsigned subpatternId = term.atom.subpatternId;
@@ -737,7 +739,7 @@ public:
     {
         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
         ASSERT(term.atom.quantityType == QuantifierGreedy);
-        ASSERT(term.atom.quantityCount == quantifyInfinite);
+        ASSERT(term.atom.quantityCount == UINT_MAX);
         ASSERT(!term.capture());
 
         BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
@@ -763,7 +765,7 @@ public:
     {
         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
         ASSERT(term.atom.quantityType == QuantifierGreedy);
-        ASSERT(term.atom.quantityCount == quantifyInfinite);
+        ASSERT(term.atom.quantityCount == UINT_MAX);
         ASSERT(!term.capture());
 
         // If we backtrack to this point, we have failed to match this iteration of the parens.
@@ -775,7 +777,7 @@ public:
     bool backtrackParenthesesTerminalEnd(ByteTerm&, DisjunctionContext*)
     {
         // 'Terminal' parentheses are at the end of the regex, and as such a match past end
-        // should always be returned as a successful match - we should never backtrack to here.
+        // should always be returned as a successful match - we should never becktrack to here.
         ASSERT_NOT_REACHED();
         return false;
     }
@@ -1826,7 +1828,7 @@ public:
                     break;
 
                 case PatternTerm::TypeBackReference:
-                    atomBackReference(term.backReferenceSubpatternId, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
+                    atomBackReference(term.subpatternId, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
                         break;
 
                 case PatternTerm::TypeForwardReference:
diff --git a/JavaScriptCore/yarr/RegexJIT.cpp b/JavaScriptCore/yarr/RegexJIT.cpp
index 1eac667..acbd458 100644
--- a/JavaScriptCore/yarr/RegexJIT.cpp
+++ b/JavaScriptCore/yarr/RegexJIT.cpp
@@ -624,7 +624,7 @@ class RegexGenerator : private MacroAssembler {
 
         add32(Imm32(1), countRegister);
         add32(Imm32(1), index);
-        if (term.quantityCount != quantifyInfinite) {
+        if (term.quantityCount != 0xffffffff) {
             branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
             failures.append(jump());
         } else
@@ -662,7 +662,7 @@ class RegexGenerator : private MacroAssembler {
         loadFromFrame(term.frameLocation, countRegister);
 
         atEndOfInput().linkTo(hardFail, this);
-        if (term.quantityCount != quantifyInfinite)
+        if (term.quantityCount != 0xffffffff)
             branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
         if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
             readCharacter(state.inputOffset(), character);
@@ -749,7 +749,7 @@ class RegexGenerator : private MacroAssembler {
 
         add32(Imm32(1), countRegister);
         add32(Imm32(1), index);
-        if (term.quantityCount != quantifyInfinite) {
+        if (term.quantityCount != 0xffffffff) {
             branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
             failures.append(jump());
         } else
diff --git a/JavaScriptCore/yarr/RegexParser.h b/JavaScriptCore/yarr/RegexParser.h
index 8392cdf..a5c0ba2 100644
--- a/JavaScriptCore/yarr/RegexParser.h
+++ b/JavaScriptCore/yarr/RegexParser.h
@@ -33,8 +33,6 @@
 
 namespace JSC { namespace Yarr {
 
-static const unsigned quantifyInfinite = UINT_MAX;
-
 enum BuiltInCharacterClassID {
     DigitClassID,
     SpaceClassID,
@@ -58,7 +56,6 @@ private:
         ParenthesesUnmatched,
         ParenthesesTypeInvalid,
         CharacterClassUnmatched,
-        CharacterClassInvalidRange,
         CharacterClassOutOfOrder,
         EscapeUnterminated,
         NumberOfErrorCodes
@@ -78,7 +75,7 @@ private:
         CharacterClassParserDelegate(Delegate& delegate, ErrorCode& err)
             : m_delegate(delegate)
             , m_err(err)
-            , m_state(Empty)
+            , m_state(empty)
         {
         }
 
@@ -93,88 +90,65 @@ private:
         }
 
         /*
-         * atomPatternCharacter():
+         * atomPatternCharacterUnescaped():
          *
-         * This method is called either from parseCharacterClass() (for an unescaped
-         * character in a character class), or from parseEscape(). In the former case
-         * the value true will be passed for the argument 'hyphenIsRange', and in this
-         * mode we will allow a hypen to be treated as indicating a range (i.e. /[a-z]/
-         * is different to /[a\-z]/).
+         * This method is called directly from parseCharacterClass(), to report a new
+         * pattern character token.  This method differs from atomPatternCharacter(),
+         * which will be called from parseEscape(), since a hypen provided via this
+         * method may be indicating a character range, but a hyphen parsed by
+         * parseEscape() cannot be interpreted as doing so.
          */
-        void atomPatternCharacter(UChar ch, bool hyphenIsRange = false)
+        void atomPatternCharacterUnescaped(UChar ch)
         {
             switch (m_state) {
-            case AfterCharacterClass:
-                // Following a builtin character class we need look out for a hyphen.
-                // We're looking for invalid ranges, such as /[\d-x]/ or /[\d-\d]/.
-                // If we see a hyphen following a charater class then unlike usual
-                // we'll report it to the delegate immediately, and put ourself into
-                // a poisoned state. Any following calls to add another character or
-                // character class will result in an error. (A hypen following a
-                // character-class is itself valid, but only  at the end of a regex).
-                if (hyphenIsRange && ch == '-') {
-                    m_delegate.atomCharacterClassAtom('-');
-                    m_state = AfterCharacterClassHyphen;
-                    return;
-                }
-                // Otherwise just fall through - cached character so treat this as Empty.
-
-            case Empty:
+            case empty:
                 m_character = ch;
-                m_state = CachedCharacter;
-                return;
+                m_state = cachedCharacter;
+                break;
 
-            case CachedCharacter:
-                if (hyphenIsRange && ch == '-')
-                    m_state = CachedCharacterHyphen;
+            case cachedCharacter:
+                if (ch == '-')
+                    m_state = cachedCharacterHyphen;
                 else {
                     m_delegate.atomCharacterClassAtom(m_character);
                     m_character = ch;
                 }
-                return;
+                break;
 
-            case CachedCharacterHyphen:
-                if (ch < m_character) {
+            case cachedCharacterHyphen:
+                if (ch >= m_character)
+                    m_delegate.atomCharacterClassRange(m_character, ch);
+                else
                     m_err = CharacterClassOutOfOrder;
-                    return;
-                }
-                m_delegate.atomCharacterClassRange(m_character, ch);
-                m_state = Empty;
-                return;
-
-            case AfterCharacterClassHyphen:
-                // Error! We have something like /[\d-x]/.
-                m_err = CharacterClassInvalidRange;
-                return;
+                m_state = empty;
             }
         }
 
         /*
+         * atomPatternCharacter():
+         *
+         * Adds a pattern character, called by parseEscape(), as such will not
+         * interpret a hyphen as indicating a character range.
+         */
+        void atomPatternCharacter(UChar ch)
+        {
+            // Flush if a character is already pending to prevent the
+            // hyphen from begin interpreted as indicating a range.
+            if((ch == '-') && (m_state == cachedCharacter))
+                flush();
+
+            atomPatternCharacterUnescaped(ch);
+        }
+
+        /*
          * atomBuiltInCharacterClass():
          *
          * Adds a built-in character class, called by parseEscape().
          */
         void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert)
         {
-            switch (m_state) {
-            case CachedCharacter:
-                // Flush the currently cached character, then fall through.
-                m_delegate.atomCharacterClassAtom(m_character);
-
-            case Empty:
-            case AfterCharacterClass:
-                m_state = AfterCharacterClass;
-                m_delegate.atomCharacterClassBuiltIn(classID, invert);
-                return;
-
-            case CachedCharacterHyphen:
-            case AfterCharacterClassHyphen:
-                // Error! If we hit either of these cases, we have an
-                // invalid range that looks something like /[x-\d]/
-                // or /[\d-\d]/.
-                m_err = CharacterClassInvalidRange;
-                return;
-            }
+            flush();
+            m_delegate.atomCharacterClassBuiltIn(classID, invert);
         }
 
         /*
@@ -184,12 +158,7 @@ private:
          */
         void end()
         {
-            if (m_state == CachedCharacter)
-                m_delegate.atomCharacterClassAtom(m_character);
-            else if (m_state == CachedCharacterHyphen) {
-                m_delegate.atomCharacterClassAtom(m_character);
-                m_delegate.atomCharacterClassAtom('-');
-            }
+            flush();
             m_delegate.atomCharacterClassEnd();
         }
 
@@ -199,14 +168,21 @@ private:
         void atomBackReference(unsigned) { ASSERT_NOT_REACHED(); }
 
     private:
+        void flush()
+        {
+            if (m_state != empty) // either cachedCharacter or cachedCharacterHyphen
+                m_delegate.atomCharacterClassAtom(m_character);
+            if (m_state == cachedCharacterHyphen)
+                m_delegate.atomCharacterClassAtom('-');
+            m_state = empty;
+        }
+    
         Delegate& m_delegate;
         ErrorCode& m_err;
         enum CharacterClassConstructionState {
-            Empty,
-            CachedCharacter,
-            CachedCharacterHyphen,
-            AfterCharacterClass,
-            AfterCharacterClassHyphen,
+            empty,
+            cachedCharacter,
+            cachedCharacterHyphen,
         } m_state;
         UChar m_character;
     };
@@ -452,7 +428,7 @@ private:
                 break;
 
             default:
-                characterClassConstructor.atomPatternCharacter(consume(), true);
+                characterClassConstructor.atomPatternCharacterUnescaped(consume());
             }
 
             if (m_err)
@@ -596,13 +572,13 @@ private:
 
             case '*':
                 consume();
-                parseQuantifier(lastTokenWasAnAtom, 0, quantifyInfinite);
+                parseQuantifier(lastTokenWasAnAtom, 0, UINT_MAX);
                 lastTokenWasAnAtom = false;
                 break;
 
             case '+':
                 consume();
-                parseQuantifier(lastTokenWasAnAtom, 1, quantifyInfinite);
+                parseQuantifier(lastTokenWasAnAtom, 1, UINT_MAX);
                 lastTokenWasAnAtom = false;
                 break;
 
@@ -621,7 +597,7 @@ private:
                     unsigned max = min;
                     
                     if (tryConsume(','))
-                        max = peekIsDigit() ? consumeNumber() : quantifyInfinite;
+                        max = peekIsDigit() ? consumeNumber() : UINT_MAX;
 
                     if (tryConsume('}')) {
                         if (min <= max)
@@ -681,7 +657,6 @@ private:
             "unmatched parentheses",
             "unrecognized character after (?",
             "missing terminating ] for character class",
-            "invalid range in character class",
             "range out of order in character class",
             "\\ at end of pattern"
         };
@@ -863,7 +838,7 @@ private:
  */
 
 template<class Delegate>
-const char* parse(Delegate& delegate, const UString& pattern, unsigned backReferenceLimit = quantifyInfinite)
+const char* parse(Delegate& delegate, const UString& pattern, unsigned backReferenceLimit = UINT_MAX)
 {
     return Parser<Delegate>(delegate, pattern, backReferenceLimit).parse();
 }
diff --git a/JavaScriptCore/yarr/RegexPattern.h b/JavaScriptCore/yarr/RegexPattern.h
index 8a7d35b..c76c641 100644
--- a/JavaScriptCore/yarr/RegexPattern.h
+++ b/JavaScriptCore/yarr/RegexPattern.h
@@ -107,7 +107,7 @@ struct PatternTerm {
     union {
         UChar patternCharacter;
         CharacterClass* characterClass;
-        unsigned backReferenceSubpatternId;
+        unsigned subpatternId;
         struct {
             PatternDisjunction* disjunction;
             unsigned subpatternId;
@@ -162,7 +162,7 @@ struct PatternTerm {
         : type(TypeBackReference)
         , invertOrCapture(false)
     {
-        backReferenceSubpatternId = spatternId;
+        subpatternId = spatternId;
         quantityType = QuantifierFixedCount;
         quantityCount = 1;
     }