8 files changed, 1274 insertions, 514 deletions
diff --git a/JavaScriptCore/yarr/RegexCompiler.cpp b/JavaScriptCore/yarr/RegexCompiler.cpp
index 9cd3d12..06ecbad 100644
--- a/JavaScriptCore/yarr/RegexCompiler.cpp
+++ b/JavaScriptCore/yarr/RegexCompiler.cpp
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2009 Apple Inc. All rights reserved.
+ * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -30,12 +31,12 @@
 #include "RegexPattern.h"
 #include <wtf/Vector.h>
 
-#if ENABLE(YARR)
-
 using namespace WTF;
 
 namespace JSC { namespace Yarr {
 
+#include "RegExpJitTables.h"
+
 class CharacterClassConstructor {
 public:
     CharacterClassConstructor(bool isCaseInsensitive = false)
@@ -141,7 +142,7 @@ public:
 
     CharacterClass* charClass()
     {
-        CharacterClass* characterClass = new CharacterClass();
+        CharacterClass* characterClass = new CharacterClass(0);
 
         characterClass->m_matches.append(m_matches);
         characterClass->m_ranges.append(m_ranges);
@@ -233,110 +234,118 @@ private:
     Vector<CharacterRange> m_rangesUnicode;
 };
 
+struct BeginCharHelper {
+    BeginCharHelper(Vector<BeginChar>* beginChars, bool isCaseInsensitive = false)
+        : m_beginChars(beginChars)
+        , m_isCaseInsensitive(isCaseInsensitive)
+    {}
+
+    void addBeginChar(BeginChar beginChar, Vector<TermChain>* hotTerms, QuantifierType quantityType, unsigned quantityCount)
+    {
+        if (quantityType == QuantifierFixedCount && quantityCount > 1) {
+            // We duplicate the first found character if the quantity of the term is more than one. eg.: /a{3}/
+            beginChar.value |= beginChar.value << 16;
+            beginChar.mask |= beginChar.mask << 16;
+            addCharacter(beginChar);
+        } else if (quantityType == QuantifierFixedCount && quantityCount == 1 && hotTerms->size())
+            // In case of characters with fixed quantifier we should check the next character as well.
+            linkHotTerms(beginChar, hotTerms);
+        else
+            // In case of greedy matching the next character checking is unnecessary therefore we just store
+            // the first character.
+            addCharacter(beginChar);
+    }
 
-CharacterClass* newlineCreate()
-{
-    CharacterClass* characterClass = new CharacterClass();
+    // Merge two following BeginChars in the vector to reduce the number of character checks.
+    void merge(unsigned size)
+    {
+        for (unsigned i = 0; i < size; i++) {
+            BeginChar* curr = &m_beginChars->at(i);
+            BeginChar* next = &m_beginChars->at(i + 1);
 
-    characterClass->m_matches.append('\n');
-    characterClass->m_matches.append('\r');
-    characterClass->m_matchesUnicode.append(0x2028);
-    characterClass->m_matchesUnicode.append(0x2029);
-    
-    return characterClass;
-}
+            // If the current and the next size of value is different we should skip the merge process
+            // because the 16bit and 32bit values are unmergable.
+            if (curr->value <= 0xFFFF && next->value > 0xFFFF)
+                continue;
 
-CharacterClass* digitsCreate()
-{
-    CharacterClass* characterClass = new CharacterClass();
+            unsigned diff = curr->value ^ next->value;
 
-    characterClass->m_ranges.append(CharacterRange('0', '9'));
-    
-    return characterClass;
-}
+            curr->mask |= diff;
+            curr->value |= curr->mask;
 
-CharacterClass* spacesCreate()
-{
-    CharacterClass* characterClass = new CharacterClass();
-
-    characterClass->m_matches.append(' ');
-    characterClass->m_ranges.append(CharacterRange('\t', '\r'));
-    characterClass->m_matchesUnicode.append(0x00a0);
-    characterClass->m_matchesUnicode.append(0x1680);
-    characterClass->m_matchesUnicode.append(0x180e);
-    characterClass->m_matchesUnicode.append(0x2028);
-    characterClass->m_matchesUnicode.append(0x2029);
-    characterClass->m_matchesUnicode.append(0x202f);
-    characterClass->m_matchesUnicode.append(0x205f);
-    characterClass->m_matchesUnicode.append(0x3000);
-    characterClass->m_rangesUnicode.append(CharacterRange(0x2000, 0x200a));
-    
-    return characterClass;
-}
+            m_beginChars->remove(i + 1);
+            size--;
+        }
+    }
 
-CharacterClass* wordcharCreate()
-{
-    CharacterClass* characterClass = new CharacterClass();
+private:
+    void addCharacter(BeginChar beginChar)
+    {
+        unsigned pos = 0;
+        unsigned range = m_beginChars->size();
 
-    characterClass->m_matches.append('_');
-    characterClass->m_ranges.append(CharacterRange('0', '9'));
-    characterClass->m_ranges.append(CharacterRange('A', 'Z'));
-    characterClass->m_ranges.append(CharacterRange('a', 'z'));
-    
-    return characterClass;
-}
+        // binary chop, find position to insert char.
+        while (range) {
+            unsigned index = range >> 1;
 
-CharacterClass* nondigitsCreate()
-{
-    CharacterClass* characterClass = new CharacterClass();
+            int val = m_beginChars->at(pos+index).value - beginChar.value;
+            if (!val)
+                return;
+            if (val < 0)
+                range = index;
+            else {
+                pos += (index+1);
+                range -= (index+1);
+            }
+        }
 
-    characterClass->m_ranges.append(CharacterRange(0, '0' - 1));
-    characterClass->m_ranges.append(CharacterRange('9' + 1, 0x7f));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x80, 0xffff));
-    
-    return characterClass;
-}
+        if (pos == m_beginChars->size())
+            m_beginChars->append(beginChar);
+        else
+            m_beginChars->insert(pos, beginChar);
+    }
 
-CharacterClass* nonspacesCreate()
-{
-    CharacterClass* characterClass = new CharacterClass();
-
-    characterClass->m_ranges.append(CharacterRange(0, '\t' - 1));
-    characterClass->m_ranges.append(CharacterRange('\r' + 1, ' ' - 1));
-    characterClass->m_ranges.append(CharacterRange(' ' + 1, 0x7f));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x0080, 0x009f));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x00a1, 0x167f));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x1681, 0x180d));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x180f, 0x1fff));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x200b, 0x2027));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x202a, 0x202e));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x2030, 0x205e));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x2060, 0x2fff));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x3001, 0xffff));
-    
-    return characterClass;
-}
+    // Create BeginChar objects by appending each terms from a hotTerms vector to an existing BeginChar object.
+    void linkHotTerms(BeginChar beginChar, Vector<TermChain>* hotTerms)
+    {
+        for (unsigned i = 0; i < hotTerms->size(); i++) {
+            PatternTerm hotTerm = hotTerms->at(i).term;
+            ASSERT(hotTerm.type == PatternTerm::TypePatternCharacter);
 
-CharacterClass* nonwordcharCreate()
-{
-    CharacterClass* characterClass = new CharacterClass();
+            UChar characterNext = hotTerm.patternCharacter;
 
-    characterClass->m_matches.append('`');
-    characterClass->m_ranges.append(CharacterRange(0, '0' - 1));
-    characterClass->m_ranges.append(CharacterRange('9' + 1, 'A' - 1));
-    characterClass->m_ranges.append(CharacterRange('Z' + 1, '_' - 1));
-    characterClass->m_ranges.append(CharacterRange('z' + 1, 0x7f));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x80, 0xffff));
+            // Append a character to an existing BeginChar object.
+            if (characterNext <= 0x7f) {
+                unsigned mask = 0;
 
-    return characterClass;
-}
+                if (m_isCaseInsensitive && isASCIIAlpha(characterNext)) {
+                    mask = 32;
+                    characterNext = toASCIILower(characterNext);
+                }
 
+                addCharacter(BeginChar(beginChar.value | (characterNext << 16), beginChar.mask | (mask << 16)));
+            } else {
+                UChar upper, lower;
+                if (m_isCaseInsensitive && ((upper = Unicode::toUpper(characterNext)) != (lower = Unicode::toLower(characterNext)))) {
+                    addCharacter(BeginChar(beginChar.value | (upper << 16), beginChar.mask));
+                    addCharacter(BeginChar(beginChar.value | (lower << 16), beginChar.mask));
+                } else
+                    addCharacter(BeginChar(beginChar.value | (characterNext << 16), beginChar.mask));
+            }
+        }
+    }
+
+    Vector<BeginChar>* m_beginChars;
+    bool m_isCaseInsensitive;
+};
 
 class RegexPatternConstructor {
 public:
     RegexPatternConstructor(RegexPattern& pattern)
         : m_pattern(pattern)
         , m_characterClassConstructor(pattern.m_ignoreCase)
+        , m_beginCharHelper(&pattern.m_beginChars, pattern.m_ignoreCase)
+        , m_invertParentheticalAssertion(false)
     {
     }
 
@@ -352,6 +361,11 @@ public:
     
     void assertionBOL()
     {
+        if (!m_alternative->m_terms.size() & !m_invertParentheticalAssertion) {
+            m_alternative->m_startsWithBOL = true;
+            m_alternative->m_containsBOL = true;
+            m_pattern.m_containsBOL = true;
+        }
         m_alternative->m_terms.append(PatternTerm::BOL());
     }
     void assertionEOL()
@@ -455,20 +469,42 @@ public:
         m_pattern.m_disjunctions.append(parenthesesDisjunction);
         m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParentheticalAssertion, m_pattern.m_numSubpatterns + 1, parenthesesDisjunction, invert));
         m_alternative = parenthesesDisjunction->addNewAlternative();
+        m_invertParentheticalAssertion = invert;
     }
 
     void atomParenthesesEnd()
     {
         ASSERT(m_alternative->m_parent);
         ASSERT(m_alternative->m_parent->m_parent);
+
+        PatternDisjunction* parenthesisDisjunction = m_alternative->m_parent;
         m_alternative = m_alternative->m_parent->m_parent;
+
+        PatternTerm& lastTerm = m_alternative->lastTerm();
+        
+        unsigned numParenAlternatives = parenthesisDisjunction->m_alternatives.size();
+        unsigned numBOLAnchoredAlts = 0;
+        // Bubble up BOL flags
+        for (unsigned i = 0; i < numParenAlternatives; i++) {
+            if (parenthesisDisjunction->m_alternatives[i]->m_startsWithBOL)
+                numBOLAnchoredAlts++;
+        }
         
-        m_alternative->lastTerm().parentheses.lastSubpatternId = m_pattern.m_numSubpatterns;
+        if (numBOLAnchoredAlts) {
+            m_alternative->m_containsBOL = true;
+            // If all the alternatives in parens start with BOL, then so does this one
+            if (numBOLAnchoredAlts == numParenAlternatives)
+                m_alternative->m_startsWithBOL = true;
+        }
+        
+        lastTerm.parentheses.lastSubpatternId = m_pattern.m_numSubpatterns;
+        m_invertParentheticalAssertion = false;
     }
 
     void atomBackReference(unsigned subpatternId)
     {
         ASSERT(subpatternId);
+        m_pattern.m_containsBackreferences = true;
         m_pattern.m_maxBackReference = std::max(m_pattern.m_maxBackReference, subpatternId);
 
         if (subpatternId > m_pattern.m_numSubpatterns) {
@@ -493,32 +529,39 @@ public:
         m_alternative->m_terms.append(PatternTerm(subpatternId));
     }
 
-    PatternDisjunction* copyDisjunction(PatternDisjunction* disjunction)
+    // deep copy the argument disjunction.  If filterStartsWithBOL is true, 
+    // skip alternatives with m_startsWithBOL set true.
+    PatternDisjunction* copyDisjunction(PatternDisjunction* disjunction, bool filterStartsWithBOL = false)
     {
-        PatternDisjunction* newDisjunction = new PatternDisjunction();
-
-        newDisjunction->m_parent = disjunction->m_parent;
+        PatternDisjunction* newDisjunction = 0;
         for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
             PatternAlternative* alternative = disjunction->m_alternatives[alt];
-            PatternAlternative* newAlternative = newDisjunction->addNewAlternative();
-            for (unsigned i = 0; i < alternative->m_terms.size(); ++i)
-                newAlternative->m_terms.append(copyTerm(alternative->m_terms[i]));
+            if (!filterStartsWithBOL || !alternative->m_startsWithBOL) {
+                if (!newDisjunction) {
+                    newDisjunction = new PatternDisjunction();
+                    newDisjunction->m_parent = disjunction->m_parent;
+                }
+                PatternAlternative* newAlternative = newDisjunction->addNewAlternative();
+                for (unsigned i = 0; i < alternative->m_terms.size(); ++i)
+                    newAlternative->m_terms.append(copyTerm(alternative->m_terms[i], filterStartsWithBOL));
+            }
         }
-
-        m_pattern.m_disjunctions.append(newDisjunction);
+        
+        if (newDisjunction)
+            m_pattern.m_disjunctions.append(newDisjunction);
         return newDisjunction;
     }
-
-    PatternTerm copyTerm(PatternTerm& term)
+    
+    PatternTerm copyTerm(PatternTerm& term, bool filterStartsWithBOL = false)
     {
         if ((term.type != PatternTerm::TypeParenthesesSubpattern) && (term.type != PatternTerm::TypeParentheticalAssertion))
             return PatternTerm(term);
-
+        
         PatternTerm termCopy = term;
-        termCopy.parentheses.disjunction = copyDisjunction(termCopy.parentheses.disjunction);
+        termCopy.parentheses.disjunction = copyDisjunction(termCopy.parentheses.disjunction, filterStartsWithBOL);
         return termCopy;
     }
-
+    
     void quantifyAtom(unsigned min, unsigned max, bool greedy)
     {
         ASSERT(min <= max);
@@ -624,14 +667,17 @@ public:
             case PatternTerm::TypeParenthesesSubpattern:
                 // Note: for fixed once parentheses we will ensure at least the minimum is available; others are on their own.
                 term.frameLocation = currentCallFrameSize;
-                if ((term.quantityCount == 1) && !term.parentheses.isCopy) {
-                    if (term.quantityType == QuantifierFixedCount) {
-                        currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition);
-                        currentInputPosition += term.parentheses.disjunction->m_minimumSize;
-                    } else {
+                if (term.quantityCount == 1 && !term.parentheses.isCopy) {
+                    if (term.quantityType != QuantifierFixedCount)
                         currentCallFrameSize += RegexStackSpaceForBackTrackInfoParenthesesOnce;
-                        currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition);
-                    }
+                    currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition);
+                    // If quantity is fixed, then pre-check its minimum size.
+                    if (term.quantityType == QuantifierFixedCount)
+                        currentInputPosition += term.parentheses.disjunction->m_minimumSize;
+                    term.inputPosition = currentInputPosition;
+                } else if (term.parentheses.isTerminal) {
+                    currentCallFrameSize += RegexStackSpaceForBackTrackInfoParenthesesTerminal;
+                    currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition);
                     term.inputPosition = currentInputPosition;
                 } else {
                     term.inputPosition = currentInputPosition;
@@ -685,11 +731,208 @@ public:
         setupDisjunctionOffsets(m_pattern.m_body, 0, 0);
     }
 
+    // This optimization identifies sets of parentheses that we will never need to backtrack.
+    // In these cases we do not need to store state from prior iterations.
+    // We can presently avoid backtracking for:
+    //   * a set of parens at the end of the regular expression (last term in any of the alternatives of the main body disjunction).
+    //   * where the parens are non-capturing, and quantified unbounded greedy (*).
+    //   * where the parens do not contain any capturing subpatterns.
+    void checkForTerminalParentheses()
+    {
+        // This check is much too crude; should be just checking whether the candidate
+        // node contains nested capturing subpatterns, not the whole expression!
+        if (m_pattern.m_numSubpatterns)
+            return;
+
+        Vector<PatternAlternative*>& alternatives = m_pattern.m_body->m_alternatives;
+        for (unsigned i =0; i < alternatives.size(); ++i) {
+            Vector<PatternTerm>& terms = alternatives[i]->m_terms;
+            if (terms.size()) {
+                PatternTerm& term = terms.last();
+                if (term.type == PatternTerm::TypeParenthesesSubpattern
+                    && term.quantityType == QuantifierGreedy
+                    && term.quantityCount == UINT_MAX
+                    && !term.capture())
+                    term.parentheses.isTerminal = true;
+            }
+        }
+    }
+
+    void optimizeBOL()
+    {
+        // Look for expressions containing beginning of line (^) anchoring and unroll them.
+        // e.g. /^a|^b|c/ becomes /^a|^b|c/ which is executed once followed by /c/ which loops
+        // This code relies on the parsing code tagging alternatives with m_containsBOL and
+        // m_startsWithBOL and rolling those up to containing alternatives.
+        // At this point, this is only valid for non-multiline expressions.
+        PatternDisjunction* disjunction = m_pattern.m_body;
+        
+        if (!m_pattern.m_containsBOL || m_pattern.m_multiline)
+            return;
+        
+        PatternDisjunction* loopDisjunction = copyDisjunction(disjunction, true);
+
+        // Set alternatives in disjunction to "onceThrough"
+        for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt)
+            disjunction->m_alternatives[alt]->setOnceThrough();
+
+        if (loopDisjunction) {
+            // Move alternatives from loopDisjunction to disjunction
+            for (unsigned alt = 0; alt < loopDisjunction->m_alternatives.size(); ++alt)
+                disjunction->m_alternatives.append(loopDisjunction->m_alternatives[alt]);
+                
+            loopDisjunction->m_alternatives.clear();
+        }
+    }
+
+    bool addBeginTerm(PatternTerm term, Vector<TermChain>* beginTerms, PatternAlternative* alternative, unsigned numTerms, unsigned termIndex, unsigned depth)
+    {
+        if (term.quantityType == QuantifierFixedCount) {
+            beginTerms->append(TermChain(term));
+            if (depth < 2 && termIndex < numTerms - 1 && term.quantityCount == 1)
+                setupAlternativeBeginTerms(alternative, &beginTerms->last().hotTerms, termIndex + 1, depth + 1);
+        } else if (termIndex != numTerms - 1) {
+            beginTerms->append(TermChain(term));
+            return true;
+        }
+
+        return false;
+    }
+
+    // This function collects the terms which are potentially matching the first number of depth characters in the result.
+    // If this function returns false then it found at least one term which makes the beginning character
+    // look-up optimization inefficient.
+    bool setupDisjunctionBeginTerms(PatternDisjunction* disjunction, Vector<TermChain>* beginTerms, unsigned depth)
+    {
+        for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
+            PatternAlternative* alternative = disjunction->m_alternatives[alt];
+
+            if (!setupAlternativeBeginTerms(alternative, beginTerms, 0, depth))
+                return false;
+        }
+
+        return true;
+    }
+
+    bool setupAlternativeBeginTerms(PatternAlternative* alternative, Vector<TermChain>* beginTerms, unsigned termIndex, unsigned depth)
+    {
+        bool checkNext = true;
+        unsigned numTerms = alternative->m_terms.size();
+
+        while (checkNext && termIndex < numTerms) {
+            PatternTerm term = alternative->m_terms[termIndex];
+            checkNext = false;
+
+            switch (term.type) {
+            case PatternTerm::TypeAssertionBOL:
+            case PatternTerm::TypeAssertionEOL:
+            case PatternTerm::TypeAssertionWordBoundary:
+                return false;
+
+            case PatternTerm::TypeBackReference:
+            case PatternTerm::TypeForwardReference:
+                return false;
+
+            case PatternTerm::TypePatternCharacter:
+                if (addBeginTerm(term, beginTerms, alternative, numTerms, termIndex, depth)) {
+                    termIndex++;
+                    checkNext = true;
+                }
+                break;
+
+            case PatternTerm::TypeCharacterClass:
+                return false;
+
+            case PatternTerm::TypeParentheticalAssertion:
+                if (term.invertOrCapture)
+                    return false;
+
+            case PatternTerm::TypeParenthesesSubpattern:
+                if (term.quantityType != QuantifierFixedCount) {
+                    if (termIndex == numTerms - 1)
+                        break;
+
+                    termIndex++;
+                    checkNext = true;
+
+                }
+
+                if (!setupDisjunctionBeginTerms(term.parentheses.disjunction, beginTerms, depth))
+                    return false;
+
+                break;
+            }
+        }
+
+        return true;
+    }
+
+    void setupBeginChars()
+    {
+        Vector<TermChain> beginTerms;
+        bool containsFixedCharacter = false;
+
+        if ((!m_pattern.m_body->m_hasFixedSize || m_pattern.m_body->m_alternatives.size() > 1)
+                && setupDisjunctionBeginTerms(m_pattern.m_body, &beginTerms, 0)) {
+            unsigned size = beginTerms.size();
+
+            // If we haven't collected any terms we should abort the preparation of beginning character look-up optimization.
+            if (!size)
+                return;
+
+            m_pattern.m_containsBeginChars = true;
+
+            for (unsigned i = 0; i < size; i++) {
+                PatternTerm term = beginTerms[i].term;
+
+                // We have just collected PatternCharacter terms, other terms are not allowed.
+                ASSERT(term.type == PatternTerm::TypePatternCharacter);
+
+                if (term.quantityType == QuantifierFixedCount)
+                    containsFixedCharacter = true;
+
+                UChar character = term.patternCharacter;
+                unsigned mask = 0;
+
+                if (character <= 0x7f) {
+                    if (m_pattern.m_ignoreCase && isASCIIAlpha(character)) {
+                        mask = 32;
+                        character = toASCIILower(character);
+                    }
+
+                    m_beginCharHelper.addBeginChar(BeginChar(character, mask), &beginTerms[i].hotTerms, term.quantityType, term.quantityCount);
+                } else {
+                    UChar upper, lower;
+                    if (m_pattern.m_ignoreCase && ((upper = Unicode::toUpper(character)) != (lower = Unicode::toLower(character)))) {
+                        m_beginCharHelper.addBeginChar(BeginChar(upper, mask), &beginTerms[i].hotTerms, term.quantityType, term.quantityCount);
+                        m_beginCharHelper.addBeginChar(BeginChar(lower, mask), &beginTerms[i].hotTerms, term.quantityType, term.quantityCount);
+                    } else
+                        m_beginCharHelper.addBeginChar(BeginChar(character, mask), &beginTerms[i].hotTerms, term.quantityType, term.quantityCount);
+                }
+            }
+
+            // If the pattern doesn't contain terms with fixed quantifiers then the beginning character look-up optimization is inefficient.
+            if (!containsFixedCharacter) {
+                m_pattern.m_containsBeginChars = false;
+                return;
+            }
+
+            size = m_pattern.m_beginChars.size();
+
+            if (size > 2)
+                m_beginCharHelper.merge(size - 1);
+            else if (size <= 1)
+                m_pattern.m_containsBeginChars = false;
+        }
+    }
+
 private:
     RegexPattern& m_pattern;
     PatternAlternative* m_alternative;
     CharacterClassConstructor m_characterClassConstructor;
+    BeginCharHelper m_beginCharHelper;
     bool m_invertCharacterClass;
+    bool m_invertParentheticalAssertion;
 };
 
 
@@ -717,12 +960,14 @@ const char* compileRegex(const UString& patternString, RegexPattern& pattern)
         ASSERT(numSubpatterns == pattern.m_numSubpatterns);
     }
 
+    constructor.checkForTerminalParentheses();
+    constructor.optimizeBOL();
+        
     constructor.setupOffsets();
+    constructor.setupBeginChars();
 
-    return false;
+    return 0;
 };
 
 
 } }
-
-#endif
diff --git a/JavaScriptCore/yarr/RegexCompiler.h b/JavaScriptCore/yarr/RegexCompiler.h
index 3ed2be9..399374e 100644
--- a/JavaScriptCore/yarr/RegexCompiler.h
+++ b/JavaScriptCore/yarr/RegexCompiler.h
@@ -26,13 +26,9 @@
 #ifndef RegexCompiler_h
 #define RegexCompiler_h
 
-#include <wtf/Platform.h>
-
-#if ENABLE(YARR)
-
-#include <wtf/unicode/Unicode.h>
 #include "RegexParser.h"
 #include "RegexPattern.h"
+#include <wtf/unicode/Unicode.h>
 
 namespace JSC { namespace Yarr {
 
@@ -40,6 +36,4 @@ const char* compileRegex(const UString& patternString, RegexPattern& pattern);
 
 } } // namespace JSC::Yarr
 
-#endif
-
 #endif // RegexCompiler_h
diff --git a/JavaScriptCore/yarr/RegexInterpreter.cpp b/JavaScriptCore/yarr/RegexInterpreter.cpp
index d088086..164158e 100644
--- a/JavaScriptCore/yarr/RegexInterpreter.cpp
+++ b/JavaScriptCore/yarr/RegexInterpreter.cpp
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2009 Apple Inc. All rights reserved.
+ * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -28,13 +29,12 @@
 
 #include "RegexCompiler.h"
 #include "RegexPattern.h"
+#include <wtf/BumpPointerAllocator.h>
 
 #ifndef NDEBUG
 #include <stdio.h>
 #endif
 
-#if ENABLE(YARR)
-
 using namespace WTF;
 
 namespace JSC { namespace Yarr {
@@ -60,7 +60,10 @@ public:
         uintptr_t begin;
     };
     struct BackTrackInfoParenthesesOnce {
-        uintptr_t inParentheses;
+        uintptr_t begin;
+    };
+    struct BackTrackInfoParenthesesTerminal {
+        uintptr_t begin;
     };
     struct BackTrackInfoParentheses {
         uintptr_t matchAmount;
@@ -104,12 +107,16 @@ public:
 
     DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction)
     {
-        return new(malloc(sizeof(DisjunctionContext) + (disjunction->m_frameSize - 1) * sizeof(uintptr_t))) DisjunctionContext();
+        size_t size = sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
+        allocatorPool = allocatorPool->ensureCapacity(size);
+        if (!allocatorPool)
+            CRASH();
+        return new(allocatorPool->alloc(size)) DisjunctionContext();
     }
 
     void freeDisjunctionContext(DisjunctionContext* context)
     {
-        free(context);
+        allocatorPool = allocatorPool->dealloc(context);
     }
 
     struct ParenthesesDisjunctionContext
@@ -150,12 +157,16 @@ public:
 
     ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, int* output, ByteTerm& term)
     {
-        return new(malloc(sizeof(ParenthesesDisjunctionContext) + (((term.atom.parenthesesDisjunction->m_numSubpatterns << 1) - 1) * sizeof(int)) + sizeof(DisjunctionContext) + (disjunction->m_frameSize - 1) * sizeof(uintptr_t))) ParenthesesDisjunctionContext(output, term);
+        size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(int) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(int) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
+        allocatorPool = allocatorPool->ensureCapacity(size);
+        if (!allocatorPool)
+            CRASH();
+        return new(allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term);
     }
 
     void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context)
     {
-        free(context);
+        allocatorPool = allocatorPool->dealloc(context);
     }
 
     class InputStream {
@@ -186,6 +197,12 @@ public:
             return -1;
         }
 
+        int readPair()
+        {
+            ASSERT(pos + 1 < length);
+            return input[pos] | input[pos + 1] << 16;
+        }
+
         int readChecked(int position)
         {
             ASSERT(position < 0);
@@ -253,6 +270,11 @@ public:
             return (pos + position) == length;
         }
 
+        bool isNotAvailableInput(int position)
+        {
+            return (pos + position) > length;
+        }
+
     private:
         const UChar* input;
         unsigned pos;
@@ -280,20 +302,6 @@ public:
         return false;
     }
 
-    bool tryConsumeCharacter(int testChar)
-    {
-        if (input.atEnd())
-            return false;
-
-        int ch = input.read();
-
-        if (pattern->m_ignoreCase ? ((Unicode::toLower(testChar) == ch) || (Unicode::toUpper(testChar) == ch)) : (testChar == ch)) {
-            input.next();
-            return true;
-        }
-        return false;
-    }
-
     bool checkCharacter(int testChar, int inputPosition)
     {
         return testChar == input.readChecked(inputPosition);
@@ -305,23 +313,6 @@ public:
         return (loChar == ch) || (hiChar == ch);
     }
 
-    bool tryConsumeCharacterClass(CharacterClass* characterClass, bool invert)
-    {
-        if (input.atEnd())
-            return false;
-
-        bool match = testCharacterClass(characterClass, input.read());
-
-        if (invert)
-            match = !match;
-
-        if (match) {
-            input.next();
-            return true;
-        }
-        return false;
-    }
-
     bool checkCharacterClass(CharacterClass* characterClass, bool invert, int inputPosition)
     {
         bool match = testCharacterClass(characterClass, input.readChecked(inputPosition));
@@ -335,10 +326,24 @@ public:
         if (!input.checkInput(matchSize))
             return false;
 
-        for (int i = 0; i < matchSize; ++i) {
-            if (!checkCharacter(input.reread(matchBegin + i), inputOffset - matchSize + i)) {
-                input.uncheckInput(matchSize);
-                return false;
+        if (pattern->m_ignoreCase) {
+            for (int i = 0; i < matchSize; ++i) {
+                int ch = input.reread(matchBegin + i);
+
+                int lo = Unicode::toLower(ch);
+                int hi = Unicode::toUpper(ch);
+
+                if ((lo != hi) ? (!checkCasedCharacter(lo, hi, inputOffset - matchSize + i)) : (!checkCharacter(ch, inputOffset - matchSize + i))) {
+                    input.uncheckInput(matchSize);
+                    return false;
+                }
+            }
+        } else {
+            for (int i = 0; i < matchSize; ++i) {
+                if (!checkCharacter(input.reread(matchBegin + i), inputOffset - matchSize + i)) {
+                    input.uncheckInput(matchSize);
+                    return false;
+                }
             }
         }
 
@@ -503,6 +508,13 @@ public:
 
         int matchBegin = output[(term.atom.subpatternId << 1)];
         int matchEnd = output[(term.atom.subpatternId << 1) + 1];
+
+        // If the end position of the referenced match hasn't set yet then the backreference in the same parentheses where it references to that.
+        // In this case the result of match is empty string like when it references to a parentheses with zero-width match.
+        // Eg.: /(a\1)/
+        if (matchEnd == -1)
+            return true;
+
         ASSERT((matchBegin == -1) == (matchEnd == -1));
         ASSERT(matchBegin <= matchEnd);
 
@@ -592,27 +604,24 @@ public:
         unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
         context->restoreOutput(output, firstSubpatternId, count);
     }
-    void resetAssertionMatches(ByteTerm& term)
-    {
-        unsigned firstSubpatternId = term.atom.subpatternId;
-        unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
-        for (unsigned i = 0; i < (count << 1); ++i)
-            output[(firstSubpatternId << 1) + i] = -1;
-    }
-    bool parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack)
+    JSRegExpResult parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack)
     {
         while (backTrack->matchAmount) {
             ParenthesesDisjunctionContext* context = backTrack->lastContext;
 
-            if (matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true))
-                return true;
+            JSRegExpResult result = matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true);
+            if (result == JSRegExpMatch)
+                return JSRegExpMatch;
 
             resetMatches(term, context);
             popParenthesesDisjunctionContext(backTrack);
             freeParenthesesDisjunctionContext(context);
+
+            if (result != JSRegExpNoMatch)
+                return result;
         }
 
-        return false;
+        return JSRegExpNoMatch;
     }
 
     bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
@@ -625,11 +634,11 @@ public:
         switch (term.atom.quantityType) {
         case QuantifierGreedy: {
             // set this speculatively; if we get to the parens end this will be true.
-            backTrack->inParentheses = 1;
+            backTrack->begin = input.getPos();
             break;
         }
         case QuantifierNonGreedy: {
-            backTrack->inParentheses = 0;
+            backTrack->begin = notFound;
             context->term += term.atom.parenthesesWidth;
             return true;
         }
@@ -645,7 +654,7 @@ public:
         return true;
     }
 
-    bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext*)
+    bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
     {
         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
         ASSERT(term.atom.quantityCount == 1);
@@ -654,7 +663,12 @@ public:
             unsigned subpatternId = term.atom.subpatternId;
             output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition;
         }
-        return true;
+
+        if (term.atom.quantityType == QuantifierFixedCount)
+            return true;
+
+        BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
+        return backTrack->begin != input.getPos();
     }
 
     bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
@@ -673,12 +687,12 @@ public:
         switch (term.atom.quantityType) {
         case QuantifierGreedy:
             // if we backtrack to this point, there is another chance - try matching nothing.
-            ASSERT(backTrack->inParentheses);
-            backTrack->inParentheses = 0;
+            ASSERT(backTrack->begin != notFound);
+            backTrack->begin = notFound;
             context->term += term.atom.parenthesesWidth;
             return true;
         case QuantifierNonGreedy:
-            ASSERT(backTrack->inParentheses);
+            ASSERT(backTrack->begin != notFound);
         case QuantifierFixedCount:
             break;
         }
@@ -695,17 +709,21 @@ public:
 
         switch (term.atom.quantityType) {
         case QuantifierGreedy:
-            if (!backTrack->inParentheses) {
+            if (backTrack->begin == notFound) {
                 context->term -= term.atom.parenthesesWidth;
                 return false;
             }
         case QuantifierNonGreedy:
-            if (!backTrack->inParentheses) {
-                // now try to match the parens; set this speculatively.
-                backTrack->inParentheses = 1;
+            if (backTrack->begin == notFound) {
+                backTrack->begin = input.getPos();
                 if (term.capture()) {
+                    // Technically this access to inputPosition should be accessing the begin term's
+                    // inputPosition, but for repeats other than fixed these values should be
+                    // the same anyway! (we don't pre-check for greedy or non-greedy matches.)
+                    ASSERT((&term - term.atom.parenthesesWidth)->type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
+                    ASSERT((&term - term.atom.parenthesesWidth)->inputPosition == term.inputPosition);
                     unsigned subpatternId = term.atom.subpatternId;
-                    output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition;
+                    output[subpatternId << 1] = input.getPos() + term.inputPosition;
                 }
                 context->term -= term.atom.parenthesesWidth;
                 return true;
@@ -717,6 +735,53 @@ public:
         return false;
     }
 
+    bool matchParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
+    {
+        ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
+        ASSERT(term.atom.quantityType == QuantifierGreedy);
+        ASSERT(term.atom.quantityCount == UINT_MAX);
+        ASSERT(!term.capture());
+
+        BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
+        backTrack->begin = input.getPos();
+        return true;
+    }
+
+    bool matchParenthesesTerminalEnd(ByteTerm& term, DisjunctionContext* context)
+    {
+        ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalEnd);
+
+        BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
+        // Empty match is a failed match.
+        if (backTrack->begin == input.getPos())
+            return false;
+
+        // Successful match! Okay, what's next? - loop around and try to match moar!
+        context->term -= (term.atom.parenthesesWidth + 1);
+        return true;
+    }
+
+    bool backtrackParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
+    {
+        ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
+        ASSERT(term.atom.quantityType == QuantifierGreedy);
+        ASSERT(term.atom.quantityCount == UINT_MAX);
+        ASSERT(!term.capture());
+
+        // If we backtrack to this point, we have failed to match this iteration of the parens.
+        // Since this is greedy / zero minimum a failed is also accepted as a match!
+        context->term += term.atom.parenthesesWidth;
+        return true;
+    }
+
+    bool backtrackParenthesesTerminalEnd(ByteTerm&, DisjunctionContext*)
+    {
+        // 'Terminal' parentheses are at the end of the regex, and as such a match past end
+        // should always be returned as a successful match - we should never becktrack to here.
+        ASSERT_NOT_REACHED();
+        return false;
+    }
+
     bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
     {
         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
@@ -773,7 +838,7 @@ public:
         return false;
     }
 
-    bool matchParentheses(ByteTerm& term, DisjunctionContext* context)
+    JSRegExpResult matchParentheses(ByteTerm& term, DisjunctionContext* context)
     {
         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
 
@@ -794,31 +859,42 @@ public:
             while (backTrack->matchAmount < term.atom.quantityCount) {
                 // Try to do a match, and it it succeeds, add it to the list.
                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
-                if (matchDisjunction(disjunctionBody, context->getDisjunctionContext(term)))
+                JSRegExpResult result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
+                if (result == JSRegExpMatch)
                     appendParenthesesDisjunctionContext(backTrack, context);
                 else {
                     // The match failed; try to find an alternate point to carry on from.
                     resetMatches(term, context);
                     freeParenthesesDisjunctionContext(context);
-                    if (!parenthesesDoBacktrack(term, backTrack))
-                        return false;
+
+                    if (result == JSRegExpNoMatch) {
+                        JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
+                        if (backtrackResult != JSRegExpMatch)
+                            return backtrackResult;
+                    } else
+                        return result;
                 }
             }
 
             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
             ParenthesesDisjunctionContext* context = backTrack->lastContext;
             recordParenthesesMatch(term, context);
-            return true;
+            return JSRegExpMatch;
         }
 
         case QuantifierGreedy: {
             while (backTrack->matchAmount < term.atom.quantityCount) {
                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
-                if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)))
+                JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
+                if (result == JSRegExpMatch)
                     appendParenthesesDisjunctionContext(backTrack, context);
                 else {
                     resetMatches(term, context);
                     freeParenthesesDisjunctionContext(context);
+
+                    if (result != JSRegExpNoMatch)
+                        return result;
+
                     break;
                 }
             }
@@ -827,15 +903,15 @@ public:
                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
                 recordParenthesesMatch(term, context);
             }
-            return true;
+            return JSRegExpMatch;
         }
 
         case QuantifierNonGreedy:
-            return true;
+            return JSRegExpMatch;
         }
 
         ASSERT_NOT_REACHED();
-        return false;
+        return JSRegExpErrorNoMatch;
     }
 
     // Rules for backtracking differ depending on whether this is greedy or non-greedy.
@@ -848,7 +924,7 @@ public:
     // Non-greedy, we've already done the one less case, so don't match on popping.
     // We haven't done the one more case, so always try to add that.
     //
-    bool backtrackParentheses(ByteTerm& term, DisjunctionContext* context)
+    JSRegExpResult backtrackParentheses(ByteTerm& term, DisjunctionContext* context)
     {
         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
 
@@ -867,44 +943,58 @@ public:
             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
 
             ParenthesesDisjunctionContext* context = 0;
+            JSRegExpResult result = parenthesesDoBacktrack(term, backTrack);
 
-            if (!parenthesesDoBacktrack(term, backTrack))
-                return false;
+            if (result != JSRegExpMatch)
+                return result;
 
             // While we haven't yet reached our fixed limit,
             while (backTrack->matchAmount < term.atom.quantityCount) {
                 // Try to do a match, and it it succeeds, add it to the list.
                 context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
-                if (matchDisjunction(disjunctionBody, context->getDisjunctionContext(term)))
+                result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
+
+                if (result == JSRegExpMatch)
                     appendParenthesesDisjunctionContext(backTrack, context);
                 else {
                     // The match failed; try to find an alternate point to carry on from.
                     resetMatches(term, context);
                     freeParenthesesDisjunctionContext(context);
-                    if (!parenthesesDoBacktrack(term, backTrack))
-                        return false;
+
+                    if (result == JSRegExpNoMatch) {
+                        JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
+                        if (backtrackResult != JSRegExpMatch)
+                            return backtrackResult;
+                    } else
+                        return result;
                 }
             }
 
             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
             context = backTrack->lastContext;
             recordParenthesesMatch(term, context);
-            return true;
+            return JSRegExpMatch;
         }
 
         case QuantifierGreedy: {
             if (!backTrack->matchAmount)
-                return false;
+                return JSRegExpNoMatch;
 
             ParenthesesDisjunctionContext* context = backTrack->lastContext;
-            if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true)) {
+            JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
+            if (result == JSRegExpMatch) {
                 while (backTrack->matchAmount < term.atom.quantityCount) {
                     ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
-                    if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)))
+                    JSRegExpResult parenthesesResult = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
+                    if (parenthesesResult == JSRegExpMatch)
                         appendParenthesesDisjunctionContext(backTrack, context);
                     else {
                         resetMatches(term, context);
                         freeParenthesesDisjunctionContext(context);
+
+                        if (parenthesesResult != JSRegExpNoMatch)
+                            return parenthesesResult;
+
                         break;
                     }
                 }
@@ -912,63 +1002,108 @@ public:
                 resetMatches(term, context);
                 popParenthesesDisjunctionContext(backTrack);
                 freeParenthesesDisjunctionContext(context);
+
+                if (result != JSRegExpNoMatch)
+                    return result;
             }
 
             if (backTrack->matchAmount) {
                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
                 recordParenthesesMatch(term, context);
             }
-            return true;
+            return JSRegExpMatch;
         }
 
         case QuantifierNonGreedy: {
             // If we've not reached the limit, try to add one more match.
             if (backTrack->matchAmount < term.atom.quantityCount) {
                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
-                if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term))) {
+                JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
+                if (result == JSRegExpMatch) {
                     appendParenthesesDisjunctionContext(backTrack, context);
                     recordParenthesesMatch(term, context);
-                    return true;
-                } else {
-                    resetMatches(term, context);
-                    freeParenthesesDisjunctionContext(context);
+                    return JSRegExpMatch;
                 }
+
+                resetMatches(term, context);
+                freeParenthesesDisjunctionContext(context);
+
+                if (result != JSRegExpNoMatch)
+                    return result;
             }
 
             // Nope - okay backtrack looking for an alternative.
             while (backTrack->matchAmount) {
                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
-                if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true)) {
+                JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
+                if (result == JSRegExpMatch) {
                     // successful backtrack! we're back in the game!
                     if (backTrack->matchAmount) {
                         context = backTrack->lastContext;
                         recordParenthesesMatch(term, context);
                     }
-                    return true;
+                    return JSRegExpMatch;
                 }
 
                 // pop a match off the stack
                 resetMatches(term, context);
                 popParenthesesDisjunctionContext(backTrack);
                 freeParenthesesDisjunctionContext(context);
+
+                return result;
             }
 
-            return false;
+            return JSRegExpNoMatch;
         }
         }
 
         ASSERT_NOT_REACHED();
-        return false;
+        return JSRegExpErrorNoMatch;
+    }
+
+    void lookupForBeginChars()
+    {
+        int character;
+        bool firstSingleCharFound;
+
+        while (true) {
+            if (input.isNotAvailableInput(2))
+                return;
+
+            firstSingleCharFound = false;
+
+            character = input.readPair();
+
+            for (unsigned i = 0; i < pattern->m_beginChars.size(); ++i) {
+                BeginChar bc = pattern->m_beginChars[i];
+
+                if (!firstSingleCharFound && bc.value <= 0xFFFF) {
+                    firstSingleCharFound = true;
+                    character &= 0xFFFF;
+                }
+
+                if ((character | bc.mask) == bc.value)
+                    return;
+            }
+
+            input.next();
+        }
     }
 
 #define MATCH_NEXT() { ++context->term; goto matchAgain; }
 #define BACKTRACK() { --context->term; goto backtrack; }
 #define currentTerm() (disjunction->terms[context->term])
-    bool matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
+    JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false, bool isBody = false)
     {
+        if (!--remainingMatchCount)
+            return JSRegExpErrorHitLimit;
+
         if (btrack)
             BACKTRACK();
 
+        if (pattern->m_containsBeginChars && isBody)
+            lookupForBeginChars();
+
         context->matchBegin = input.getPos();
         context->term = 0;
 
@@ -980,14 +1115,14 @@ public:
             MATCH_NEXT();
         case ByteTerm::TypeSubpatternEnd:
             context->matchEnd = input.getPos();
-            return true;
+            return JSRegExpMatch;
 
         case ByteTerm::TypeBodyAlternativeBegin:
             MATCH_NEXT();
         case ByteTerm::TypeBodyAlternativeDisjunction:
         case ByteTerm::TypeBodyAlternativeEnd:
             context->matchEnd = input.getPos();
-            return true;
+            return JSRegExpMatch;
 
         case ByteTerm::TypeAlternativeBegin:
             MATCH_NEXT();
@@ -1077,10 +1212,16 @@ public:
             if (matchBackReference(currentTerm(), context))
                 MATCH_NEXT();
             BACKTRACK();
-        case ByteTerm::TypeParenthesesSubpattern:
-            if (matchParentheses(currentTerm(), context))
+        case ByteTerm::TypeParenthesesSubpattern: {
+            JSRegExpResult result = matchParentheses(currentTerm(), context);
+
+            if (result == JSRegExpMatch) {
                 MATCH_NEXT();
+            }  else if (result != JSRegExpNoMatch)
+                return result;
+
             BACKTRACK();
+        }
         case ByteTerm::TypeParenthesesSubpatternOnceBegin:
             if (matchParenthesesOnceBegin(currentTerm(), context))
                 MATCH_NEXT();
@@ -1089,6 +1230,14 @@ public:
             if (matchParenthesesOnceEnd(currentTerm(), context))
                 MATCH_NEXT();
             BACKTRACK();
+        case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
+            if (matchParenthesesTerminalBegin(currentTerm(), context))
+                MATCH_NEXT();
+            BACKTRACK();
+        case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
+            if (matchParenthesesTerminalEnd(currentTerm(), context))
+                MATCH_NEXT();
+            BACKTRACK();
         case ByteTerm::TypeParentheticalAssertionBegin:
             if (matchParentheticalAssertionBegin(currentTerm(), context))
                 MATCH_NEXT();
@@ -1112,7 +1261,7 @@ public:
 
         switch (currentTerm().type) {
         case ByteTerm::TypeSubpatternBegin:
-            return false;
+            return JSRegExpNoMatch;
         case ByteTerm::TypeSubpatternEnd:
             ASSERT_NOT_REACHED();
 
@@ -1124,10 +1273,18 @@ public:
                 MATCH_NEXT();
 
             if (input.atEnd())
-                return false;
+                return JSRegExpNoMatch;
 
             input.next();
+
+            if (pattern->m_containsBeginChars && isBody)
+                lookupForBeginChars();
+
             context->matchBegin = input.getPos();
+
+            if (currentTerm().alternative.onceThrough)
+                context->term += currentTerm().alternative.next;
+
             MATCH_NEXT();
         }
         case ByteTerm::TypeBodyAlternativeEnd:
@@ -1176,10 +1333,16 @@ public:
                 if (backtrackBackReference(currentTerm(), context))
                     MATCH_NEXT();
                 BACKTRACK();
-            case ByteTerm::TypeParenthesesSubpattern:
-                if (backtrackParentheses(currentTerm(), context))
+            case ByteTerm::TypeParenthesesSubpattern: {
+                JSRegExpResult result = backtrackParentheses(currentTerm(), context);
+
+                if (result == JSRegExpMatch) {
                     MATCH_NEXT();
+                } else if (result != JSRegExpNoMatch)
+                    return result;
+
                 BACKTRACK();
+            }
             case ByteTerm::TypeParenthesesSubpatternOnceBegin:
                 if (backtrackParenthesesOnceBegin(currentTerm(), context))
                     MATCH_NEXT();
@@ -1188,6 +1351,14 @@ public:
                 if (backtrackParenthesesOnceEnd(currentTerm(), context))
                     MATCH_NEXT();
                 BACKTRACK();
+            case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
+                if (backtrackParenthesesTerminalBegin(currentTerm(), context))
+                    MATCH_NEXT();
+                BACKTRACK();
+            case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
+                if (backtrackParenthesesTerminalEnd(currentTerm(), context))
+                    MATCH_NEXT();
+                BACKTRACK();
             case ByteTerm::TypeParentheticalAssertionBegin:
                 if (backtrackParentheticalAssertionBegin(currentTerm(), context))
                     MATCH_NEXT();
@@ -1203,36 +1374,48 @@ public:
         }
 
         ASSERT_NOT_REACHED();
-        return false;
+        return JSRegExpErrorNoMatch;
     }
 
-    bool matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
+    JSRegExpResult matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
     {
-        if (matchDisjunction(disjunction, context, btrack)) {
+        JSRegExpResult result = matchDisjunction(disjunction, context, btrack);
+
+        if (result == JSRegExpMatch) {
             while (context->matchBegin == context->matchEnd) {
-                if (!matchDisjunction(disjunction, context, true))
-                    return false;
+                result = matchDisjunction(disjunction, context, true);
+                if (result != JSRegExpMatch)
+                    return result;
             }
-            return true;
+            return JSRegExpMatch;
         }
 
-        return false;
+        return result;
     }
 
     int interpret()
     {
+        allocatorPool = pattern->m_allocator->startAllocator();
+        if (!allocatorPool)
+            CRASH();
+
         for (unsigned i = 0; i < ((pattern->m_body->m_numSubpatterns + 1) << 1); ++i)
             output[i] = -1;
 
         DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
 
-        if (matchDisjunction(pattern->m_body.get(), context)) {
+        JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false, true);
+        if (result == JSRegExpMatch) {
             output[0] = context->matchBegin;
             output[1] = context->matchEnd;
         }
 
         freeDisjunctionContext(context);
 
+        pattern->m_allocator->stopAllocator();
+
+        // RegExp.cpp currently expects all error to be converted to -1.
+        ASSERT((result == JSRegExpMatch) == (output[0] != -1));
         return output[0];
     }
 
@@ -1240,6 +1423,8 @@ public:
         : pattern(pattern)
         , output(output)
         , input(inputChar, start, length)
+        , allocatorPool(0)
+        , remainingMatchCount(matchLimit)
     {
     }
 
@@ -1247,6 +1432,8 @@ private:
     BytecodePattern *pattern;
     int* output;
     InputStream input;
+    BumpPointerPool* allocatorPool;
+    unsigned remainingMatchCount;
 };
 
 
@@ -1266,17 +1453,16 @@ public:
     ByteCompiler(RegexPattern& pattern)
         : m_pattern(pattern)
     {
-        m_bodyDisjunction = 0;
         m_currentAlternativeIndex = 0;
     }
 
-    BytecodePattern* compile()
+    PassOwnPtr<BytecodePattern> compile(BumpPointerAllocator* allocator)
     {
-        regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize);
+        regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize, m_pattern.m_body->m_alternatives[0]->onceThrough());
         emitDisjunction(m_pattern.m_body);
         regexEnd();
 
-        return new BytecodePattern(m_bodyDisjunction, m_allParenthesesInfo, m_pattern);
+        return adoptPtr(new BytecodePattern(m_bodyDisjunction.release(), m_allParenthesesInfo, m_pattern, allocator));
     }
 
     void checkInput(unsigned count)
@@ -1334,8 +1520,38 @@ public:
         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
     }
 
+    void atomParenthesesOnceBegin(unsigned subpatternId, bool capture, int inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
+    {
+        int beginTerm = m_bodyDisjunction->terms.size();
+
+        m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, inputPosition));
+        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
+        m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
+        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
+
+        m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
+        m_currentAlternativeIndex = beginTerm + 1;
+    }
+
+    void atomParenthesesTerminalBegin(unsigned subpatternId, bool capture, int inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
+    {
+        int beginTerm = m_bodyDisjunction->terms.size();
+
+        m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalBegin, subpatternId, capture, inputPosition));
+        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
+        m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
+        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
+
+        m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
+        m_currentAlternativeIndex = beginTerm + 1;
+    }
+
     void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, int inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
     {
+        // Errrk! - this is a little crazy, we initially generate as a TypeParenthesesSubpatternOnceBegin,
+        // then fix this up at the end! - simplifying this should make it much clearer.
+        // https://bugs.webkit.org/show_bug.cgi?id=50136
+
         int beginTerm = m_bodyDisjunction->terms.size();
 
         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, inputPosition));
@@ -1360,6 +1576,28 @@ public:
         m_currentAlternativeIndex = beginTerm + 1;
     }
 
+    void atomParentheticalAssertionEnd(int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
+    {
+        unsigned beginTerm = popParenthesesStack();
+        closeAlternative(beginTerm + 1);
+        unsigned endTerm = m_bodyDisjunction->terms.size();
+
+        ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin);
+
+        bool invertOrCapture = m_bodyDisjunction->terms[beginTerm].invertOrCapture;
+        unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
+
+        m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionEnd, subpatternId, invertOrCapture, inputPosition));
+        m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
+        m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
+        m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
+
+        m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
+        m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
+        m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount;
+        m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
+    }
+
     unsigned popParenthesesStack()
     {
         ASSERT(m_parenthesesStack.size());
@@ -1431,56 +1669,85 @@ public:
         m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
     }
 
-    void atomParenthesesEnd(bool doInline, unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0)
+    void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0)
+    {
+        unsigned beginTerm = popParenthesesStack();
+        closeAlternative(beginTerm + 1);
+        unsigned endTerm = m_bodyDisjunction->terms.size();
+
+        ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
+
+        ByteTerm& parenthesesBegin = m_bodyDisjunction->terms[beginTerm];
+
+        bool invertOrCapture = parenthesesBegin.invertOrCapture;
+        unsigned subpatternId = parenthesesBegin.atom.subpatternId;
+
+        unsigned numSubpatterns = lastSubpatternId - subpatternId + 1;
+        ByteDisjunction* parenthesesDisjunction = new ByteDisjunction(numSubpatterns, callFrameSize);
+
+        parenthesesDisjunction->terms.append(ByteTerm::SubpatternBegin());
+        for (unsigned termInParentheses = beginTerm + 1; termInParentheses < endTerm; ++termInParentheses)
+            parenthesesDisjunction->terms.append(m_bodyDisjunction->terms[termInParentheses]);
+        parenthesesDisjunction->terms.append(ByteTerm::SubpatternEnd());
+
+        m_bodyDisjunction->terms.shrink(beginTerm);
+
+        m_allParenthesesInfo.append(parenthesesDisjunction);
+        m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, invertOrCapture, inputPosition));
+
+        m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
+        m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
+        m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
+    }
+
+    void atomParenthesesOnceEnd(int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
     {
         unsigned beginTerm = popParenthesesStack();
         closeAlternative(beginTerm + 1);
         unsigned endTerm = m_bodyDisjunction->terms.size();
 
-        bool isAssertion = m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin;
+        ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
+
         bool invertOrCapture = m_bodyDisjunction->terms[beginTerm].invertOrCapture;
         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
 
-        m_bodyDisjunction->terms.append(ByteTerm(isAssertion ? ByteTerm::TypeParentheticalAssertionEnd : ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, invertOrCapture, inputPosition));
+        m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, invertOrCapture, inputPosition));
         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
 
-        if (doInline) {
-            m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
-            m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
-            m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount;
-            m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
-        } else {
-            ByteTerm& parenthesesBegin = m_bodyDisjunction->terms[beginTerm];
-            ASSERT(parenthesesBegin.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
-
-            bool invertOrCapture = parenthesesBegin.invertOrCapture;
-            unsigned subpatternId = parenthesesBegin.atom.subpatternId;
+        m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
+        m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
+        m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount;
+        m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
+    }
 
-            unsigned numSubpatterns = lastSubpatternId - subpatternId + 1;
-            ByteDisjunction* parenthesesDisjunction = new ByteDisjunction(numSubpatterns, callFrameSize);
+    void atomParenthesesTerminalEnd(int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
+    {
+        unsigned beginTerm = popParenthesesStack();
+        closeAlternative(beginTerm + 1);
+        unsigned endTerm = m_bodyDisjunction->terms.size();
 
-            parenthesesDisjunction->terms.append(ByteTerm::SubpatternBegin());
-            for (unsigned termInParentheses = beginTerm + 1; termInParentheses < endTerm; ++termInParentheses)
-                parenthesesDisjunction->terms.append(m_bodyDisjunction->terms[termInParentheses]);
-            parenthesesDisjunction->terms.append(ByteTerm::SubpatternEnd());
+        ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
 
-            m_bodyDisjunction->terms.shrink(beginTerm);
+        bool invertOrCapture = m_bodyDisjunction->terms[beginTerm].invertOrCapture;
+        unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
 
-            m_allParenthesesInfo.append(parenthesesDisjunction);
-            m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, invertOrCapture, inputPosition));
+        m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalEnd, subpatternId, invertOrCapture, inputPosition));
+        m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
+        m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
+        m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
 
-            m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
-            m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
-            m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
-        }
+        m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
+        m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
+        m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount;
+        m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
     }
 
-    void regexBegin(unsigned numSubpatterns, unsigned callFrameSize)
+    void regexBegin(unsigned numSubpatterns, unsigned callFrameSize, bool onceThrough)
     {
-        m_bodyDisjunction = new ByteDisjunction(numSubpatterns, callFrameSize);
-        m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin());
+        m_bodyDisjunction = adoptPtr(new ByteDisjunction(numSubpatterns, callFrameSize));
+        m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin(onceThrough));
         m_bodyDisjunction->terms[0].frameLocation = 0;
         m_currentAlternativeIndex = 0;
     }
@@ -1490,11 +1757,11 @@ public:
         closeBodyAlternative();
     }
 
-    void alternativeBodyDisjunction()
+    void alternativeBodyDisjunction(bool onceThrough)
     {
         int newAlternativeIndex = m_bodyDisjunction->terms.size();
         m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
-        m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction());
+        m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction(onceThrough));
 
         m_currentAlternativeIndex = newAlternativeIndex;
     }
@@ -1508,26 +1775,33 @@ public:
         m_currentAlternativeIndex = newAlternativeIndex;
     }
 
-    void emitDisjunction(PatternDisjunction* disjunction, unsigned inputCountAlreadyChecked = 0, unsigned parenthesesInputCountAlreadyChecked = 0)
+    void emitDisjunction(PatternDisjunction* disjunction, unsigned inputCountAlreadyChecked = 0, unsigned parenthesesInputCountAlreadyChecked = 0, bool isParentheticalAssertion = false)
     {
         for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
             unsigned currentCountAlreadyChecked = inputCountAlreadyChecked;
 
+            PatternAlternative* alternative = disjunction->m_alternatives[alt];
+
             if (alt) {
                 if (disjunction == m_pattern.m_body)
-                    alternativeBodyDisjunction();
+                    alternativeBodyDisjunction(alternative->onceThrough());
                 else
                     alternativeDisjunction();
             }
 
-            PatternAlternative* alternative = disjunction->m_alternatives[alt];
             unsigned minimumSize = alternative->m_minimumSize;
+            int countToCheck;
 
-            ASSERT(minimumSize >= parenthesesInputCountAlreadyChecked);
-            unsigned countToCheck = minimumSize - parenthesesInputCountAlreadyChecked;
-            if (countToCheck)
+            if (isParentheticalAssertion && parenthesesInputCountAlreadyChecked > minimumSize)
+                countToCheck = 0;
+            else
+                countToCheck = minimumSize - parenthesesInputCountAlreadyChecked;
+
+            ASSERT(countToCheck >= 0);
+            if (countToCheck) {
                 checkInput(countToCheck);
-            currentCountAlreadyChecked += countToCheck;
+                currentCountAlreadyChecked += countToCheck;
+            }
 
             for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
                 PatternTerm& term = alternative->m_terms[i];
@@ -1562,34 +1836,40 @@ public:
 
                 case PatternTerm::TypeParenthesesSubpattern: {
                     unsigned disjunctionAlreadyCheckedCount = 0;
-                    if ((term.quantityCount == 1) && !term.parentheses.isCopy) {
-                        if (term.quantityType == QuantifierFixedCount) {
+                    if (term.quantityCount == 1 && !term.parentheses.isCopy) {
+                        unsigned alternativeFrameLocation = term.frameLocation;
+                        // For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame.
+                        if (term.quantityType == QuantifierFixedCount)
                             disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize;
-                            unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
-                            atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.invertOrCapture, delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, term.frameLocation);
-                            emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, term.parentheses.disjunction->m_minimumSize);
-                            atomParenthesesEnd(true, term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
-                        } else {
-                            unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
-                            atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.invertOrCapture, delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, term.frameLocation + RegexStackSpaceForBackTrackInfoParenthesesOnce);
-                            emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
-                            atomParenthesesEnd(true, term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
-                        }
+                        else
+                            alternativeFrameLocation += RegexStackSpaceForBackTrackInfoParenthesesOnce;
+                        unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
+                        atomParenthesesOnceBegin(term.parentheses.subpatternId, term.invertOrCapture, delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, alternativeFrameLocation);
+                        emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
+                        atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
+                    } else if (term.parentheses.isTerminal) {
+                        unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
+                        atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.invertOrCapture, delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, term.frameLocation + RegexStackSpaceForBackTrackInfoParenthesesOnce);
+                        emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
+                        atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
                     } else {
                         unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
                         atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.invertOrCapture, delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, 0);
                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
-                        atomParenthesesEnd(false, term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
+                        atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
                     }
                     break;
                 }
 
                 case PatternTerm::TypeParentheticalAssertion: {
-                    unsigned alternativeFrameLocation = term.inputPosition + RegexStackSpaceForBackTrackInfoParentheticalAssertion;
+                    unsigned alternativeFrameLocation = term.frameLocation + RegexStackSpaceForBackTrackInfoParentheticalAssertion;
+
+                    ASSERT(currentCountAlreadyChecked >= (unsigned)term.inputPosition);
+                    int positiveInputOffset = currentCountAlreadyChecked - term.inputPosition;
 
                     atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invertOrCapture, term.frameLocation, alternativeFrameLocation);
-                    emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
-                    atomParenthesesEnd(true, term.parentheses.lastSubpatternId, 0, term.frameLocation, term.quantityCount, term.quantityType);
+                    emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, positiveInputOffset, true);
+                    atomParentheticalAssertionEnd(0, term.frameLocation, term.quantityCount, term.quantityType);
                     break;
                 }
                 }
@@ -1599,23 +1879,28 @@ public:
 
 private:
     RegexPattern& m_pattern;
-    ByteDisjunction* m_bodyDisjunction;
+    OwnPtr<ByteDisjunction> m_bodyDisjunction;
     unsigned m_currentAlternativeIndex;
     Vector<ParenthesesStackEntry> m_parenthesesStack;
     Vector<ByteDisjunction*> m_allParenthesesInfo;
 };
 
 
-BytecodePattern* byteCompileRegex(const UString& patternString, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline)
+PassOwnPtr<BytecodePattern> byteCompileRegex(const UString& patternString, unsigned& numSubpatterns, const char*& error, BumpPointerAllocator* allocator, bool ignoreCase, bool multiline)
 {
     RegexPattern pattern(ignoreCase, multiline);
 
     if ((error = compileRegex(patternString, pattern)))
-        return 0;
+        return PassOwnPtr<BytecodePattern>();
 
     numSubpatterns = pattern.m_numSubpatterns;
 
-    return ByteCompiler(pattern).compile();
+    return ByteCompiler(pattern).compile(allocator);
+}
+
+PassOwnPtr<BytecodePattern> byteCompileRegex(RegexPattern& pattern, BumpPointerAllocator* allocator)
+{
+    return ByteCompiler(pattern).compile(allocator);
 }
 
 int interpretRegex(BytecodePattern* regex, const UChar* input, unsigned start, unsigned length, int* output)
@@ -1634,5 +1919,3 @@ COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheses) == (RegexStackSpace
 
 
 } }
-
-#endif
diff --git a/JavaScriptCore/yarr/RegexInterpreter.h b/JavaScriptCore/yarr/RegexInterpreter.h
index 48c9a5e..2e23472 100644
--- a/JavaScriptCore/yarr/RegexInterpreter.h
+++ b/JavaScriptCore/yarr/RegexInterpreter.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2009 Apple Inc. All rights reserved.
+ * Copyright (C) 2009, 2010 Apple Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -26,16 +26,33 @@
 #ifndef RegexInterpreter_h
 #define RegexInterpreter_h
 
-#include <wtf/Platform.h>
-
-#if ENABLE(YARR)
-
-#include <wtf/unicode/Unicode.h>
 #include "RegexParser.h"
 #include "RegexPattern.h"
+#include <wtf/PassOwnPtr.h>
+#include <wtf/unicode/Unicode.h>
+
+namespace WTF {
+class BumpPointerAllocator;
+}
+using WTF::BumpPointerAllocator;
 
 namespace JSC { namespace Yarr {
 
+// TODO move the matchLimit constant and the JSRegExpResult enum to the JSRegExp.h when pcre is removed.
+
+// The below limit restricts the number of "recursive" match calls in order to
+// avoid spending exponential time on complex regular expressions.
+static const unsigned matchLimit = 1000000;
+
+enum JSRegExpResult {
+    JSRegExpMatch = 1,
+    JSRegExpNoMatch = 0,
+    JSRegExpErrorNoMatch = -1,
+    JSRegExpErrorHitLimit = -2,
+    JSRegExpErrorNoMemory = -3,
+    JSRegExpErrorInternal = -4
+};
+
 class ByteDisjunction;
 
 struct ByteTerm {
@@ -64,6 +81,8 @@ struct ByteTerm {
         TypeParenthesesSubpattern,
         TypeParenthesesSubpatternOnceBegin,
         TypeParenthesesSubpatternOnceEnd,
+        TypeParenthesesSubpatternTerminalBegin,
+        TypeParenthesesSubpatternTerminalEnd,
         TypeParentheticalAssertionBegin,
         TypeParentheticalAssertionEnd,
         TypeCheckInput,
@@ -90,6 +109,7 @@ struct ByteTerm {
         struct {
             int next;
             int end;
+            bool onceThrough;
         } alternative;
         unsigned checkInputCount;
     };
@@ -211,19 +231,21 @@ struct ByteTerm {
         return ByteTerm(TypeBackReference, subpatternId, false, inputPos);
     }
 
-    static ByteTerm BodyAlternativeBegin()
+    static ByteTerm BodyAlternativeBegin(bool onceThrough)
     {
         ByteTerm term(TypeBodyAlternativeBegin);
         term.alternative.next = 0;
         term.alternative.end = 0;
+        term.alternative.onceThrough = onceThrough;
         return term;
     }
 
-    static ByteTerm BodyAlternativeDisjunction()
+    static ByteTerm BodyAlternativeDisjunction(bool onceThrough)
     {
         ByteTerm term(TypeBodyAlternativeDisjunction);
         term.alternative.next = 0;
         term.alternative.end = 0;
+        term.alternative.onceThrough = onceThrough;
         return term;
     }
 
@@ -232,6 +254,7 @@ struct ByteTerm {
         ByteTerm term(TypeBodyAlternativeEnd);
         term.alternative.next = 0;
         term.alternative.end = 0;
+        term.alternative.onceThrough = false;
         return term;
     }
 
@@ -240,6 +263,7 @@ struct ByteTerm {
         ByteTerm term(TypeAlternativeBegin);
         term.alternative.next = 0;
         term.alternative.end = 0;
+        term.alternative.onceThrough = false;
         return term;
     }
 
@@ -248,6 +272,7 @@ struct ByteTerm {
         ByteTerm term(TypeAlternativeDisjunction);
         term.alternative.next = 0;
         term.alternative.end = 0;
+        term.alternative.onceThrough = false;
         return term;
     }
 
@@ -256,6 +281,7 @@ struct ByteTerm {
         ByteTerm term(TypeAlternativeEnd);
         term.alternative.next = 0;
         term.alternative.end = 0;
+        term.alternative.onceThrough = false;
         return term;
     }
 
@@ -294,10 +320,12 @@ public:
 };
 
 struct BytecodePattern : FastAllocBase {
-    BytecodePattern(ByteDisjunction* body, Vector<ByteDisjunction*> allParenthesesInfo, RegexPattern& pattern)
+    BytecodePattern(PassOwnPtr<ByteDisjunction> body, Vector<ByteDisjunction*> allParenthesesInfo, RegexPattern& pattern, BumpPointerAllocator* allocator)
         : m_body(body)
         , m_ignoreCase(pattern.m_ignoreCase)
         , m_multiline(pattern.m_multiline)
+        , m_containsBeginChars(pattern.m_containsBeginChars)
+        , m_allocator(allocator)
     {
         newlineCharacterClass = pattern.newlineCharacterClass();
         wordcharCharacterClass = pattern.wordcharCharacterClass();
@@ -308,6 +336,8 @@ struct BytecodePattern : FastAllocBase {
         // array, so that it won't delete them on destruction.  We'll
         // take responsibility for that.
         pattern.m_userCharacterClasses.clear();
+
+        m_beginChars.append(pattern.m_beginChars);
     }
 
     ~BytecodePattern()
@@ -319,19 +349,25 @@ struct BytecodePattern : FastAllocBase {
     OwnPtr<ByteDisjunction> m_body;
     bool m_ignoreCase;
     bool m_multiline;
-    
+    bool m_containsBeginChars;
+    // Each BytecodePattern is associated with a RegExp, each RegExp is associated
+    // with a JSGlobalData.  Cache a pointer to out JSGlobalData's m_regexAllocator.
+    BumpPointerAllocator* m_allocator;
+
     CharacterClass* newlineCharacterClass;
     CharacterClass* wordcharCharacterClass;
+
+    Vector<BeginChar> m_beginChars;
+
 private:
     Vector<ByteDisjunction*> m_allParenthesesInfo;
     Vector<CharacterClass*> m_userCharacterClasses;
 };
 
-BytecodePattern* byteCompileRegex(const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase = false, bool multiline = false);
+PassOwnPtr<BytecodePattern> byteCompileRegex(const UString& pattern, unsigned& numSubpatterns, const char*& error, BumpPointerAllocator*, bool ignoreCase = false, bool multiline = false);
+PassOwnPtr<BytecodePattern> byteCompileRegex(RegexPattern& pattern, BumpPointerAllocator*);
 int interpretRegex(BytecodePattern* v_regex, const UChar* input, unsigned start, unsigned length, int* output);
 
 } } // namespace JSC::Yarr
 
-#endif
-
 #endif // RegexInterpreter_h
diff --git a/JavaScriptCore/yarr/RegexJIT.cpp b/JavaScriptCore/yarr/RegexJIT.cpp
index fcb8d86..acbd458 100644
--- a/JavaScriptCore/yarr/RegexJIT.cpp
+++ b/JavaScriptCore/yarr/RegexJIT.cpp
@@ -31,8 +31,7 @@
 #include "LinkBuffer.h"
 #include "MacroAssembler.h"
 #include "RegexCompiler.h"
-
-#include "pcre.h" // temporary, remove when fallback is removed.
+#include "RegexInterpreter.h" // temporary, remove when fallback is removed.
 
 #if ENABLE(YARR_JIT)
 
@@ -40,7 +39,6 @@ using namespace WTF;
 
 namespace JSC { namespace Yarr {
 
-
 class RegexGenerator : private MacroAssembler {
     friend void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline);
 
@@ -54,6 +52,16 @@ class RegexGenerator : private MacroAssembler {
     static const RegisterID regT1 = ARMRegisters::r6;
 
     static const RegisterID returnRegister = ARMRegisters::r0;
+#elif CPU(MIPS)
+    static const RegisterID input = MIPSRegisters::a0;
+    static const RegisterID index = MIPSRegisters::a1;
+    static const RegisterID length = MIPSRegisters::a2;
+    static const RegisterID output = MIPSRegisters::a3;
+
+    static const RegisterID regT0 = MIPSRegisters::t4;
+    static const RegisterID regT1 = MIPSRegisters::t5;
+
+    static const RegisterID returnRegister = MIPSRegisters::v0;
 #elif CPU(X86)
     static const RegisterID input = X86Registers::eax;
     static const RegisterID index = X86Registers::edx;
@@ -145,6 +153,11 @@ class RegexGenerator : private MacroAssembler {
 
     void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
     {
+        if (charClass->m_table) {
+            ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table->m_table));
+            matchDest.append(branchTest8(charClass->m_table->m_inverted ? Zero : NonZero, tableEntry));   
+            return;
+        }
         Jump unicodeFail;
         if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
             Jump isAscii = branch32(LessThanOrEqual, character, Imm32(0x7f));
@@ -331,6 +344,15 @@ class RegexGenerator : private MacroAssembler {
             ASSERT(alternativeValid());
             return alternative()->m_terms[t];
         }
+        bool isLastTerm()
+        {
+            ASSERT(alternativeValid());
+            return (t + 1) == alternative()->m_terms.size();
+        }
+        bool isMainDisjunction()
+        {
+            return !disjunction->m_parent;
+        }
 
         PatternTerm& lookaheadTerm()
         {
@@ -599,10 +621,14 @@ class RegexGenerator : private MacroAssembler {
             ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
             failures.append(jumpIfCharNotEquals(ch, state.inputOffset()));
         }
+
         add32(Imm32(1), countRegister);
         add32(Imm32(1), index);
-        branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
-        failures.append(jump());
+        if (term.quantityCount != 0xffffffff) {
+            branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
+            failures.append(jump());
+        } else
+            jump(loop);
 
         Label backtrackBegin(this);
         loadFromFrame(term.frameLocation, countRegister);
@@ -636,7 +662,8 @@ class RegexGenerator : private MacroAssembler {
         loadFromFrame(term.frameLocation, countRegister);
 
         atEndOfInput().linkTo(hardFail, this);
-        branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
+        if (term.quantityCount != 0xffffffff)
+            branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
         if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
             readCharacter(state.inputOffset(), character);
             or32(Imm32(32), character);
@@ -722,8 +749,11 @@ class RegexGenerator : private MacroAssembler {
 
         add32(Imm32(1), countRegister);
         add32(Imm32(1), index);
-        branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
-        failures.append(jump());
+        if (term.quantityCount != 0xffffffff) {
+            branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
+            failures.append(jump());
+        } else
+            jump(loop);
 
         Label backtrackBegin(this);
         loadFromFrame(term.frameLocation, countRegister);
@@ -880,7 +910,7 @@ class RegexGenerator : private MacroAssembler {
         PatternDisjunction* disjunction = term.parentheses.disjunction;
         ASSERT(term.quantityCount == 1);
 
-        unsigned preCheckedCount = ((term.quantityCount == 1) && (term.quantityType == QuantifierFixedCount)) ? disjunction->m_minimumSize : 0;
+        unsigned preCheckedCount = (term.quantityType == QuantifierFixedCount) ? disjunction->m_minimumSize : 0;
 
         unsigned parenthesesFrameLocation = term.frameLocation;
         unsigned alternativeFrameLocation = parenthesesFrameLocation;
@@ -899,12 +929,12 @@ class RegexGenerator : private MacroAssembler {
             Jump nonGreedySkipParentheses;
             Label nonGreedyTryParentheses;
             if (term.quantityType == QuantifierGreedy)
-                storeToFrame(Imm32(1), parenthesesFrameLocation);
+                storeToFrame(index, parenthesesFrameLocation);
             else if (term.quantityType == QuantifierNonGreedy) {
-                storeToFrame(Imm32(0), parenthesesFrameLocation);
+                storeToFrame(Imm32(-1), parenthesesFrameLocation);
                 nonGreedySkipParentheses = jump();
                 nonGreedyTryParentheses = label();
-                storeToFrame(Imm32(1), parenthesesFrameLocation);
+                storeToFrame(index, parenthesesFrameLocation);
             }
 
             // store the match start index
@@ -922,41 +952,31 @@ class RegexGenerator : private MacroAssembler {
             TermGenerationState parenthesesState(disjunction, state.checkedTotal);
             generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
 
-            // store the match end index
-            if (term.invertOrCapture) {
-                int inputOffset = state.inputOffset();
-                if (inputOffset) {
-                    move(index, indexTemporary);
-                    add32(Imm32(state.inputOffset()), indexTemporary);
-                    store32(indexTemporary, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
-                } else
-                    store32(index, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
-            }
-            Jump success = jump();
+            Jump success = (term.quantityType == QuantifierFixedCount) ?
+                jump() :
+                branch32(NotEqual, index, Address(stackPointerRegister, (parenthesesFrameLocation * sizeof(void*))));
 
             // A failure AFTER the parens jumps here
             Label backtrackFromAfterParens(this);
 
             if (term.quantityType == QuantifierGreedy) {
-                // If this is zero we have now tested with both with and without the parens.
+                // If this is -1 we have now tested with both with and without the parens.
                 loadFromFrame(parenthesesFrameLocation, indexTemporary);
-                state.jumpToBacktrack(branchTest32(Zero, indexTemporary), this);
+                state.jumpToBacktrack(branch32(Equal, indexTemporary, Imm32(-1)), this);
             } else if (term.quantityType == QuantifierNonGreedy) {
-                // If this is zero we have now tested with both with and without the parens.
+                // If this is -1 we have now tested without the parens, now test with.
                 loadFromFrame(parenthesesFrameLocation, indexTemporary);
-                branchTest32(Zero, indexTemporary).linkTo(nonGreedyTryParentheses, this);
+                branch32(Equal, indexTemporary, Imm32(-1)).linkTo(nonGreedyTryParentheses, this);
             }
 
             parenthesesState.plantJumpToBacktrackIfExists(this);
             // A failure WITHIN the parens jumps here
             parenthesesState.linkAlternativeBacktracks(this);
-            if (term.invertOrCapture) {
+            if (term.invertOrCapture)
                 store32(Imm32(-1), Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
-                store32(Imm32(-1), Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
-            }
 
             if (term.quantityType == QuantifierGreedy)
-                storeToFrame(Imm32(0), parenthesesFrameLocation);
+                storeToFrame(Imm32(-1), parenthesesFrameLocation);
             else
                 state.jumpToBacktrack(jump(), this);
 
@@ -964,9 +984,67 @@ class RegexGenerator : private MacroAssembler {
             if (term.quantityType == QuantifierNonGreedy)
                 nonGreedySkipParentheses.link(this);
             success.link(this);
+
+            // store the match end index
+            if (term.invertOrCapture) {
+                int inputOffset = state.inputOffset();
+                if (inputOffset) {
+                    move(index, indexTemporary);
+                    add32(Imm32(state.inputOffset()), indexTemporary);
+                    store32(indexTemporary, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
+                } else
+                    store32(index, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
+            }
         }
     }
 
+    void generateParenthesesGreedyNoBacktrack(TermGenerationState& state)
+    {
+        PatternTerm& parenthesesTerm = state.term();
+        PatternDisjunction* disjunction = parenthesesTerm.parentheses.disjunction;
+        ASSERT(parenthesesTerm.type == PatternTerm::TypeParenthesesSubpattern);
+        ASSERT(parenthesesTerm.quantityCount != 1); // Handled by generateParenthesesSingle.
+
+        TermGenerationState parenthesesState(disjunction, state.checkedTotal);
+
+        Label matchAgain(this);
+
+        storeToFrame(index, parenthesesTerm.frameLocation); // Save the current index to check for zero len matches later.
+
+        for (parenthesesState.resetAlternative(); parenthesesState.alternativeValid(); parenthesesState.nextAlternative()) {
+
+            PatternAlternative* alternative = parenthesesState.alternative();
+            optimizeAlternative(alternative);
+
+            int countToCheck = alternative->m_minimumSize;
+            if (countToCheck) {
+                parenthesesState.addBacktrackJump(jumpIfNoAvailableInput(countToCheck));
+                parenthesesState.checkedTotal += countToCheck;
+            }
+
+            for (parenthesesState.resetTerm(); parenthesesState.termValid(); parenthesesState.nextTerm())
+                generateTerm(parenthesesState);
+
+            // If we get here, we matched! If the index advanced then try to match more since limit isn't supported yet.
+            branch32(NotEqual, index, Address(stackPointerRegister, (parenthesesTerm.frameLocation * sizeof(void*))), matchAgain);
+
+            // If we get here we matched, but we matched "" - cannot accept this alternative as is, so either backtrack,
+            // or fall through to try the next alternative if no backtrack is available.
+            parenthesesState.plantJumpToBacktrackIfExists(this);
+
+            parenthesesState.linkAlternativeBacktracks(this);
+            // We get here if the alternative fails to match - fall through to the next iteration, or out of the loop.
+
+            if (countToCheck) {
+                sub32(Imm32(countToCheck), index);
+                parenthesesState.checkedTotal -= countToCheck;
+            }
+        }
+
+        // If the last alternative falls through to here, we have a failed match...
+        // Which means that we match whatever we have matched up to this point (even if nothing).
+    }
+
     void generateParentheticalAssertion(TermGenerationState& state)
     {
         PatternTerm& term = state.term();
@@ -1078,17 +1156,19 @@ class RegexGenerator : private MacroAssembler {
             break;
 
         case PatternTerm::TypeBackReference:
-            m_generationFailed = true;
+            m_shouldFallBack = true;
             break;
 
         case PatternTerm::TypeForwardReference:
             break;
 
         case PatternTerm::TypeParenthesesSubpattern:
-            if ((term.quantityCount == 1) && !term.parentheses.isCopy)
+            if (term.quantityCount == 1 && !term.parentheses.isCopy)
                 generateParenthesesSingle(state);
+            else if (term.parentheses.isTerminal)
+                generateParenthesesGreedyNoBacktrack(state);
             else
-                m_generationFailed = true;
+                m_shouldFallBack = true;
             break;
 
         case PatternTerm::TypeParentheticalAssertion:
@@ -1102,21 +1182,26 @@ class RegexGenerator : private MacroAssembler {
         TermGenerationState state(disjunction, 0);
         state.resetAlternative();
 
-        // Plant a check to see if there is sufficient input available to run the first alternative.
-        // Jumping back to the label 'firstAlternative' will get to this check, jumping to
-        // 'firstAlternativeInputChecked' will jump directly to matching the alternative having
-        // skipped this check.
-
-        Label firstAlternative(this);
-
         // check availability for the next alternative
         int countCheckedForCurrentAlternative = 0;
         int countToCheckForFirstAlternative = 0;
         bool hasShorterAlternatives = false;
+        bool setRepeatAlternativeLabels = false;
         JumpList notEnoughInputForPreviousAlternative;
+        Label firstAlternative;
+        Label firstAlternativeInputChecked;
 
+        // The label 'firstAlternative' is used to plant a check to see if there is 
+        // sufficient input available to run the first repeating alternative.
+        // The label 'firstAlternativeInputChecked' will jump directly to matching 
+        // the first repeating alternative having skipped this check.
+        
         if (state.alternativeValid()) {
             PatternAlternative* alternative = state.alternative();
+            if (!alternative->onceThrough()) {
+                firstAlternative = Label(this);
+                setRepeatAlternativeLabels = true;
+            }
             countToCheckForFirstAlternative = alternative->m_minimumSize;
             state.checkedTotal += countToCheckForFirstAlternative;
             if (countToCheckForFirstAlternative)
@@ -1124,31 +1209,35 @@ class RegexGenerator : private MacroAssembler {
             countCheckedForCurrentAlternative = countToCheckForFirstAlternative;
         }
 
-        Label firstAlternativeInputChecked(this);
+        if (setRepeatAlternativeLabels)
+            firstAlternativeInputChecked = Label(this);
 
         while (state.alternativeValid()) {
-            // Track whether any alternatives are shorter than the first one.
-            hasShorterAlternatives = hasShorterAlternatives || (countCheckedForCurrentAlternative < countToCheckForFirstAlternative);
-
             PatternAlternative* alternative = state.alternative();
             optimizeAlternative(alternative);
 
+            // Track whether any alternatives are shorter than the first one.
+            if (!alternative->onceThrough())
+                hasShorterAlternatives = hasShorterAlternatives || (countCheckedForCurrentAlternative < countToCheckForFirstAlternative);
+            
             for (state.resetTerm(); state.termValid(); state.nextTerm())
                 generateTerm(state);
 
             // If we get here, the alternative matched.
             if (m_pattern.m_body->m_callFrameSize)
                 addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
-            
+
             ASSERT(index != returnRegister);
             if (m_pattern.m_body->m_hasFixedSize) {
                 move(index, returnRegister);
                 if (alternative->m_minimumSize)
                     sub32(Imm32(alternative->m_minimumSize), returnRegister);
+
+                store32(returnRegister, output);
             } else
-                pop(returnRegister);
+                load32(Address(output), returnRegister);
+
             store32(index, Address(output, 4));
-            store32(returnRegister, output);
 
             generateReturn();
 
@@ -1157,47 +1246,75 @@ class RegexGenerator : private MacroAssembler {
             // if there are any more alternatives, plant the check for input before looping.
             if (state.alternativeValid()) {
                 PatternAlternative* nextAlternative = state.alternative();
-                int countToCheckForNextAlternative = nextAlternative->m_minimumSize;
-
-                if (countCheckedForCurrentAlternative > countToCheckForNextAlternative) { // CASE 1: current alternative was longer than the next one.
+                if (!setRepeatAlternativeLabels && !nextAlternative->onceThrough()) {
+                    // We have handled non-repeating alternatives, jump to next iteration 
+                    // and loop over repeating alternatives.
+                    state.jumpToBacktrack(jump(), this);
+                    
+                    countToCheckForFirstAlternative = nextAlternative->m_minimumSize;
+                    
                     // If we get here, there the last input checked failed.
                     notEnoughInputForPreviousAlternative.link(this);
-
-                    // Check if sufficent input available to run the next alternative 
-                    notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative));
-                    // We are now in the correct state to enter the next alternative; this add is only required
-                    // to mirror and revert operation of the sub32, just below.
-                    add32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index);
-
-                    // If we get here, there the last input checked passed.
+                    
                     state.linkAlternativeBacktracks(this);
-                    // No need to check if we can run the next alternative, since it is shorter -
-                    // just update index.
-                    sub32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index);
-                } else if (countCheckedForCurrentAlternative < countToCheckForNextAlternative) { // CASE 2: next alternative is longer than the current one.
-                    // If we get here, there the last input checked failed.
-                    // If there is insufficient input to run the current alternative, and the next alternative is longer,
-                    // then there is definitely not enough input to run it - don't even check. Just adjust index, as if
-                    // we had checked.
-                    notEnoughInputForPreviousAlternative.link(this);
-                    add32(Imm32(countToCheckForNextAlternative - countCheckedForCurrentAlternative), index);
-                    notEnoughInputForPreviousAlternative.append(jump());
 
-                    // The next alternative is longer than the current one; check the difference.
-                    state.linkAlternativeBacktracks(this);
-                    notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative));
-                } else { // CASE 3: Both alternatives are the same length.
-                    ASSERT(countCheckedForCurrentAlternative == countToCheckForNextAlternative);
+                    // Back up to start the looping alternatives.
+                    if (countCheckedForCurrentAlternative)
+                        sub32(Imm32(countCheckedForCurrentAlternative), index);
+                    
+                    firstAlternative = Label(this);
+                    
+                    state.checkedTotal = countToCheckForFirstAlternative;
+                    if (countToCheckForFirstAlternative)
+                        notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForFirstAlternative));
+                    
+                    countCheckedForCurrentAlternative = countToCheckForFirstAlternative;
+                    
+                    firstAlternativeInputChecked = Label(this);
 
-                    // If the next alterative is the same length as this one, then no need to check the input -
-                    // if there was sufficent input to run the current alternative then there is sufficient
-                    // input to run the next one; if not, there isn't.
-                    state.linkAlternativeBacktracks(this);
+                    setRepeatAlternativeLabels = true;
+                } else {
+                    int countToCheckForNextAlternative = nextAlternative->m_minimumSize;
+                    
+                    if (countCheckedForCurrentAlternative > countToCheckForNextAlternative) { // CASE 1: current alternative was longer than the next one.
+                        // If we get here, then the last input checked failed.
+                        notEnoughInputForPreviousAlternative.link(this);
+                        
+                        // Check if sufficent input available to run the next alternative 
+                        notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative));
+                        // We are now in the correct state to enter the next alternative; this add is only required
+                        // to mirror and revert operation of the sub32, just below.
+                        add32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index);
+                        
+                        // If we get here, then the last input checked passed.
+                        state.linkAlternativeBacktracks(this);
+                        // No need to check if we can run the next alternative, since it is shorter -
+                        // just update index.
+                        sub32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index);
+                    } else if (countCheckedForCurrentAlternative < countToCheckForNextAlternative) { // CASE 2: next alternative is longer than the current one.
+                        // If we get here, then the last input checked failed.
+                        // If there is insufficient input to run the current alternative, and the next alternative is longer,
+                        // then there is definitely not enough input to run it - don't even check. Just adjust index, as if
+                        // we had checked.
+                        notEnoughInputForPreviousAlternative.link(this);
+                        add32(Imm32(countToCheckForNextAlternative - countCheckedForCurrentAlternative), index);
+                        notEnoughInputForPreviousAlternative.append(jump());
+                        
+                        // The next alternative is longer than the current one; check the difference.
+                        state.linkAlternativeBacktracks(this);
+                        notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative));
+                    } else { // CASE 3: Both alternatives are the same length.
+                        ASSERT(countCheckedForCurrentAlternative == countToCheckForNextAlternative);
+                        
+                        // If the next alterative is the same length as this one, then no need to check the input -
+                        // if there was sufficent input to run the current alternative then there is sufficient
+                        // input to run the next one; if not, there isn't.
+                        state.linkAlternativeBacktracks(this);
+                    }
+                    state.checkedTotal -= countCheckedForCurrentAlternative;
+                    countCheckedForCurrentAlternative = countToCheckForNextAlternative;
+                    state.checkedTotal += countCheckedForCurrentAlternative;
                 }
-
-                state.checkedTotal -= countCheckedForCurrentAlternative;
-                countCheckedForCurrentAlternative = countToCheckForNextAlternative;
-                state.checkedTotal += countCheckedForCurrentAlternative;
             }
         }
         
@@ -1205,81 +1322,86 @@ class RegexGenerator : private MacroAssembler {
 
         state.checkedTotal -= countCheckedForCurrentAlternative;
 
-        // How much more input need there be to be able to retry from the first alternative?
-        // examples:
-        //   /yarr_jit/ or /wrec|pcre/
-        //     In these examples we need check for one more input before looping.
-        //   /yarr_jit|pcre/
-        //     In this case we need check for 5 more input to loop (+4 to allow for the first alterative
-        //     being four longer than the last alternative checked, and another +1 to effectively move
-        //     the start position along by one).
-        //   /yarr|rules/ or /wrec|notsomuch/
-        //     In these examples, provided that there was sufficient input to have just been matching for
-        //     the second alternative we can loop without checking for available input (since the second
-        //     alternative is longer than the first).  In the latter example we need to decrement index
-        //     (by 4) so the start position is only progressed by 1 from the last iteration.
-        int incrementForNextIter = (countToCheckForFirstAlternative - countCheckedForCurrentAlternative) + 1;
-
-        // First, deal with the cases where there was sufficient input to try the last alternative.
-        if (incrementForNextIter > 0) // We need to check for more input anyway, fall through to the checking below.
-            state.linkAlternativeBacktracks(this);
-        else if (m_pattern.m_body->m_hasFixedSize && !incrementForNextIter) // No need to update anything, link these backtracks straight to the to pof the loop!
-            state.linkAlternativeBacktracksTo(firstAlternativeInputChecked, this);
-        else { // no need to check the input, but we do have some bookkeeping to do first.
+        if (!setRepeatAlternativeLabels) {
+            // If there are no alternatives that need repeating (all are marked 'onceThrough') then just link
+            // the match failures to this point, and fall through to the return below.
             state.linkAlternativeBacktracks(this);
+            notEnoughInputForPreviousAlternative.link(this);
+        } else {
+            // How much more input need there be to be able to retry from the first alternative?
+            // examples:
+            //   /yarr_jit/ or /wrec|pcre/
+            //     In these examples we need check for one more input before looping.
+            //   /yarr_jit|pcre/
+            //     In this case we need check for 5 more input to loop (+4 to allow for the first alterative
+            //     being four longer than the last alternative checked, and another +1 to effectively move
+            //     the start position along by one).
+            //   /yarr|rules/ or /wrec|notsomuch/
+            //     In these examples, provided that there was sufficient input to have just been matching for
+            //     the second alternative we can loop without checking for available input (since the second
+            //     alternative is longer than the first).  In the latter example we need to decrement index
+            //     (by 4) so the start position is only progressed by 1 from the last iteration.
+            int incrementForNextIter = (countToCheckForFirstAlternative - countCheckedForCurrentAlternative) + 1;
+
+            // First, deal with the cases where there was sufficient input to try the last alternative.
+            if (incrementForNextIter > 0) // We need to check for more input anyway, fall through to the checking below.
+                state.linkAlternativeBacktracks(this);
+            else if (m_pattern.m_body->m_hasFixedSize && !incrementForNextIter) // No need to update anything, link these backtracks straight to the to pof the loop!
+                state.linkAlternativeBacktracksTo(firstAlternativeInputChecked, this);
+            else { // no need to check the input, but we do have some bookkeeping to do first.
+                state.linkAlternativeBacktracks(this);
 
-            // Where necessary update our preserved start position.
-            if (!m_pattern.m_body->m_hasFixedSize) {
-                move(index, regT0);
-                sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0);
-                poke(regT0, m_pattern.m_body->m_callFrameSize);
+                // Where necessary update our preserved start position.
+                if (!m_pattern.m_body->m_hasFixedSize) {
+                    move(index, regT0);
+                    sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0);
+                    store32(regT0, Address(output));
+                }
+
+                // Update index if necessary, and loop (without checking).
+                if (incrementForNextIter)
+                    add32(Imm32(incrementForNextIter), index);
+                jump().linkTo(firstAlternativeInputChecked, this);
             }
 
-            // Update index if necessary, and loop (without checking).
-            if (incrementForNextIter)
-                add32(Imm32(incrementForNextIter), index);
-            jump().linkTo(firstAlternativeInputChecked, this);
+            notEnoughInputForPreviousAlternative.link(this);
+            // Update our idea of the start position, if we're tracking this.
+            if (!m_pattern.m_body->m_hasFixedSize) {
+                if (countCheckedForCurrentAlternative - 1) {
+                    move(index, regT0);
+                    sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0);
+                    store32(regT0, Address(output));
+                } else
+                    store32(index, Address(output));
+            }
+        
+            // Check if there is sufficent input to run the first alternative again.
+            jumpIfAvailableInput(incrementForNextIter).linkTo(firstAlternativeInputChecked, this);
+            // No - insufficent input to run the first alteranative, are there any other alternatives we
+            // might need to check?  If so, the last check will have left the index incremented by
+            // (countToCheckForFirstAlternative + 1), so we need test whether countToCheckForFirstAlternative
+            // LESS input is available, to have the effect of just progressing the start position by 1
+            // from the last iteration.  If this check passes we can just jump up to the check associated
+            // with the first alternative in the loop.  This is a bit sad, since we'll end up trying the
+            // first alternative again, and this check will fail (otherwise the check planted just above
+            // here would have passed).  This is a bit sad, however it saves trying to do something more
+            // complex here in compilation, and in the common case we should end up coallescing the checks.
+            //
+            // FIXME: a nice improvement here may be to stop trying to match sooner, based on the least
+            // of the minimum-alternative-lengths.  E.g. if I have two alternatives of length 200 and 150,
+            // and a string of length 100, we'll end up looping index from 0 to 100, checking whether there
+            // is sufficient input to run either alternative (constantly failing).  If there had been only
+            // one alternative, or if the shorter alternative had come first, we would have terminated
+            // immediately. :-/
+            if (hasShorterAlternatives)
+                jumpIfAvailableInput(-countToCheckForFirstAlternative).linkTo(firstAlternative, this);
+            // index will now be a bit garbled (depending on whether 'hasShorterAlternatives' is true,
+            // it has either been incremented by 1 or by (countToCheckForFirstAlternative + 1) ... 
+            // but since we're about to return a failure this doesn't really matter!)
         }
 
-        notEnoughInputForPreviousAlternative.link(this);
-        // Update our idea of the start position, if we're tracking this.
-        if (!m_pattern.m_body->m_hasFixedSize) {
-            if (countCheckedForCurrentAlternative - 1) {
-                move(index, regT0);
-                sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0);
-                poke(regT0, m_pattern.m_body->m_callFrameSize);
-            } else
-                poke(index, m_pattern.m_body->m_callFrameSize);
-        }
-        // Check if there is sufficent input to run the first alternative again.
-        jumpIfAvailableInput(incrementForNextIter).linkTo(firstAlternativeInputChecked, this);
-        // No - insufficent input to run the first alteranative, are there any other alternatives we
-        // might need to check?  If so, the last check will have left the index incremented by
-        // (countToCheckForFirstAlternative + 1), so we need test whether countToCheckForFirstAlternative
-        // LESS input is available, to have the effect of just progressing the start position by 1
-        // from the last iteration.  If this check passes we can just jump up to the check associated
-        // with the first alternative in the loop.  This is a bit sad, since we'll end up trying the
-        // first alternative again, and this check will fail (otherwise the check planted just above
-        // here would have passed).  This is a bit sad, however it saves trying to do something more
-        // complex here in compilation, and in the common case we should end up coallescing the checks.
-        //
-        // FIXME: a nice improvement here may be to stop trying to match sooner, based on the least
-        // of the minimum-alternative-lengths.  E.g. if I have two alternatives of length 200 and 150,
-        // and a string of length 100, we'll end up looping index from 0 to 100, checking whether there
-        // is sufficient input to run either alternative (constantly failing).  If there had been only
-        // one alternative, or if the shorter alternative had come first, we would have terminated
-        // immediately. :-/
-        if (hasShorterAlternatives)
-            jumpIfAvailableInput(-countToCheckForFirstAlternative).linkTo(firstAlternative, this);
-        // index will now be a bit garbled (depending on whether 'hasShorterAlternatives' is true,
-        // it has either been incremented by 1 or by (countToCheckForFirstAlternative + 1) ... 
-        // but since we're about to return a failure this doesn't really matter!)
-
-        unsigned frameSize = m_pattern.m_body->m_callFrameSize;
-        if (!m_pattern.m_body->m_hasFixedSize)
-            ++frameSize;
-        if (frameSize)
-            addPtr(Imm32(frameSize * sizeof(void*)), stackPointerRegister);
+        if (m_pattern.m_body->m_callFrameSize)
+            addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
 
         move(Imm32(-1), returnRegister);
 
@@ -1312,7 +1434,12 @@ class RegexGenerator : private MacroAssembler {
         push(ARMRegisters::r4);
         push(ARMRegisters::r5);
         push(ARMRegisters::r6);
+#if CPU(ARM_TRADITIONAL)
+        push(ARMRegisters::r8); // scratch register
+#endif
         move(ARMRegisters::r3, output);
+#elif CPU(MIPS)
+        // Do nothing.
 #endif
     }
 
@@ -1327,9 +1454,14 @@ class RegexGenerator : private MacroAssembler {
         pop(X86Registers::ebx);
         pop(X86Registers::ebp);
 #elif CPU(ARM)
+#if CPU(ARM_TRADITIONAL)
+        pop(ARMRegisters::r8); // scratch register
+#endif
         pop(ARMRegisters::r6);
         pop(ARMRegisters::r5);
         pop(ARMRegisters::r4);
+#elif CPU(MIPS)
+        // Do nothing
 #endif
         ret();
     }
@@ -1337,7 +1469,7 @@ class RegexGenerator : private MacroAssembler {
 public:
     RegexGenerator(RegexPattern& pattern)
         : m_pattern(pattern)
-        , m_generationFailed(false)
+        , m_shouldFallBack(false)
     {
     }
 
@@ -1345,9 +1477,8 @@ public:
     {
         generateEnter();
 
-        // TODO: do I really want this on the stack?
         if (!m_pattern.m_body->m_hasFixedSize)
-            push(index);
+            store32(index, Address(output));
 
         if (m_pattern.m_body->m_callFrameSize)
             subPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
@@ -1359,7 +1490,7 @@ public:
     {
         generate();
 
-        LinkBuffer patchBuffer(this, globalData->executableAllocator.poolForSize(size()));
+        LinkBuffer patchBuffer(this, globalData->executableAllocator.poolForSize(size()), 0);
 
         for (unsigned i = 0; i < m_backtrackRecords.size(); ++i)
             patchBuffer.patch(m_backtrackRecords[i].dataLabel, patchBuffer.locationOf(m_backtrackRecords[i].backtrackLocation));
@@ -1367,34 +1498,32 @@ public:
         jitObject.set(patchBuffer.finalizeCode());
     }
 
-    bool generationFailed()
+    bool shouldFallBack()
     {
-        return m_generationFailed;
+        return m_shouldFallBack;
     }
 
 private:
     RegexPattern& m_pattern;
+    bool m_shouldFallBack;
     Vector<AlternativeBacktrackRecord> m_backtrackRecords;
-    bool m_generationFailed;
 };
 
-void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& patternString, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline)
+void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& patternString, unsigned& numSubpatterns, const char*& error, BumpPointerAllocator* allocator, bool ignoreCase, bool multiline)
 {
     RegexPattern pattern(ignoreCase, multiline);
-
     if ((error = compileRegex(patternString, pattern)))
         return;
-
     numSubpatterns = pattern.m_numSubpatterns;
 
-    RegexGenerator generator(pattern);
-    generator.compile(globalData, jitObject);
-
-    if (generator.generationFailed()) {
-        JSRegExpIgnoreCaseOption ignoreCaseOption = ignoreCase ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase;
-        JSRegExpMultilineOption multilineOption = multiline ? JSRegExpMultiline : JSRegExpSingleLine;
-        jitObject.setFallback(jsRegExpCompile(reinterpret_cast<const UChar*>(patternString.data()), patternString.size(), ignoreCaseOption, multilineOption, &numSubpatterns, &error));
+    if (!pattern.m_containsBackreferences && globalData->canUseJIT()) {
+        RegexGenerator generator(pattern);
+        generator.compile(globalData, jitObject);
+        if (!generator.shouldFallBack())
+            return;
     }
+
+    jitObject.setFallback(byteCompileRegex(pattern, allocator));
 }
 
 }}
diff --git a/JavaScriptCore/yarr/RegexJIT.h b/JavaScriptCore/yarr/RegexJIT.h
index 935b9a3..c4c382c 100644
--- a/JavaScriptCore/yarr/RegexJIT.h
+++ b/JavaScriptCore/yarr/RegexJIT.h
@@ -26,16 +26,12 @@
 #ifndef RegexJIT_h
 #define RegexJIT_h
 
-#include <wtf/Platform.h>
-
 #if ENABLE(YARR_JIT)
 
 #include "MacroAssembler.h"
+#include "RegexInterpreter.h" // temporary, remove when fallback is removed.
 #include "RegexPattern.h"
-#include <UString.h>
-
-#include <pcre.h>
-struct JSRegExp; // temporary, remove when fallback is removed.
+#include "UString.h"
 
 #if CPU(X86) && !COMPILER(MSVC)
 #define YARR_CALL __attribute__ ((regparm (3)))
@@ -55,20 +51,23 @@ class RegexCodeBlock {
 
 public:
     RegexCodeBlock()
-        : m_fallback(0)
+        : m_needFallback(false)
     {
     }
 
     ~RegexCodeBlock()
     {
-        if (m_fallback)
-            jsRegExpFree(m_fallback);
     }
 
-    JSRegExp* getFallback() { return m_fallback; }
-    void setFallback(JSRegExp* fallback) { m_fallback = fallback; }
+    BytecodePattern* getFallback() { return m_fallback.get(); }
+    bool isFallback() { return m_needFallback; }
+    void setFallback(PassOwnPtr<BytecodePattern> fallback)
+    {
+        m_fallback = fallback;
+        m_needFallback = true;
+    }
 
-    bool operator!() { return !m_ref.m_code.executableAddress(); }
+    bool operator!() { return (!m_ref.m_code.executableAddress() && !m_fallback); }
     void set(MacroAssembler::CodeRef ref) { m_ref = ref; }
 
     int execute(const UChar* input, unsigned start, unsigned length, int* output)
@@ -76,17 +75,22 @@ public:
         return reinterpret_cast<RegexJITCode>(m_ref.m_code.executableAddress())(input, start, length, output);
     }
 
+#if ENABLE(REGEXP_TRACING)
+    void *getAddr() { return m_ref.m_code.executableAddress(); }
+#endif
+
 private:
     MacroAssembler::CodeRef m_ref;
-    JSRegExp* m_fallback;
+    OwnPtr<Yarr::BytecodePattern> m_fallback;
+    bool m_needFallback;
 };
 
-void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase = false, bool multiline = false);
+void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, BumpPointerAllocator* allocator, bool ignoreCase = false, bool multiline = false);
 
-inline int executeRegex(RegexCodeBlock& jitObject, const UChar* input, unsigned start, unsigned length, int* output, int outputArraySize)
+inline int executeRegex(RegexCodeBlock& jitObject, const UChar* input, unsigned start, unsigned length, int* output)
 {
-    if (JSRegExp* fallback = jitObject.getFallback())
-        return (jsRegExpExecute(fallback, input, length, start, output, outputArraySize) < 0) ? -1 : output[0];
+    if (jitObject.isFallback())
+        return (interpretRegex(jitObject.getFallback(), input, start, length, output));
 
     return jitObject.execute(input, start, length, output);
 }
diff --git a/JavaScriptCore/yarr/RegexParser.h b/JavaScriptCore/yarr/RegexParser.h
index 64e8463..a5c0ba2 100644
--- a/JavaScriptCore/yarr/RegexParser.h
+++ b/JavaScriptCore/yarr/RegexParser.h
@@ -26,14 +26,10 @@
 #ifndef RegexParser_h
 #define RegexParser_h
 
-#include <wtf/Platform.h>
-
-#if ENABLE(YARR)
-
-#include <UString.h>
+#include "UString.h"
+#include <limits.h>
 #include <wtf/ASCIICType.h>
 #include <wtf/unicode/Unicode.h>
-#include <limits.h>
 
 namespace JSC { namespace Yarr {
 
@@ -195,8 +191,8 @@ private:
         : m_delegate(delegate)
         , m_backReferenceLimit(backReferenceLimit)
         , m_err(NoError)
-        , m_data(pattern.data())
-        , m_size(pattern.size())
+        , m_data(pattern.characters())
+        , m_size(pattern.length())
         , m_index(0)
         , m_parenthesesNestingDepth(0)
     {
@@ -849,6 +845,4 @@ const char* parse(Delegate& delegate, const UString& pattern, unsigned backRefer
 
 } } // namespace JSC::Yarr
 
-#endif
-
 #endif // RegexParser_h
diff --git a/JavaScriptCore/yarr/RegexPattern.h b/JavaScriptCore/yarr/RegexPattern.h
index dd7512d..c76c641 100644
--- a/JavaScriptCore/yarr/RegexPattern.h
+++ b/JavaScriptCore/yarr/RegexPattern.h
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2009 Apple Inc. All rights reserved.
+ * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -26,14 +27,9 @@
 #ifndef RegexPattern_h
 #define RegexPattern_h
 
-#include <wtf/Platform.h>
-
-#if ENABLE(YARR)
-
 #include <wtf/Vector.h>
 #include <wtf/unicode/Unicode.h>
 
-
 namespace JSC { namespace Yarr {
 
 #define RegexStackSpaceForBackTrackInfoPatternCharacter 1 // Only for !fixed quantifiers.
@@ -42,6 +38,7 @@ namespace JSC { namespace Yarr {
 #define RegexStackSpaceForBackTrackInfoAlternative 1 // One per alternative.
 #define RegexStackSpaceForBackTrackInfoParentheticalAssertion 1
 #define RegexStackSpaceForBackTrackInfoParenthesesOnce 1 // Only for !fixed quantifiers.
+#define RegexStackSpaceForBackTrackInfoParenthesesTerminal 1
 #define RegexStackSpaceForBackTrackInfoParentheses 4
 
 struct PatternDisjunction;
@@ -57,11 +54,35 @@ struct CharacterRange {
     }
 };
 
+struct CharacterClassTable : RefCounted<CharacterClassTable> {
+    const char* m_table;
+    bool m_inverted;
+    static PassRefPtr<CharacterClassTable> create(const char* table, bool inverted)
+    {
+        return adoptRef(new CharacterClassTable(table, inverted));
+    }
+
+private:
+    CharacterClassTable(const char* table, bool inverted)
+        : m_table(table)
+        , m_inverted(inverted)
+    {
+    }
+};
+
 struct CharacterClass : FastAllocBase {
+    // All CharacterClass instances have to have the full set of matches and ranges,
+    // they may have an optional table for faster lookups (which must match the
+    // specified matches and ranges)
+    CharacterClass(PassRefPtr<CharacterClassTable> table)
+        : m_table(table)
+    {
+    }
     Vector<UChar> m_matches;
     Vector<CharacterRange> m_ranges;
     Vector<UChar> m_matchesUnicode;
     Vector<CharacterRange> m_rangesUnicode;
+    RefPtr<CharacterClassTable> m_table;
 };
 
 enum QuantifierType {
@@ -92,6 +113,7 @@ struct PatternTerm {
             unsigned subpatternId;
             unsigned lastSubpatternId;
             bool isCopy;
+            bool isTerminal;
         } parentheses;
     };
     QuantifierType quantityType;
@@ -123,6 +145,7 @@ struct PatternTerm {
         parentheses.disjunction = disjunction;
         parentheses.subpatternId = subpatternId;
         parentheses.isCopy = false;
+        parentheses.isTerminal = false;
         quantityType = QuantifierFixedCount;
         quantityCount = 1;
     }
@@ -184,6 +207,10 @@ struct PatternTerm {
 struct PatternAlternative : FastAllocBase {
     PatternAlternative(PatternDisjunction* disjunction)
         : m_parent(disjunction)
+        , m_onceThrough(false)
+        , m_hasFixedSize(false)
+        , m_startsWithBOL(false)
+        , m_containsBOL(false)
     {
     }
 
@@ -198,16 +225,30 @@ struct PatternAlternative : FastAllocBase {
         ASSERT(m_terms.size());
         m_terms.shrink(m_terms.size() - 1);
     }
+    
+    void setOnceThrough()
+    {
+        m_onceThrough = true;
+    }
+    
+    bool onceThrough()
+    {
+        return m_onceThrough;
+    }
 
     Vector<PatternTerm> m_terms;
     PatternDisjunction* m_parent;
     unsigned m_minimumSize;
-    bool m_hasFixedSize;
+    bool m_onceThrough : 1;
+    bool m_hasFixedSize : 1;
+    bool m_startsWithBOL : 1;
+    bool m_containsBOL : 1;
 };
 
 struct PatternDisjunction : FastAllocBase {
     PatternDisjunction(PatternAlternative* parent = 0)
         : m_parent(parent)
+        , m_hasFixedSize(false)
     {
     }
     
@@ -242,10 +283,37 @@ CharacterClass* nondigitsCreate();
 CharacterClass* nonspacesCreate();
 CharacterClass* nonwordcharCreate();
 
+struct TermChain {
+    TermChain(PatternTerm term)
+        : term(term)
+    {}
+
+    PatternTerm term;
+    Vector<TermChain> hotTerms;
+};
+
+struct BeginChar {
+    BeginChar()
+        : value(0)
+        , mask(0)
+    {}
+
+    BeginChar(unsigned value, unsigned mask)
+        : value(value)
+        , mask(mask)
+    {}
+
+    unsigned value;
+    unsigned mask;
+};
+
 struct RegexPattern {
     RegexPattern(bool ignoreCase, bool multiline)
         : m_ignoreCase(ignoreCase)
         , m_multiline(multiline)
+        , m_containsBackreferences(false)
+        , m_containsBeginChars(false)
+        , m_containsBOL(false)
         , m_numSubpatterns(0)
         , m_maxBackReference(0)
         , newlineCached(0)
@@ -269,6 +337,10 @@ struct RegexPattern {
         m_numSubpatterns = 0;
         m_maxBackReference = 0;
 
+        m_containsBackreferences = false;
+        m_containsBeginChars = false;
+        m_containsBOL = false;
+
         newlineCached = 0;
         digitsCached = 0;
         spacesCached = 0;
@@ -281,6 +353,7 @@ struct RegexPattern {
         m_disjunctions.clear();
         deleteAllValues(m_userCharacterClasses);
         m_userCharacterClasses.clear();
+        m_beginChars.clear();
     }
 
     bool containsIllegalBackReference()
@@ -331,13 +404,17 @@ struct RegexPattern {
         return nonwordcharCached;
     }
 
-    bool m_ignoreCase;
-    bool m_multiline;
+    bool m_ignoreCase : 1;
+    bool m_multiline : 1;
+    bool m_containsBackreferences : 1;
+    bool m_containsBeginChars : 1;
+    bool m_containsBOL : 1;
     unsigned m_numSubpatterns;
     unsigned m_maxBackReference;
     PatternDisjunction* m_body;
     Vector<PatternDisjunction*, 4> m_disjunctions;
     Vector<CharacterClass*> m_userCharacterClasses;
+    Vector<BeginChar> m_beginChars;
 
 private:
     CharacterClass* newlineCached;
@@ -351,6 +428,4 @@ private:
 
 } } // namespace JSC::Yarr
 
-#endif
-
 #endif // RegexPattern_h