1 files changed, 357 insertions, 112 deletions
diff --git a/JavaScriptCore/yarr/RegexCompiler.cpp b/JavaScriptCore/yarr/RegexCompiler.cpp
index 9cd3d12..06ecbad 100644
--- a/JavaScriptCore/yarr/RegexCompiler.cpp
+++ b/JavaScriptCore/yarr/RegexCompiler.cpp
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2009 Apple Inc. All rights reserved.
+ * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -30,12 +31,12 @@
 #include "RegexPattern.h"
 #include <wtf/Vector.h>
 
-#if ENABLE(YARR)
-
 using namespace WTF;
 
 namespace JSC { namespace Yarr {
 
+#include "RegExpJitTables.h"
+
 class CharacterClassConstructor {
 public:
     CharacterClassConstructor(bool isCaseInsensitive = false)
@@ -141,7 +142,7 @@ public:
 
     CharacterClass* charClass()
     {
-        CharacterClass* characterClass = new CharacterClass();
+        CharacterClass* characterClass = new CharacterClass(0);
 
         characterClass->m_matches.append(m_matches);
         characterClass->m_ranges.append(m_ranges);
@@ -233,110 +234,118 @@ private:
     Vector<CharacterRange> m_rangesUnicode;
 };
 
+struct BeginCharHelper {
+    BeginCharHelper(Vector<BeginChar>* beginChars, bool isCaseInsensitive = false)
+        : m_beginChars(beginChars) // Vector that addCharacter() fills; only the pointer is stored here.
+        , m_isCaseInsensitive(isCaseInsensitive) // Consulted by linkHotTerms() when folding the second character.
+    {}
+
+    void addBeginChar(BeginChar beginChar, Vector<TermChain>* hotTerms, QuantifierType quantityType, unsigned quantityCount)
+    {
+        bool isFixedCount = quantityType == QuantifierFixedCount;
+        // A single fixed occurrence with known following terms: pair the character with each possible next one.
+        if (isFixedCount && quantityCount == 1 && hotTerms->size()) {
+            linkHotTerms(beginChar, hotTerms);
+            return;
+        }
+        // A fixed count above one (e.g. /a{3}/) duplicates the character into the upper 16 bits; every other case stores it alone.
+        if (isFixedCount && quantityCount > 1) {
+            beginChar.value |= beginChar.value << 16;
+            beginChar.mask |= beginChar.mask << 16;
+        }
+        addCharacter(beginChar);
+    }
 
-CharacterClass* newlineCreate()
-{
-    CharacterClass* characterClass = new CharacterClass();
+    // Merge neighbouring BeginChars (entries i and i + 1, for i < size) to reduce the number of character checks.
+    void merge(unsigned size)
+    {
+        for (unsigned i = 0; i < size; i++) {
+            BeginChar* curr = &m_beginChars->at(i);
+            BeginChar* next = &m_beginChars->at(i + 1);

-    characterClass->m_matches.append('\n');
-    characterClass->m_matches.append('\r');
-    characterClass->m_matchesUnicode.append(0x2028);
-    characterClass->m_matchesUnicode.append(0x2029);
-    
-    return characterClass;
-}
+            // A 16bit current value followed by a 32bit next value is left unmerged (the two sizes are unmergable).
+            // NOTE(review): addCharacter() appears to keep larger values first, so confirm this test can actually fire.
+            if (curr->value <= 0xFFFF && next->value > 0xFFFF)
+                continue;

-CharacterClass* digitsCreate()
-{
-    CharacterClass* characterClass = new CharacterClass();
+            unsigned diff = curr->value ^ next->value;

-    characterClass->m_ranges.append(CharacterRange('0', '9'));
-    
-    return characterClass;
-}
+            curr->mask |= diff; // The bits in which the two values differ are folded into the mask...
+            curr->value |= curr->mask; // ...and every mask bit is then forced on in the value.

-CharacterClass* spacesCreate()
-{
-    CharacterClass* characterClass = new CharacterClass();
-
-    characterClass->m_matches.append(' ');
-    characterClass->m_ranges.append(CharacterRange('\t', '\r'));
-    characterClass->m_matchesUnicode.append(0x00a0);
-    characterClass->m_matchesUnicode.append(0x1680);
-    characterClass->m_matchesUnicode.append(0x180e);
-    characterClass->m_matchesUnicode.append(0x2028);
-    characterClass->m_matchesUnicode.append(0x2029);
-    characterClass->m_matchesUnicode.append(0x202f);
-    characterClass->m_matchesUnicode.append(0x205f);
-    characterClass->m_matchesUnicode.append(0x3000);
-    characterClass->m_rangesUnicode.append(CharacterRange(0x2000, 0x200a));
-    
-    return characterClass;
-}
+            m_beginChars->remove(i + 1); // next has been absorbed into curr.
+            size--; // The vector shrank, so one position fewer is left to visit.
+        }
+    }
 
-CharacterClass* wordcharCreate()
-{
-    CharacterClass* characterClass = new CharacterClass();
+private:
+    void addCharacter(BeginChar beginChar)
+    {
+        unsigned pos = 0;
+        unsigned range = m_beginChars->size();

-    characterClass->m_matches.append('_');
-    characterClass->m_ranges.append(CharacterRange('0', '9'));
-    characterClass->m_ranges.append(CharacterRange('A', 'Z'));
-    characterClass->m_ranges.append(CharacterRange('a', 'z'));
-    
-    return characterClass;
-}
+        // Binary chop for the insert position: larger values are kept first and a value already present is not added twice.
+        while (range) {
+            unsigned index = range >> 1;

-CharacterClass* nondigitsCreate()
-{
-    CharacterClass* characterClass = new CharacterClass();
+            const BeginChar& probe = m_beginChars->at(pos + index);
+            if (probe.value == beginChar.value)
+                return;
+            if (probe.value < beginChar.value) // Compared directly: a difference narrowed to int can carry the wrong sign.
+                range = index;
+            else {
+                pos += (index+1);
+                range -= (index+1);
+            }
+        }

-    characterClass->m_ranges.append(CharacterRange(0, '0' - 1));
-    characterClass->m_ranges.append(CharacterRange('9' + 1, 0x7f));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x80, 0xffff));
-    
-    return characterClass;
-}
+        if (pos == m_beginChars->size())
+            m_beginChars->append(beginChar);
+        else
+            m_beginChars->insert(pos, beginChar);
+    }
 
-CharacterClass* nonspacesCreate()
-{
-    CharacterClass* characterClass = new CharacterClass();
-
-    characterClass->m_ranges.append(CharacterRange(0, '\t' - 1));
-    characterClass->m_ranges.append(CharacterRange('\r' + 1, ' ' - 1));
-    characterClass->m_ranges.append(CharacterRange(' ' + 1, 0x7f));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x0080, 0x009f));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x00a1, 0x167f));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x1681, 0x180d));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x180f, 0x1fff));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x200b, 0x2027));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x202a, 0x202e));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x2030, 0x205e));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x2060, 0x2fff));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x3001, 0xffff));
-    
-    return characterClass;
-}
+    // For each hot term, add a BeginChar that keeps beginChar's bits and ORs the term's character into the upper 16 bits.
+    void linkHotTerms(BeginChar beginChar, Vector<TermChain>* hotTerms)
+    {
+        for (unsigned i = 0; i < hotTerms->size(); i++) {
+            PatternTerm hotTerm = hotTerms->at(i).term;
+            ASSERT(hotTerm.type == PatternTerm::TypePatternCharacter);

-CharacterClass* nonwordcharCreate()
-{
-    CharacterClass* characterClass = new CharacterClass();
+            UChar characterNext = hotTerm.patternCharacter;

-    characterClass->m_matches.append('`');
-    characterClass->m_ranges.append(CharacterRange(0, '0' - 1));
-    characterClass->m_ranges.append(CharacterRange('9' + 1, 'A' - 1));
-    characterClass->m_ranges.append(CharacterRange('Z' + 1, '_' - 1));
-    characterClass->m_ranges.append(CharacterRange('z' + 1, 0x7f));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x80, 0xffff));
+            // ASCII: when ignoring case, a letter is stored lower-cased and mask bit 32 is set (both shifted into the upper half).
+            if (characterNext <= 0x7f) {
+                unsigned mask = 0;

-    return characterClass;
-}
+                if (m_isCaseInsensitive && isASCIIAlpha(characterNext)) {
+                    mask = 32;
+                    characterNext = toASCIILower(characterNext);
+                }

+                addCharacter(BeginChar(beginChar.value | (characterNext << 16), beginChar.mask | (mask << 16)));
+            } else { // Non-ASCII: add both case variants when ignoring case and they differ, otherwise the character itself.
+                UChar upper, lower;
+                if (m_isCaseInsensitive && ((upper = Unicode::toUpper(characterNext)) != (lower = Unicode::toLower(characterNext)))) {
+                    addCharacter(BeginChar(beginChar.value | (upper << 16), beginChar.mask));
+                    addCharacter(BeginChar(beginChar.value | (lower << 16), beginChar.mask));
+                } else
+                    addCharacter(BeginChar(beginChar.value | (characterNext << 16), beginChar.mask));
+            }
+        }
+    }
+
+    Vector<BeginChar>* m_beginChars;
+    bool m_isCaseInsensitive;
+};
 
 class RegexPatternConstructor {
 public:
     RegexPatternConstructor(RegexPattern& pattern)
         : m_pattern(pattern)
         , m_characterClassConstructor(pattern.m_ignoreCase)
+        , m_beginCharHelper(&pattern.m_beginChars, pattern.m_ignoreCase) // The helper writes straight into the pattern's begin-char vector.
+        , m_invertParentheticalAssertion(false) // Set from atomParentheticalAssertionBegin(); cleared by atomParenthesesEnd().
     {
     }
 
@@ -352,6 +361,11 @@ public:
     
     void assertionBOL()
     {
+        if (!m_alternative->m_terms.size() && !m_invertParentheticalAssertion) { // First term of the alternative, inverted-assertion flag clear.
+            m_alternative->m_startsWithBOL = true;
+            m_alternative->m_containsBOL = true;
+            m_pattern.m_containsBOL = true;
+        }
         m_alternative->m_terms.append(PatternTerm::BOL());
     }
     void assertionEOL()
@@ -455,20 +469,42 @@ public:
         m_pattern.m_disjunctions.append(parenthesesDisjunction);
         m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParentheticalAssertion, m_pattern.m_numSubpatterns + 1, parenthesesDisjunction, invert));
         m_alternative = parenthesesDisjunction->addNewAlternative();
+        m_invertParentheticalAssertion = invert;
     }
 
     void atomParenthesesEnd()
     {
         ASSERT(m_alternative->m_parent);
         ASSERT(m_alternative->m_parent->m_parent);
+        // Remember the disjunction being closed before stepping back out to the enclosing alternative.
+        PatternDisjunction* parenthesisDisjunction = m_alternative->m_parent;
         m_alternative = m_alternative->m_parent->m_parent;
+
+        PatternTerm& lastTerm = m_alternative->lastTerm();
+        
+        unsigned numParenAlternatives = parenthesisDisjunction->m_alternatives.size();
+        unsigned numBOLAnchoredAlts = 0;
+        // Bubble up BOL flags: count the alternatives inside the parentheses that start with BOL.
+        for (unsigned i = 0; i < numParenAlternatives; i++) {
+            if (parenthesisDisjunction->m_alternatives[i]->m_startsWithBOL)
+                numBOLAnchoredAlts++;
+        }
         
-        m_alternative->lastTerm().parentheses.lastSubpatternId = m_pattern.m_numSubpatterns;
+        if (numBOLAnchoredAlts) {
+            m_alternative->m_containsBOL = true;
+            // If all the alternatives in parens start with BOL, then so does this one
+            if (numBOLAnchoredAlts == numParenAlternatives)
+                m_alternative->m_startsWithBOL = true;
+        }
+        // Closing any parentheses clears the inverted-assertion flag.
+        lastTerm.parentheses.lastSubpatternId = m_pattern.m_numSubpatterns;
+        m_invertParentheticalAssertion = false;
     }
 
     void atomBackReference(unsigned subpatternId)
     {
         ASSERT(subpatternId);
+        m_pattern.m_containsBackreferences = true;
         m_pattern.m_maxBackReference = std::max(m_pattern.m_maxBackReference, subpatternId);
 
         if (subpatternId > m_pattern.m_numSubpatterns) {
@@ -493,32 +529,39 @@ public:
         m_alternative->m_terms.append(PatternTerm(subpatternId));
     }
 
-    PatternDisjunction* copyDisjunction(PatternDisjunction* disjunction)
+    // Deep copy the argument disjunction. If filterStartsWithBOL is true, skip alternatives with
+    // m_startsWithBOL set; returns 0 (and registers nothing with m_pattern) when every alternative is skipped.
+    PatternDisjunction* copyDisjunction(PatternDisjunction* disjunction, bool filterStartsWithBOL = false)
     {
-        PatternDisjunction* newDisjunction = new PatternDisjunction();
-
-        newDisjunction->m_parent = disjunction->m_parent;
+        PatternDisjunction* newDisjunction = 0; // Created lazily, on the first alternative that passes the filter.
         for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
             PatternAlternative* alternative = disjunction->m_alternatives[alt];
-            PatternAlternative* newAlternative = newDisjunction->addNewAlternative();
-            for (unsigned i = 0; i < alternative->m_terms.size(); ++i)
-                newAlternative->m_terms.append(copyTerm(alternative->m_terms[i]));
+            if (!filterStartsWithBOL || !alternative->m_startsWithBOL) {
+                if (!newDisjunction) {
+                    newDisjunction = new PatternDisjunction();
+                    newDisjunction->m_parent = disjunction->m_parent;
+                }
+                PatternAlternative* newAlternative = newDisjunction->addNewAlternative();
+                for (unsigned i = 0; i < alternative->m_terms.size(); ++i)
+                    newAlternative->m_terms.append(copyTerm(alternative->m_terms[i], filterStartsWithBOL));
+            }
         }
-
-        m_pattern.m_disjunctions.append(newDisjunction);
+        
+        if (newDisjunction)
+            m_pattern.m_disjunctions.append(newDisjunction);
         return newDisjunction;
     }
-
-    PatternTerm copyTerm(PatternTerm& term)
+    
+    PatternTerm copyTerm(PatternTerm& term, bool filterStartsWithBOL = false)
     {
         if ((term.type != PatternTerm::TypeParenthesesSubpattern) && (term.type != PatternTerm::TypeParentheticalAssertion))
             return PatternTerm(term);
-
+        // Parenthesised terms get their own copy of the nested disjunction (0 if the filter removed every alternative).
         PatternTerm termCopy = term;
-        termCopy.parentheses.disjunction = copyDisjunction(termCopy.parentheses.disjunction);
+        termCopy.parentheses.disjunction = copyDisjunction(termCopy.parentheses.disjunction, filterStartsWithBOL);
         return termCopy;
     }
-
+    
     void quantifyAtom(unsigned min, unsigned max, bool greedy)
     {
         ASSERT(min <= max);
@@ -624,14 +667,17 @@ public:
             case PatternTerm::TypeParenthesesSubpattern:
                 // Note: for fixed once parentheses we will ensure at least the minimum is available; others are on their own.
                 term.frameLocation = currentCallFrameSize;
-                if ((term.quantityCount == 1) && !term.parentheses.isCopy) {
-                    if (term.quantityType == QuantifierFixedCount) {
-                        currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition);
-                        currentInputPosition += term.parentheses.disjunction->m_minimumSize;
-                    } else {
+                if (term.quantityCount == 1 && !term.parentheses.isCopy) {
+                    if (term.quantityType != QuantifierFixedCount)
                         currentCallFrameSize += RegexStackSpaceForBackTrackInfoParenthesesOnce;
-                        currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition);
-                    }
+                    currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition);
+                    // If quantity is fixed, then pre-check its minimum size.
+                    if (term.quantityType == QuantifierFixedCount)
+                        currentInputPosition += term.parentheses.disjunction->m_minimumSize;
+                    term.inputPosition = currentInputPosition;
+                } else if (term.parentheses.isTerminal) {
+                    currentCallFrameSize += RegexStackSpaceForBackTrackInfoParenthesesTerminal;
+                    currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition);
                     term.inputPosition = currentInputPosition;
                 } else {
                     term.inputPosition = currentInputPosition;
@@ -685,11 +731,208 @@ public:
         setupDisjunctionOffsets(m_pattern.m_body, 0, 0);
     }
 
+    // Flags sets of parentheses that will never need to be backtracked, so that state from
+    // prior iterations does not have to be stored for them.
+    // A term is presently flagged when it is all of:
+    //   * the last term in an alternative of the main body disjunction (a set of parens at the end of the expression),
+    //   * a non-capturing set of parens quantified unbounded greedy (*),
+    //   * free of any capturing subpatterns.
+    void checkForTerminalParentheses()
+    {
+        // The last condition is checked much too crudely: a capturing subpattern anywhere in the
+        // expression disables the optimization, rather than only one nested in the candidate node.
+        if (m_pattern.m_numSubpatterns)
+            return;
+
+        Vector<PatternAlternative*>& bodyAlternatives = m_pattern.m_body->m_alternatives;
+        for (unsigned alt = 0; alt < bodyAlternatives.size(); ++alt) {
+            Vector<PatternTerm>& terms = bodyAlternatives[alt]->m_terms;
+            if (!terms.size())
+                continue;
+
+            PatternTerm& lastTerm = terms.last();
+            bool isSubpattern = lastTerm.type == PatternTerm::TypeParenthesesSubpattern;
+            bool isUnboundedGreedy = lastTerm.quantityType == QuantifierGreedy && lastTerm.quantityCount == UINT_MAX;
+            if (isSubpattern && isUnboundedGreedy && !lastTerm.capture())
+                lastTerm.parentheses.isTerminal = true;
+        }
+    }
+
+    void optimizeBOL()
+    {
+        // Unroll expressions that contain beginning of line (^) anchoring.
+        // e.g. /^a|^b|c/ becomes /^a|^b|c/ which is executed once followed by /c/ which loops
+        // The parsing code is relied on to tag alternatives with m_containsBOL and m_startsWithBOL
+        // and to roll those flags up to containing alternatives.
+        // At this point, this is only valid for non-multiline expressions.
+        if (m_pattern.m_multiline || !m_pattern.m_containsBOL)
+            return;
+
+        PatternDisjunction* body = m_pattern.m_body;
+        // Copy of the body without the alternatives that start with BOL; 0 when none are left.
+        PatternDisjunction* looping = copyDisjunction(body, true);
+
+        // Mark every original alternative as "onceThrough".
+        for (unsigned i = 0; i < body->m_alternatives.size(); ++i)
+            body->m_alternatives[i]->setOnceThrough();
+
+        if (!looping)
+            return;
+
+        // Move the copied alternatives into the body, then empty the copy's own list.
+        for (unsigned i = 0; i < looping->m_alternatives.size(); ++i)
+            body->m_alternatives.append(looping->m_alternatives[i]);
+        looping->m_alternatives.clear();
+    }
+
+    bool addBeginTerm(PatternTerm term, Vector<TermChain>* beginTerms, PatternAlternative* alternative, unsigned numTerms, unsigned termIndex, unsigned depth)
+    {
+        // Returns true when the caller should go on to examine the term that follows this one.
+        if (term.quantityType != QuantifierFixedCount && termIndex == numTerms - 1)
+            return false;
+        beginTerms->append(TermChain(term));
+        if (term.quantityType != QuantifierFixedCount)
+            return true;
+        // Fixed single occurrence with terms after it: gather those as this entry's hot terms (only while depth < 2).
+        if (depth < 2 && termIndex < numTerms - 1 && term.quantityCount == 1)
+            setupAlternativeBeginTerms(alternative, &beginTerms->last().hotTerms, termIndex + 1, depth + 1);
+        return false;
+    }
+
+    // Gathers, from every alternative of the disjunction, the terms which are potentially matching the first
+    // characters in the result. Returns false as soon as one alternative holds a term which makes the
+    // beginning character look-up optimization inefficient.
+    bool setupDisjunctionBeginTerms(PatternDisjunction* disjunction, Vector<TermChain>* beginTerms, unsigned depth)
+    {
+        Vector<PatternAlternative*>& alternatives = disjunction->m_alternatives;
+        unsigned checked = 0;
+
+        // Stop at the first alternative that rejects the optimization.
+        while (checked < alternatives.size() && setupAlternativeBeginTerms(alternatives[checked], beginTerms, 0, depth))
+            ++checked;
+
+        return checked == alternatives.size();
+    }
+
+    bool setupAlternativeBeginTerms(PatternAlternative* alternative, Vector<TermChain>* beginTerms, unsigned termIndex, unsigned depth)
+    {
+        bool checkNext = true;
+        unsigned numTerms = alternative->m_terms.size();
+        // Examine terms from termIndex onwards; move on to the following term only while a case sets checkNext again.
+        while (checkNext && termIndex < numTerms) {
+            PatternTerm term = alternative->m_terms[termIndex];
+            checkNext = false;
+
+            switch (term.type) {
+            case PatternTerm::TypeAssertionBOL:
+            case PatternTerm::TypeAssertionEOL:
+            case PatternTerm::TypeAssertionWordBoundary:
+                return false;
+
+            case PatternTerm::TypeBackReference:
+            case PatternTerm::TypeForwardReference:
+                return false;
+
+            case PatternTerm::TypePatternCharacter:
+                if (addBeginTerm(term, beginTerms, alternative, numTerms, termIndex, depth)) {
+                    termIndex++;
+                    checkNext = true;
+                }
+                break;
+
+            case PatternTerm::TypeCharacterClass:
+                return false;
+
+            case PatternTerm::TypeParentheticalAssertion:
+                if (term.invertOrCapture)
+                    return false;
+                // NOTE(review): no break here - a non-inverted assertion falls through to the subpattern case; presumably intentional.
+            case PatternTerm::TypeParenthesesSubpattern:
+                if (term.quantityType != QuantifierFixedCount) {
+                    if (termIndex == numTerms - 1)
+                        break;
+
+                    termIndex++;
+                    checkNext = true;
+
+                }
+                // Collect the begin terms of the nested disjunction into the same list, at the same depth.
+                if (!setupDisjunctionBeginTerms(term.parentheses.disjunction, beginTerms, depth))
+                    return false;
+
+                break;
+            }
+        }
+        // No examined term ruled the optimization out.
+        return true;
+    }
+
+    void setupBeginChars()
+    {
+        Vector<TermChain> beginTerms;
+        bool containsFixedCharacter = false;
+        // Fill m_pattern.m_beginChars from the body's begin terms; skipped when the body has a fixed size and a single alternative.
+        if ((!m_pattern.m_body->m_hasFixedSize || m_pattern.m_body->m_alternatives.size() > 1)
+                && setupDisjunctionBeginTerms(m_pattern.m_body, &beginTerms, 0)) {
+            unsigned size = beginTerms.size();
+
+            // If we haven't collected any terms we should abort the preparation of beginning character look-up optimization.
+            if (!size)
+                return;
+
+            m_pattern.m_containsBeginChars = true;
+            // Set provisionally: the checks after the loop clear it again when the collected characters are not useful.
+            for (unsigned i = 0; i < size; i++) {
+                PatternTerm term = beginTerms[i].term;
+
+                // We have just collected PatternCharacter terms, other terms are not allowed.
+                ASSERT(term.type == PatternTerm::TypePatternCharacter);
+
+                if (term.quantityType == QuantifierFixedCount)
+                    containsFixedCharacter = true;
+
+                UChar character = term.patternCharacter;
+                unsigned mask = 0;
+                // ASCII letters are lower-cased with mask bit 32 when ignoring case; non-ASCII adds both case variants instead.
+                if (character <= 0x7f) {
+                    if (m_pattern.m_ignoreCase && isASCIIAlpha(character)) {
+                        mask = 32;
+                        character = toASCIILower(character);
+                    }
+
+                    m_beginCharHelper.addBeginChar(BeginChar(character, mask), &beginTerms[i].hotTerms, term.quantityType, term.quantityCount);
+                } else {
+                    UChar upper, lower;
+                    if (m_pattern.m_ignoreCase && ((upper = Unicode::toUpper(character)) != (lower = Unicode::toLower(character)))) {
+                        m_beginCharHelper.addBeginChar(BeginChar(upper, mask), &beginTerms[i].hotTerms, term.quantityType, term.quantityCount);
+                        m_beginCharHelper.addBeginChar(BeginChar(lower, mask), &beginTerms[i].hotTerms, term.quantityType, term.quantityCount);
+                    } else
+                        m_beginCharHelper.addBeginChar(BeginChar(character, mask), &beginTerms[i].hotTerms, term.quantityType, term.quantityCount);
+                }
+            }
+
+            // If the pattern doesn't contain terms with fixed quantifiers then the beginning character look-up optimization is inefficient.
+            if (!containsFixedCharacter) {
+                m_pattern.m_containsBeginChars = false;
+                return;
+            }
+
+            size = m_pattern.m_beginChars.size();
+            // More than two entries are merged pairwise; one entry or none disables the look-up; exactly two are kept as they are.
+            if (size > 2)
+                m_beginCharHelper.merge(size - 1);
+            else if (size <= 1)
+                m_pattern.m_containsBeginChars = false;
+        }
+    }
+
 private:
     RegexPattern& m_pattern;
     PatternAlternative* m_alternative;
     CharacterClassConstructor m_characterClassConstructor;
+    BeginCharHelper m_beginCharHelper;
     bool m_invertCharacterClass;
+    bool m_invertParentheticalAssertion;
 };
 
 
@@ -717,12 +960,14 @@ const char* compileRegex(const UString& patternString, RegexPattern& pattern)
         ASSERT(numSubpatterns == pattern.m_numSubpatterns);
     }
 
+    constructor.checkForTerminalParentheses();
+    constructor.optimizeBOL();
+        
     constructor.setupOffsets();
+    constructor.setupBeginChars();
 
-    return false;
+    return 0;
 };
 
 
 } }
-
-#endif