diff options
author | Ben Murdoch <benm@google.com> | 2011-05-05 14:36:32 +0100 |
---|---|---|
committer | Ben Murdoch <benm@google.com> | 2011-05-10 15:38:30 +0100 |
commit | f05b935882198ccf7d81675736e3aeb089c5113a (patch) | |
tree | 4ea0ca838d9ef1b15cf17ddb3928efb427c7e5a1 /JavaScriptCore/yarr | |
parent | 60fbdcc62bced8db2cb1fd233cc4d1e4ea17db1b (diff) | |
download | external_webkit-f05b935882198ccf7d81675736e3aeb089c5113a.zip external_webkit-f05b935882198ccf7d81675736e3aeb089c5113a.tar.gz external_webkit-f05b935882198ccf7d81675736e3aeb089c5113a.tar.bz2 |
Merge WebKit at r74534: Initial merge by git.
Change-Id: I6ccd1154fa1b19c2ec2a66878eb675738735f1eb
Diffstat (limited to 'JavaScriptCore/yarr')
-rw-r--r-- | JavaScriptCore/yarr/RegexCompiler.cpp | 23 | ||||
-rw-r--r-- | JavaScriptCore/yarr/RegexInterpreter.cpp | 79 | ||||
-rw-r--r-- | JavaScriptCore/yarr/RegexInterpreter.h | 30 | ||||
-rw-r--r-- | JavaScriptCore/yarr/RegexJIT.cpp | 1071 | ||||
-rw-r--r-- | JavaScriptCore/yarr/RegexJIT.h | 22 | ||||
-rw-r--r-- | JavaScriptCore/yarr/RegexParser.h | 151 | ||||
-rw-r--r-- | JavaScriptCore/yarr/RegexPattern.h | 29 |
7 files changed, 1048 insertions, 357 deletions
diff --git a/JavaScriptCore/yarr/RegexCompiler.cpp b/JavaScriptCore/yarr/RegexCompiler.cpp index 06ecbad..9063359 100644 --- a/JavaScriptCore/yarr/RegexCompiler.cpp +++ b/JavaScriptCore/yarr/RegexCompiler.cpp @@ -459,7 +459,7 @@ public: PatternDisjunction* parenthesesDisjunction = new PatternDisjunction(m_alternative); m_pattern.m_disjunctions.append(parenthesesDisjunction); - m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, capture)); + m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, capture, false)); m_alternative = parenthesesDisjunction->addNewAlternative(); } @@ -467,7 +467,7 @@ public: { PatternDisjunction* parenthesesDisjunction = new PatternDisjunction(m_alternative); m_pattern.m_disjunctions.append(parenthesesDisjunction); - m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParentheticalAssertion, m_pattern.m_numSubpatterns + 1, parenthesesDisjunction, invert)); + m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParentheticalAssertion, m_pattern.m_numSubpatterns + 1, parenthesesDisjunction, false, invert)); m_alternative = parenthesesDisjunction->addNewAlternative(); m_invertParentheticalAssertion = invert; } @@ -477,16 +477,16 @@ public: ASSERT(m_alternative->m_parent); ASSERT(m_alternative->m_parent->m_parent); - PatternDisjunction* parenthesisDisjunction = m_alternative->m_parent; + PatternDisjunction* parenthesesDisjunction = m_alternative->m_parent; m_alternative = m_alternative->m_parent->m_parent; PatternTerm& lastTerm = m_alternative->lastTerm(); - unsigned numParenAlternatives = parenthesisDisjunction->m_alternatives.size(); + unsigned numParenAlternatives = parenthesesDisjunction->m_alternatives.size(); unsigned numBOLAnchoredAlts = 0; // Bubble up BOL flags for (unsigned i = 0; i < numParenAlternatives; i++) { - if (parenthesisDisjunction->m_alternatives[i]->m_startsWithBOL) + if (parenthesesDisjunction->m_alternatives[i]->m_startsWithBOL) numBOLAnchoredAlts++; } @@ -520,7 +520,7 @@ public: PatternTerm& term = currentAlternative->lastTerm(); ASSERT((term.type == PatternTerm::TypeParenthesesSubpattern) || (term.type == PatternTerm::TypeParentheticalAssertion)); - if ((term.type == PatternTerm::TypeParenthesesSubpattern) && term.invertOrCapture && (subpatternId == term.subpatternId)) { + if ((term.type == PatternTerm::TypeParenthesesSubpattern) && term.capture() && (subpatternId == term.parentheses.subpatternId)) { m_alternative->m_terms.append(PatternTerm::ForwardReference()); return; } @@ -595,7 +595,7 @@ public: term.quantify(min, QuantifierFixedCount); m_alternative->m_terms.append(copyTerm(term)); // NOTE: this term is interesting from an analysis perspective, in that it can be ignored..... - m_alternative->lastTerm().quantify((max == UINT_MAX) ? max : max - min, greedy ? QuantifierGreedy : QuantifierNonGreedy); + m_alternative->lastTerm().quantify((max == quantifyInfinite) ? max : max - min, greedy ? QuantifierGreedy : QuantifierNonGreedy); if (m_alternative->lastTerm().type == PatternTerm::TypeParenthesesSubpattern) m_alternative->lastTerm().parentheses.isCopy = true; } @@ -734,7 +734,8 @@ public: // This optimization identifies sets of parentheses that we will never need to backtrack. // In these cases we do not need to store state from prior iterations. // We can presently avoid backtracking for: - // * a set of parens at the end of the regular expression (last term in any of the alternatives of the main body disjunction). + // * where the parens are at the end of the regular expression (last term in any of the + // alternatives of the main body disjunction). // * where the parens are non-capturing, and quantified unbounded greedy (*). // * where the parens do not contain any capturing subpatterns. void checkForTerminalParentheses() @@ -745,13 +746,13 @@ public: return; Vector<PatternAlternative*>& alternatives = m_pattern.m_body->m_alternatives; - for (unsigned i =0; i < alternatives.size(); ++i) { + for (size_t i = 0; i < alternatives.size(); ++i) { Vector<PatternTerm>& terms = alternatives[i]->m_terms; if (terms.size()) { PatternTerm& term = terms.last(); if (term.type == PatternTerm::TypeParenthesesSubpattern && term.quantityType == QuantifierGreedy - && term.quantityCount == UINT_MAX + && term.quantityCount == quantifyInfinite && !term.capture()) term.parentheses.isTerminal = true; } @@ -844,7 +845,7 @@ public: return false; case PatternTerm::TypeParentheticalAssertion: - if (term.invertOrCapture) + if (term.invert()) return false; case PatternTerm::TypeParenthesesSubpattern: diff --git a/JavaScriptCore/yarr/RegexInterpreter.cpp b/JavaScriptCore/yarr/RegexInterpreter.cpp index 164158e..9900cbe 100644 --- a/JavaScriptCore/yarr/RegexInterpreter.cpp +++ b/JavaScriptCore/yarr/RegexInterpreter.cpp @@ -68,8 +68,6 @@ public: struct BackTrackInfoParentheses { uintptr_t matchAmount; ParenthesesDisjunctionContext* lastContext; - uintptr_t prevBegin; - uintptr_t prevEnd; }; static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context) @@ -515,8 +513,7 @@ public: if (matchEnd == -1) return true; - ASSERT((matchBegin == -1) == (matchEnd == -1)); - ASSERT(matchBegin <= matchEnd); + ASSERT((matchBegin == -1) || (matchBegin <= matchEnd)); if (matchBegin == matchEnd) return true; @@ -558,8 +555,7 @@ public: int matchBegin = output[(term.atom.subpatternId << 1)]; int matchEnd = output[(term.atom.subpatternId << 1) + 1]; - ASSERT((matchBegin == -1) == (matchEnd == -1)); - ASSERT(matchBegin <= matchEnd); + ASSERT((matchBegin == -1) || (matchBegin <= matchEnd)); if (matchBegin == matchEnd) return false; @@ -719,7 +715,7 @@ public: if (term.capture()) { // Technically this access to inputPosition should be accessing the begin term's // inputPosition, but for repeats other than fixed these values should be - // the same anyway! (we don't pre-check for greedy or non-greedy matches.) + // the same anyway! (We don't pre-check for greedy or non-greedy matches.) ASSERT((&term - term.atom.parenthesesWidth)->type == ByteTerm::TypeParenthesesSubpatternOnceBegin); ASSERT((&term - term.atom.parenthesesWidth)->inputPosition == term.inputPosition); unsigned subpatternId = term.atom.subpatternId; @@ -739,7 +735,7 @@ public: { ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin); ASSERT(term.atom.quantityType == QuantifierGreedy); - ASSERT(term.atom.quantityCount == UINT_MAX); + ASSERT(term.atom.quantityCount == quantifyInfinite); ASSERT(!term.capture()); BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation); @@ -765,7 +761,7 @@ public: { ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin); ASSERT(term.atom.quantityType == QuantifierGreedy); - ASSERT(term.atom.quantityCount == UINT_MAX); + ASSERT(term.atom.quantityCount == quantifyInfinite); ASSERT(!term.capture()); // If we backtrack to this point, we have failed to match this iteration of the parens. @@ -777,7 +773,7 @@ public: bool backtrackParenthesesTerminalEnd(ByteTerm&, DisjunctionContext*) { // 'Terminal' parentheses are at the end of the regex, and as such a match past end - // should always be returned as a successful match - we should never becktrack to here. + // should always be returned as a successful match - we should never backtrack to here. ASSERT_NOT_REACHED(); return false; } @@ -843,13 +839,8 @@ public: ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern); BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation); - - unsigned subpatternId = term.atom.subpatternId; ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction; - backTrack->prevBegin = output[(subpatternId << 1)]; - backTrack->prevEnd = output[(subpatternId << 1) + 1]; - backTrack->matchAmount = 0; backTrack->lastContext = 0; @@ -929,13 +920,6 @@ public: ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern); BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation); - - if (term.capture()) { - unsigned subpatternId = term.atom.subpatternId; - output[(subpatternId << 1)] = backTrack->prevBegin; - output[(subpatternId << 1) + 1] = backTrack->prevEnd; - } - ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction; switch (term.atom.quantityType) { @@ -1524,7 +1508,7 @@ public: { int beginTerm = m_bodyDisjunction->terms.size(); - m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, inputPosition)); + m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition)); m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin()); m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation; @@ -1537,7 +1521,7 @@ public: { int beginTerm = m_bodyDisjunction->terms.size(); - m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalBegin, subpatternId, capture, inputPosition)); + m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalBegin, subpatternId, capture, false, inputPosition)); m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin()); m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation; @@ -1554,7 +1538,7 @@ public: int beginTerm = m_bodyDisjunction->terms.size(); - m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, inputPosition)); + m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition)); m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin()); m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation; @@ -1567,7 +1551,7 @@ public: { int beginTerm = m_bodyDisjunction->terms.size(); - m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, invert, 0)); + m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, false, invert, 0)); m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin()); m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation; @@ -1584,10 +1568,10 @@ public: ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin); - bool invertOrCapture = m_bodyDisjunction->terms[beginTerm].invertOrCapture; + bool invert = m_bodyDisjunction->terms[beginTerm].invert(); unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId; - m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionEnd, subpatternId, invertOrCapture, inputPosition)); + m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionEnd, subpatternId, false, invert, inputPosition)); m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm; m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm; m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation; @@ -1679,7 +1663,7 @@ public: ByteTerm& parenthesesBegin = m_bodyDisjunction->terms[beginTerm]; - bool invertOrCapture = parenthesesBegin.invertOrCapture; + bool capture = parenthesesBegin.capture(); unsigned subpatternId = parenthesesBegin.atom.subpatternId; unsigned numSubpatterns = lastSubpatternId - subpatternId + 1; @@ -1693,7 +1677,7 @@ public: m_bodyDisjunction->terms.shrink(beginTerm); m_allParenthesesInfo.append(parenthesesDisjunction); - m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, invertOrCapture, inputPosition)); + m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, capture, inputPosition)); m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount; m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType; @@ -1708,10 +1692,10 @@ public: ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin); - bool invertOrCapture = m_bodyDisjunction->terms[beginTerm].invertOrCapture; + bool capture = m_bodyDisjunction->terms[beginTerm].capture(); unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId; - m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, invertOrCapture, inputPosition)); + m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, capture, false, inputPosition)); m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm; m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm; m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation; @@ -1730,10 +1714,10 @@ public: ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternTerminalBegin); - bool invertOrCapture = m_bodyDisjunction->terms[beginTerm].invertOrCapture; + bool capture = m_bodyDisjunction->terms[beginTerm].capture(); unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId; - m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalEnd, subpatternId, invertOrCapture, inputPosition)); + m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalEnd, subpatternId, capture, false, inputPosition)); m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm; m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm; m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation; @@ -1816,7 +1800,7 @@ public: break; case PatternTerm::TypeAssertionWordBoundary: - assertionWordBoundary(term.invertOrCapture, term.inputPosition - currentCountAlreadyChecked); + assertionWordBoundary(term.invert(), term.inputPosition - currentCountAlreadyChecked); break; case PatternTerm::TypePatternCharacter: @@ -1824,11 +1808,11 @@ public: break; case PatternTerm::TypeCharacterClass: - atomCharacterClass(term.characterClass, term.invertOrCapture, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType); + atomCharacterClass(term.characterClass, term.invert(), term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType); break; case PatternTerm::TypeBackReference: - atomBackReference(term.subpatternId, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType); + atomBackReference(term.backReferenceSubpatternId, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType); break; case PatternTerm::TypeForwardReference: @@ -1844,17 +1828,17 @@ public: else alternativeFrameLocation += RegexStackSpaceForBackTrackInfoParenthesesOnce; unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked; - atomParenthesesOnceBegin(term.parentheses.subpatternId, term.invertOrCapture, delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, alternativeFrameLocation); + atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, alternativeFrameLocation); emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount); atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType); } else if (term.parentheses.isTerminal) { unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked; - atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.invertOrCapture, delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, term.frameLocation + RegexStackSpaceForBackTrackInfoParenthesesOnce); + atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, term.frameLocation + RegexStackSpaceForBackTrackInfoParenthesesOnce); emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount); atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType); } else { unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked; - atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.invertOrCapture, delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, 0); + atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, 0); emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0); atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize); } @@ -1867,7 +1851,7 @@ public: ASSERT(currentCountAlreadyChecked >= (unsigned)term.inputPosition); int positiveInputOffset = currentCountAlreadyChecked - term.inputPosition; - atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invertOrCapture, term.frameLocation, alternativeFrameLocation); + atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invert(), term.frameLocation, alternativeFrameLocation); emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, positiveInputOffset, true); atomParentheticalAssertionEnd(0, term.frameLocation, term.quantityCount, term.quantityType); break; @@ -1885,19 +1869,6 @@ private: Vector<ByteDisjunction*> m_allParenthesesInfo; }; - -PassOwnPtr<BytecodePattern> byteCompileRegex(const UString& patternString, unsigned& numSubpatterns, const char*& error, BumpPointerAllocator* allocator, bool ignoreCase, bool multiline) -{ - RegexPattern pattern(ignoreCase, multiline); - - if ((error = compileRegex(patternString, pattern))) - return PassOwnPtr<BytecodePattern>(); - - numSubpatterns = pattern.m_numSubpatterns; - - return ByteCompiler(pattern).compile(allocator); -} - PassOwnPtr<BytecodePattern> byteCompileRegex(RegexPattern& pattern, BumpPointerAllocator* allocator) { return ByteCompiler(pattern).compile(allocator); diff --git a/JavaScriptCore/yarr/RegexInterpreter.h b/JavaScriptCore/yarr/RegexInterpreter.h index 2e23472..0fd8a57 100644 --- a/JavaScriptCore/yarr/RegexInterpreter.h +++ b/JavaScriptCore/yarr/RegexInterpreter.h @@ -87,7 +87,6 @@ struct ByteTerm { TypeParentheticalAssertionEnd, TypeCheckInput, } type; - bool invertOrCapture; union { struct { union { @@ -114,10 +113,14 @@ struct ByteTerm { unsigned checkInputCount; }; unsigned frameLocation; + bool m_capture : 1; + bool m_invert : 1; int inputPosition; ByteTerm(UChar ch, int inputPos, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType) : frameLocation(frameLocation) + , m_capture(false) + , m_invert(false) { switch (quantityType) { case QuantifierFixedCount: @@ -139,6 +142,8 @@ struct ByteTerm { ByteTerm(UChar lo, UChar hi, int inputPos, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType) : frameLocation(frameLocation) + , m_capture(false) + , m_invert(false) { switch (quantityType) { case QuantifierFixedCount: @@ -161,7 +166,8 @@ struct ByteTerm { ByteTerm(CharacterClass* characterClass, bool invert, int inputPos) : type(ByteTerm::TypeCharacterClass) - , invertOrCapture(invert) + , m_capture(false) + , m_invert(invert) { atom.characterClass = characterClass; atom.quantityType = QuantifierFixedCount; @@ -169,9 +175,10 @@ struct ByteTerm { inputPosition = inputPos; } - ByteTerm(Type type, unsigned subpatternId, ByteDisjunction* parenthesesInfo, bool invertOrCapture, int inputPos) + ByteTerm(Type type, unsigned subpatternId, ByteDisjunction* parenthesesInfo, bool capture, int inputPos) : type(type) - , invertOrCapture(invertOrCapture) + , m_capture(capture) + , m_invert(false) { atom.subpatternId = subpatternId; atom.parenthesesDisjunction = parenthesesInfo; @@ -182,15 +189,17 @@ struct ByteTerm { ByteTerm(Type type, bool invert = false) : type(type) - , invertOrCapture(invert) + , m_capture(false) + , m_invert(invert) { atom.quantityType = QuantifierFixedCount; atom.quantityCount = 1; } - ByteTerm(Type type, unsigned subpatternId, bool invertOrCapture, int inputPos) + ByteTerm(Type type, unsigned subpatternId, bool capture, bool invert, int inputPos) : type(type) - , invertOrCapture(invertOrCapture) + , m_capture(capture) + , m_invert(invert) { atom.subpatternId = subpatternId; atom.quantityType = QuantifierFixedCount; @@ -228,7 +237,7 @@ struct ByteTerm { static ByteTerm BackReference(unsigned subpatternId, int inputPos) { - return ByteTerm(TypeBackReference, subpatternId, false, inputPos); + return ByteTerm(TypeBackReference, subpatternId, false, false, inputPos); } static ByteTerm BodyAlternativeBegin(bool onceThrough) @@ -297,12 +306,12 @@ struct ByteTerm { bool invert() { - return invertOrCapture; + return m_invert; } bool capture() { - return invertOrCapture; + return m_capture; } }; @@ -364,7 +373,6 @@ private: Vector<CharacterClass*> m_userCharacterClasses; }; -PassOwnPtr<BytecodePattern> byteCompileRegex(const UString& pattern, unsigned& numSubpatterns, const char*& error, BumpPointerAllocator*, bool ignoreCase = false, bool multiline = false); PassOwnPtr<BytecodePattern> byteCompileRegex(RegexPattern& pattern, BumpPointerAllocator*); int interpretRegex(BytecodePattern* v_regex, const UChar* input, unsigned start, unsigned length, int* output); diff --git a/JavaScriptCore/yarr/RegexJIT.cpp b/JavaScriptCore/yarr/RegexJIT.cpp index acbd458..9e6756d 100644 --- a/JavaScriptCore/yarr/RegexJIT.cpp +++ b/JavaScriptCore/yarr/RegexJIT.cpp @@ -31,7 +31,6 @@ #include "LinkBuffer.h" #include "MacroAssembler.h" #include "RegexCompiler.h" -#include "RegexInterpreter.h" // temporary, remove when fallback is removed. #if ENABLE(YARR_JIT) @@ -111,16 +110,16 @@ class RegexGenerator : private MacroAssembler { int which = count >> 1; char lo = ranges[which].begin; char hi = ranges[which].end; - + // check if there are any ranges or matches below lo. If not, just jl to failure - // if there is anything else to check, check that first, if it falls through jmp to failure. if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) { Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo)); - + // generate code for all ranges before this one if (which) matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount); - + while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) { matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex]))); ++*matchIndex; @@ -155,25 +154,25 @@ class RegexGenerator : private MacroAssembler { { if (charClass->m_table) { ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table->m_table)); - matchDest.append(branchTest8(charClass->m_table->m_inverted ? Zero : NonZero, tableEntry)); + matchDest.append(branchTest8(charClass->m_table->m_inverted ? Zero : NonZero, tableEntry)); return; } Jump unicodeFail; if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) { Jump isAscii = branch32(LessThanOrEqual, character, Imm32(0x7f)); - + if (charClass->m_matchesUnicode.size()) { for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) { UChar ch = charClass->m_matchesUnicode[i]; matchDest.append(branch32(Equal, character, Imm32(ch))); } } - + if (charClass->m_rangesUnicode.size()) { for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) { UChar lo = charClass->m_rangesUnicode[i].begin; UChar hi = charClass->m_rangesUnicode[i].end; - + Jump below = branch32(LessThan, character, Imm32(lo)); matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi))); below.link(this); @@ -186,7 +185,7 @@ class RegexGenerator : private MacroAssembler { if (charClass->m_ranges.size()) { unsigned matchIndex = 0; - JumpList failures; + JumpList failures; matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size()); while (matchIndex < charClass->m_matches.size()) matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++]))); @@ -288,6 +287,27 @@ class RegexGenerator : private MacroAssembler { jump(Address(stackPointerRegister, frameLocation * sizeof(void*))); } + struct IndirectJumpEntry { + IndirectJumpEntry(int32_t stackOffset) + : m_stackOffset(stackOffset) + { + } + + IndirectJumpEntry(int32_t stackOffset, Jump jump) + : m_stackOffset(stackOffset) + { + addJump(jump); + } + + void addJump(Jump jump) + { + m_relJumps.append(jump); + } + + int32_t m_stackOffset; + JumpList m_relJumps; + }; + struct AlternativeBacktrackRecord { DataLabelPtr dataLabel; Label backtrackLocation; @@ -299,16 +319,469 @@ class RegexGenerator : private MacroAssembler { } }; + struct ParenthesesTail; + struct TermGenerationState; + + struct GenerationState { + typedef HashMap<int, IndirectJumpEntry*, WTF::IntHash<uint32_t>, UnsignedWithZeroKeyHashTraits<uint32_t> > IndirectJumpHashMap; + + GenerationState() + : m_parenNestingLevel(0) + { + } + + void addIndirectJumpEntry(int32_t stackOffset, Jump jump) + { + IndirectJumpHashMap::iterator result = m_indirectJumpMap.find(stackOffset); + + ASSERT(stackOffset >= 0); + + uint32_t offset = static_cast<uint32_t>(stackOffset); + + if (result == m_indirectJumpMap.end()) + m_indirectJumpMap.add(offset, new IndirectJumpEntry(stackOffset, jump)); + else + result->second->addJump(jump); + } + + void addIndirectJumpEntry(int32_t stackOffset, JumpList jumps) + { + JumpList::JumpVector jumpVector = jumps.jumps(); + size_t size = jumpVector.size(); + for (size_t i = 0; i < size; ++i) + addIndirectJumpEntry(stackOffset, jumpVector[i]); + + jumps.empty(); + } + + void emitIndirectJumpTable(MacroAssembler* masm) + { + for (IndirectJumpHashMap::iterator iter = m_indirectJumpMap.begin(); iter != m_indirectJumpMap.end(); ++iter) { + IndirectJumpEntry* indJumpEntry = iter->second; + indJumpEntry->m_relJumps.link(masm); + masm->jump(Address(stackPointerRegister, indJumpEntry->m_stackOffset)); + delete indJumpEntry; + } + } + + void incrementParenNestingLevel() + { + ++m_parenNestingLevel; + } + + void decrementParenNestingLevel() + { + --m_parenNestingLevel; + } + + ParenthesesTail* addParenthesesTail(PatternTerm& term, ParenthesesTail* nextOuterParenTail) + { + ParenthesesTail* parenthesesTail = new ParenthesesTail(term, m_parenNestingLevel, nextOuterParenTail); + m_parenTails.append(parenthesesTail); + m_parenTailsForIteration.append(parenthesesTail); + + return parenthesesTail; + } + + void emitParenthesesTail(RegexGenerator* generator) + { + unsigned vectorSize = m_parenTails.size(); + bool priorBacktrackFallThrough = false; + + // Emit in reverse order so parentTail N can fall through to N-1 + for (unsigned index = vectorSize; index > 0; --index) { + JumpList jumpsToNext; + priorBacktrackFallThrough = m_parenTails[index-1].get()->generateCode(generator, jumpsToNext, priorBacktrackFallThrough, index > 1); + if (index > 1) + jumpsToNext.linkTo(generator->label(), generator); + else + addJumpsToNextInteration(jumpsToNext); + } + m_parenTails.clear(); + } + + void addJumpToNextInteration(Jump jump) + { + m_jumpsToNextInteration.append(jump); + } + + void addJumpsToNextInteration(JumpList jumps) + { + m_jumpsToNextInteration.append(jumps); + } + + void addDataLabelToNextIteration(DataLabelPtr dataLabel) + { + m_dataPtrsToNextIteration.append(dataLabel); + } + + void linkToNextIteration(Label label) + { + m_nextIteration = label; + + for (unsigned i = 0; i < m_dataPtrsToNextIteration.size(); ++i) + m_backtrackRecords.append(AlternativeBacktrackRecord(m_dataPtrsToNextIteration[i], m_nextIteration)); + + m_dataPtrsToNextIteration.clear(); + + for (unsigned i = 0; i < m_parenTailsForIteration.size(); ++i) + m_parenTailsForIteration[i]->setNextIteration(m_nextIteration); + + m_parenTailsForIteration.clear(); + } + + void linkToNextIteration(RegexGenerator* generator) + { + m_jumpsToNextInteration.linkTo(m_nextIteration, generator); + } + + int m_parenNestingLevel; + Vector<AlternativeBacktrackRecord> m_backtrackRecords; + IndirectJumpHashMap m_indirectJumpMap; + Label m_nextIteration; + Vector<OwnPtr<ParenthesesTail> > m_parenTails; + JumpList m_jumpsToNextInteration; + Vector<DataLabelPtr> m_dataPtrsToNextIteration; + Vector<ParenthesesTail*> m_parenTailsForIteration; + }; + + struct BacktrackDestination { + typedef enum { + NoBacktrack, + BacktrackLabel, + BacktrackStackOffset, + BacktrackJumpList, + BacktrackLinked + } BacktrackType; + + BacktrackDestination() + : m_backtrackType(NoBacktrack) + , m_backtrackToLabel(0) + , m_subDataLabelPtr(0) + , m_nextBacktrack(0) + , m_backtrackSourceLabel(0) + , m_backtrackSourceJumps(0) + { + } + + BacktrackDestination(int32_t stackOffset) + : m_backtrackType(BacktrackStackOffset) + , m_backtrackStackOffset(stackOffset) + , m_backtrackToLabel(0) + , m_subDataLabelPtr(0) + , m_nextBacktrack(0) + , m_backtrackSourceLabel(0) + , m_backtrackSourceJumps(0) + { + } + + BacktrackDestination(Label label) + : m_backtrackType(BacktrackLabel) + , m_backtrackLabel(label) + , m_backtrackToLabel(0) + , m_subDataLabelPtr(0) + , m_nextBacktrack(0) + , m_backtrackSourceLabel(0) + , m_backtrackSourceJumps(0) + { + } + + void clear(bool doDataLabelClear = true) + { + m_backtrackType = NoBacktrack; + if (doDataLabelClear) + clearDataLabel(); + m_nextBacktrack = 0; + } + + void clearDataLabel() + { + m_dataLabelPtr = DataLabelPtr(); + } + + bool hasDestination() + { + return (m_backtrackType != NoBacktrack); + } + + bool isStackOffset() + { + return (m_backtrackType == BacktrackStackOffset); + } + + bool isLabel() + { + return (m_backtrackType == BacktrackLabel); + } + + bool isJumpList() + { + return (m_backtrackType == BacktrackJumpList); + } + + bool hasDataLabel() + { + return m_dataLabelPtr.isSet(); + } + + void copyTarget(BacktrackDestination& rhs, bool copyDataLabel = true) + { + m_backtrackType = rhs.m_backtrackType; + if (m_backtrackType == BacktrackStackOffset) + m_backtrackStackOffset = rhs.m_backtrackStackOffset; + else if (m_backtrackType == BacktrackLabel) + m_backtrackLabel = rhs.m_backtrackLabel; + if (copyDataLabel) + m_dataLabelPtr = rhs.m_dataLabelPtr; + m_backtrackSourceJumps = rhs.m_backtrackSourceJumps; + m_backtrackSourceLabel = rhs.m_backtrackSourceLabel; + } + + void copyTo(BacktrackDestination& lhs) + { + lhs.m_backtrackType = m_backtrackType; + if (m_backtrackType == BacktrackStackOffset) + lhs.m_backtrackStackOffset = m_backtrackStackOffset; + else if (m_backtrackType == BacktrackLabel) + lhs.m_backtrackLabel = m_backtrackLabel; + lhs.m_backtrackSourceJumps = m_backtrackSourceJumps; + lhs.m_backtrackSourceLabel = m_backtrackSourceLabel; + lhs.m_dataLabelPtr = m_dataLabelPtr; + lhs.m_backTrackJumps = m_backTrackJumps; + } + + void addBacktrackJump(Jump jump) + { + m_backTrackJumps.append(jump); + } + + void setStackOffset(int32_t stackOffset) + { + m_backtrackType = BacktrackStackOffset; + m_backtrackStackOffset = stackOffset; + } + + void setLabel(Label label) + { + m_backtrackType = BacktrackLabel; + m_backtrackLabel = label; + } + + void setNextBacktrackLabel(Label label) + { + if (m_nextBacktrack) + m_nextBacktrack->setLabel(label); + } + + void copyBacktrackToLabel(BacktrackDestination& rhs) + { + if (rhs.m_backtrackToLabel) + m_backtrackToLabel = rhs.m_backtrackToLabel; + } + + void setBacktrackToLabel(Label* backtrackToLabel) + { + if (!m_backtrackToLabel) + m_backtrackToLabel = backtrackToLabel; + } + + void setBacktrackJumpList(JumpList* jumpList) + { + m_backtrackType = BacktrackJumpList; + m_backtrackSourceJumps = jumpList; + } + + void setBacktrackSourceLabel(Label* backtrackSourceLabel) + { + m_backtrackSourceLabel = backtrackSourceLabel; + } + + void setDataLabel(DataLabelPtr dp) + { + if (m_subDataLabelPtr) { + *m_subDataLabelPtr = dp; + m_subDataLabelPtr = 0; + } else + m_dataLabelPtr = dp; + } + + void setSubDataLabelPtr(DataLabelPtr* subDataLabelPtr) + { + m_subDataLabelPtr = subDataLabelPtr; + } + + void linkToNextBacktrack(BacktrackDestination* nextBacktrack) + { + m_nextBacktrack = nextBacktrack; + } + + int32_t getStackOffset() + { + ASSERT(m_backtrackType == BacktrackStackOffset); + return m_backtrackStackOffset; + } + + Label getLabel() + { + ASSERT(m_backtrackType == BacktrackLabel); + return m_backtrackLabel; + } + + JumpList& getBacktrackJumps() + { + return m_backTrackJumps; + } + + DataLabelPtr& getDataLabel() + { + return m_dataLabelPtr; + } + + void jumpToBacktrack(MacroAssembler* masm) + { + if (isJumpList()) { + if (m_backtrackSourceLabel && (m_backtrackSourceLabel->isSet())) + masm->jump().linkTo(*m_backtrackSourceLabel, masm); + else + m_backtrackSourceJumps->append(masm->jump()); + } else if (isStackOffset()) + masm->jump(Address(stackPointerRegister, m_backtrackStackOffset)); + else if (isLabel()) + masm->jump().linkTo(m_backtrackLabel, masm); + else + m_backTrackJumps.append(masm->jump()); + } + + void jumpToBacktrack(RegexGenerator* generator, Jump jump) + { + if (isJumpList()) { + if (m_backtrackSourceLabel && (m_backtrackSourceLabel->isSet())) + jump.linkTo(*m_backtrackSourceLabel, generator); + else + m_backtrackSourceJumps->append(jump); + } else if (isStackOffset()) + generator->m_expressionState.addIndirectJumpEntry(getStackOffset(), jump); + else if (isLabel()) + jump.linkTo(getLabel(), generator); + else + m_backTrackJumps.append(jump); + } + + void jumpToBacktrack(RegexGenerator* generator, JumpList& jumps) + { + if (isJumpList()) { + if (m_backtrackSourceLabel && (m_backtrackSourceLabel->isSet())) + jumps.linkTo(*m_backtrackSourceLabel, generator); + else + m_backtrackSourceJumps->append(jumps); + } else if (isStackOffset()) + generator->m_expressionState.addIndirectJumpEntry(getStackOffset(), jumps); + else if (isLabel()) + jumps.linkTo(getLabel(), generator); + else + m_backTrackJumps.append(jumps); + } + + bool linkDataLabelToHereIfExists(RegexGenerator* generator) + { + if (hasDataLabel()) { + generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(getDataLabel(), generator->label())); + clearDataLabel(); + return true; + } + + return false; + } + + bool plantJumpToBacktrackIfExists(RegexGenerator* generator) + { + if (isJumpList()) { + if (m_backtrackSourceLabel && (m_backtrackSourceLabel->isSet())) + generator->jump(*m_backtrackSourceLabel); + else + m_backtrackSourceJumps->append(generator->jump()); + + return true; + } + + if (isStackOffset()) { + generator->jump(Address(stackPointerRegister, getStackOffset())); + return true; + } + + if (isLabel()) { + generator->jump(getLabel()); + if (hasDataLabel()) { + generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(getDataLabel(), getLabel())); + clearDataLabel(); + } + return true; + } + + return false; + } + + void linkAlternativeBacktracks(RegexGenerator* generator, bool nextIteration = false) + { + Label hereLabel = generator->label(); + + if (m_backtrackToLabel) { + *m_backtrackToLabel = hereLabel; + m_backtrackToLabel = 0; + } + + m_backTrackJumps.link(generator); + + if (nextIteration) + generator->m_expressionState.linkToNextIteration(hereLabel); + + if (hasDataLabel()) { + generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(getDataLabel(), hereLabel)); + // data label cleared as a result of the clear() below + } + + clear(); + } + + void linkAlternativeBacktracksTo(RegexGenerator* generator, Label label, bool nextIteration = false) + { + m_backTrackJumps.linkTo(label, generator); + + if (nextIteration) + generator->m_expressionState.linkToNextIteration(label); + + if (hasDataLabel()) { + generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(getDataLabel(), label)); + clearDataLabel(); + } + } + + private: + BacktrackType m_backtrackType; + int32_t m_backtrackStackOffset; + Label m_backtrackLabel; + DataLabelPtr m_dataLabelPtr; + Label* m_backtrackToLabel; + DataLabelPtr* m_subDataLabelPtr; + BacktrackDestination* m_nextBacktrack; + Label* m_backtrackSourceLabel; + JumpList* m_backtrackSourceJumps; + JumpList m_backTrackJumps; + }; + struct TermGenerationState { TermGenerationState(PatternDisjunction* disjunction, unsigned checkedTotal) : disjunction(disjunction) , checkedTotal(checkedTotal) + , m_subParenNum(0) + , m_linkedBacktrack(0) + , m_parenthesesTail(0) { } void resetAlternative() { - isBackTrackGenerated = false; + m_backtrack.clear(); alt = 0; } bool alternativeValid() @@ -323,11 +796,16 @@ class RegexGenerator : private MacroAssembler { { return disjunction->m_alternatives[alt]; } + bool isLastAlternative() + { + return (alt + 1) == disjunction->m_alternatives.size(); + } void resetTerm() { ASSERT(alternativeValid()); t = 0; + m_subParenNum = 0; } bool termValid() { @@ -349,11 +827,25 @@ class RegexGenerator : private MacroAssembler { ASSERT(alternativeValid()); return (t + 1) == alternative()->m_terms.size(); } + unsigned getSubParenNum() + { + return m_subParenNum++; + } bool isMainDisjunction() { return !disjunction->m_parent; } + void setParenthesesTail(ParenthesesTail* parenthesesTail) + { + m_parenthesesTail = parenthesesTail; + } + + ParenthesesTail* getParenthesesTail() + { + return m_parenthesesTail; + } + PatternTerm& lookaheadTerm() { ASSERT(alternativeValid()); @@ -374,52 +866,106 @@ class RegexGenerator : private MacroAssembler { return term().inputPosition - checkedTotal; } - void jumpToBacktrack(Jump jump, MacroAssembler* masm) + void clearBacktrack() { - if (isBackTrackGenerated) - jump.linkTo(backtrackLabel, masm); - else - backTrackJumps.append(jump); + m_backtrack.clear(false); + m_linkedBacktrack = 0; } - void jumpToBacktrack(JumpList& jumps, MacroAssembler* masm) + + void jumpToBacktrack(MacroAssembler* masm) { - if (isBackTrackGenerated) - jumps.linkTo(backtrackLabel, masm); - else - backTrackJumps.append(jumps); + m_backtrack.jumpToBacktrack(masm); + } + + void jumpToBacktrack(RegexGenerator* generator, Jump jump) + { + m_backtrack.jumpToBacktrack(generator, jump); + } + + void jumpToBacktrack(RegexGenerator* generator, JumpList& jumps) + { + m_backtrack.jumpToBacktrack(generator, jumps); + } + + bool plantJumpToBacktrackIfExists(RegexGenerator* generator) + { + return m_backtrack.plantJumpToBacktrackIfExists(generator); } - bool plantJumpToBacktrackIfExists(MacroAssembler* masm) + + bool linkDataLabelToBacktrackIfExists(RegexGenerator* generator) { - if (isBackTrackGenerated) { - masm->jump(backtrackLabel); + if ((m_backtrack.isLabel()) && (m_backtrack.hasDataLabel())) { + generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(m_backtrack.getDataLabel(), m_backtrack.getLabel())); + m_backtrack.clearDataLabel(); return true; } + return false; } + void addBacktrackJump(Jump jump) { - backTrackJumps.append(jump); + m_backtrack.addBacktrackJump(jump); } - void setBacktrackGenerated(Label label) + + void setBacktrackDataLabel(DataLabelPtr dp) { - isBackTrackGenerated = true; - backtrackLabel = label; + m_backtrack.setDataLabel(dp); } - void linkAlternativeBacktracks(MacroAssembler* masm) + + void setBackTrackStackOffset(int32_t stackOffset) + { + m_backtrack.setStackOffset(stackOffset); + } + + void setBacktrackLabel(Label label) + { + m_backtrack.setLabel(label); + } + + void linkAlternativeBacktracks(RegexGenerator* generator, bool nextIteration = false) + { + m_backtrack.linkAlternativeBacktracks(generator, nextIteration); + m_linkedBacktrack = 0; + } + + void linkAlternativeBacktracksTo(RegexGenerator* generator, Label label, bool nextIteration = false) + { + m_backtrack.linkAlternativeBacktracksTo(generator, label, nextIteration); + } + + void setBacktrackLink(BacktrackDestination* linkedBacktrack) + { + m_linkedBacktrack = linkedBacktrack; + } + + void chainBacktracks(BacktrackDestination* followonBacktrack) + { + if (m_linkedBacktrack) + m_linkedBacktrack->linkToNextBacktrack(followonBacktrack); + } + + void chainBacktrackJumps(JumpList* jumpList) { - isBackTrackGenerated = false; - backTrackJumps.link(masm); + if (m_linkedBacktrack && !(m_linkedBacktrack->hasDestination())) + m_linkedBacktrack->setBacktrackJumpList(jumpList); } - void linkAlternativeBacktracksTo(Label label, MacroAssembler* masm) + + BacktrackDestination& getBacktrackDestination() { - isBackTrackGenerated = false; - backTrackJumps.linkTo(label, masm); + return m_backtrack; } - void propagateBacktrackingFrom(TermGenerationState& nestedParenthesesState, MacroAssembler* masm) + + void propagateBacktrackingFrom(RegexGenerator* generator, BacktrackDestination& backtrack, bool doJump = true) { - jumpToBacktrack(nestedParenthesesState.backTrackJumps, masm); - if (nestedParenthesesState.isBackTrackGenerated) - setBacktrackGenerated(nestedParenthesesState.backtrackLabel); + if (doJump) + m_backtrack.jumpToBacktrack(generator, backtrack.getBacktrackJumps()); + if (backtrack.hasDestination()) { + if (m_backtrack.hasDataLabel()) + generator->m_expressionState.addDataLabelToNextIteration(m_backtrack.getDataLabel()); + + m_backtrack.copyTarget(backtrack, doJump); + } } PatternDisjunction* disjunction; @@ -427,9 +973,154 @@ class RegexGenerator : private MacroAssembler { private: unsigned alt; unsigned t; - JumpList backTrackJumps; - Label backtrackLabel; - bool isBackTrackGenerated; + unsigned m_subParenNum; + BacktrackDestination m_backtrack; + BacktrackDestination* m_linkedBacktrack; + ParenthesesTail* m_parenthesesTail; + + }; + + struct ParenthesesTail { + ParenthesesTail(PatternTerm& term, int nestingLevel, ParenthesesTail* nextOuterParenTail) + : m_term(term) + , m_nestingLevel(nestingLevel) + , m_subParenIndex(0) + , m_nextOuterParenTail(nextOuterParenTail) + { + } + + void processBacktracks(RegexGenerator* generator, TermGenerationState& state, TermGenerationState& parenthesesState, Label nonGreedyTryParentheses, Label fallThrough) + { + m_nonGreedyTryParentheses = nonGreedyTryParentheses; + m_fallThrough = fallThrough; + + m_subParenIndex = state.getSubParenNum(); + parenthesesState.getBacktrackDestination().copyTo(m_parenBacktrack); + state.chainBacktracks(&m_backtrack); + BacktrackDestination& stateBacktrack = state.getBacktrackDestination(); + stateBacktrack.copyTo(m_backtrack); + stateBacktrack.setBacktrackToLabel(&m_backtrackToLabel); + state.setBacktrackLink(&m_backtrack); + stateBacktrack.setSubDataLabelPtr(&m_dataAfterLabelPtr); + + m_doDirectBacktrack = m_parenBacktrack.hasDestination(); + + if ((m_term.quantityType == QuantifierGreedy) || (m_term.quantityType == QuantifierNonGreedy)) + m_doDirectBacktrack = false; + + if (m_doDirectBacktrack) + state.propagateBacktrackingFrom(generator, m_parenBacktrack, false); + else { + stateBacktrack.setBacktrackJumpList(&m_pattBacktrackJumps); + stateBacktrack.setBacktrackSourceLabel(&m_backtrackFromAfterParens); + } + + parenthesesState.chainBacktrackJumps(&m_pattBacktrackJumps); + } + + void setNextIteration(Label nextIteration) + { + if (!m_nestingLevel && !m_backtrackToLabel.isSet()) + m_backtrackToLabel = nextIteration; + } + + void addAfterParenJump(Jump jump) + { + m_pattBacktrackJumps.append(jump); + } + + bool generateCode(RegexGenerator* generator, JumpList& jumpsToNext, bool priorBackTrackFallThrough, bool nextBacktrackFallThrough) + { + const RegisterID indexTemporary = regT0; + unsigned parenthesesFrameLocation = m_term.frameLocation; + Jump fromPriorBacktrack; + bool needJumpForPriorParenTail = false; + + if (priorBackTrackFallThrough + && ((m_term.quantityType == QuantifierGreedy) + || (m_term.quantityType == QuantifierNonGreedy) + || (!m_doDirectBacktrack && m_parenBacktrack.hasDestination()))) { + // If the prior paren tail code assumed that it could fall through, + // but we need to generate after paren backtrack code, then provide + // a jump around that code for the prior paren tail code. + // A regular expressing like ((xxx)...)? needs this. + fromPriorBacktrack = generator->jump(); + needJumpForPriorParenTail = true; + } + + if (!m_backtrack.hasDestination()) { + if (m_backtrackToLabel.isSet()) { + m_backtrack.setLabel(m_backtrackToLabel); + nextBacktrackFallThrough = false; + } else if (!m_subParenIndex && m_nextOuterParenTail) { + // If we don't have a destination and we are the first term of a nested paren, go + // back to the outer paren. + // There is an optimization if the next outer paren is the next paren to be emitted. + // In that case we really want the else clause. + m_backtrack.setBacktrackJumpList(&m_nextOuterParenTail->m_withinBacktrackJumps); + nextBacktrackFallThrough = false; + } else + m_backtrack.setBacktrackJumpList(&jumpsToNext); + } else + nextBacktrackFallThrough = false; + + // A failure AFTER the parens jumps here - Backtrack to this paren + m_backtrackFromAfterParens = generator->label(); + + if (m_dataAfterLabelPtr.isSet()) + generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(m_dataAfterLabelPtr, m_backtrackFromAfterParens)); + + m_pattBacktrackJumps.link(generator); + + if (m_term.quantityType == QuantifierGreedy) { + // If this is -1 we have now tested with both with and without the parens. + generator->loadFromFrame(parenthesesFrameLocation, indexTemporary); + m_backtrack.jumpToBacktrack(generator, generator->branch32(Equal, indexTemporary, Imm32(-1))); + } else if (m_term.quantityType == QuantifierNonGreedy) { + // If this is -1 we have now tested with both with and without the parens. + generator->loadFromFrame(parenthesesFrameLocation, indexTemporary); + generator->branch32(Equal, indexTemporary, Imm32(-1)).linkTo(m_nonGreedyTryParentheses, generator); + } + + if (!m_doDirectBacktrack) + m_parenBacktrack.plantJumpToBacktrackIfExists(generator); + + // A failure WITHIN the parens jumps here + if (needJumpForPriorParenTail) + fromPriorBacktrack.link(generator); + m_parenBacktrack.linkAlternativeBacktracks(generator); + m_withinBacktrackJumps.link(generator); + + if (m_term.capture()) + generator->store32(Imm32(-1), Address(output, (m_term.parentheses.subpatternId << 1) * sizeof(int))); + + if (m_term.quantityType == QuantifierGreedy) { + generator->storeToFrame(Imm32(-1), parenthesesFrameLocation); + generator->jump().linkTo(m_fallThrough, generator); + nextBacktrackFallThrough = false; + } else if (!nextBacktrackFallThrough) + m_backtrack.jumpToBacktrack(generator); + + if (!m_doDirectBacktrack) + m_backtrack.setNextBacktrackLabel(m_backtrackFromAfterParens); + + return nextBacktrackFallThrough; + } + + PatternTerm& m_term; + int m_nestingLevel; + unsigned m_subParenIndex; + ParenthesesTail* m_nextOuterParenTail; + Label m_nonGreedyTryParentheses; + Label m_fallThrough; + Label m_backtrackToLabel; + Label m_backtrackFromAfterParens; + DataLabelPtr m_dataAfterLabelPtr; + JumpList m_pattBacktrackJumps; + JumpList m_withinBacktrackJumps; + BacktrackDestination m_parenBacktrack; + BacktrackDestination m_backtrack; + bool m_doDirectBacktrack; }; void generateAssertionBOL(TermGenerationState& state) @@ -445,15 +1136,15 @@ class RegexGenerator : private MacroAssembler { readCharacter(state.inputOffset() - 1, character); matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass()); - state.jumpToBacktrack(jump(), this); + state.jumpToBacktrack(this); matchDest.link(this); } else { // Erk, really should poison out these alternatives early. :-/ if (term.inputPosition) - state.jumpToBacktrack(jump(), this); + state.jumpToBacktrack(this); else - state.jumpToBacktrack(branch32(NotEqual, index, Imm32(state.checkedTotal)), this); + state.jumpToBacktrack(this, branch32(NotEqual, index, Imm32(state.checkedTotal))); } } @@ -470,15 +1161,15 @@ class RegexGenerator : private MacroAssembler { readCharacter(state.inputOffset(), character); matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass()); - state.jumpToBacktrack(jump(), this); + state.jumpToBacktrack(this); matchDest.link(this); } else { if (term.inputPosition == state.checkedTotal) - state.jumpToBacktrack(notAtEndOfInput(), this); + state.jumpToBacktrack(this, notAtEndOfInput()); // Erk, really should poison out these alternatives early. :-/ else - state.jumpToBacktrack(jump(), this); + state.jumpToBacktrack(this); } } @@ -512,20 +1203,20 @@ class RegexGenerator : private MacroAssembler { // We fall through to here if the last character was not a wordchar. JumpList nonWordCharThenWordChar; JumpList nonWordCharThenNonWordChar; - if (term.invertOrCapture) { + if (term.invert()) { matchAssertionWordchar(state, nonWordCharThenNonWordChar, nonWordCharThenWordChar); nonWordCharThenWordChar.append(jump()); } else { matchAssertionWordchar(state, nonWordCharThenWordChar, nonWordCharThenNonWordChar); nonWordCharThenNonWordChar.append(jump()); } - state.jumpToBacktrack(nonWordCharThenNonWordChar, this); + state.jumpToBacktrack(this, nonWordCharThenNonWordChar); // We jump here if the last character was a wordchar. matchDest.link(this); JumpList wordCharThenWordChar; JumpList wordCharThenNonWordChar; - if (term.invertOrCapture) { + if (term.invert()) { matchAssertionWordchar(state, wordCharThenNonWordChar, wordCharThenWordChar); wordCharThenWordChar.append(jump()); } else { @@ -533,8 +1224,8 @@ class RegexGenerator : private MacroAssembler { // This can fall-though! } - state.jumpToBacktrack(wordCharThenWordChar, this); - + state.jumpToBacktrack(this, wordCharThenWordChar); + nonWordCharThenWordChar.link(this); wordCharThenNonWordChar.link(this); } @@ -547,10 +1238,10 @@ class RegexGenerator : private MacroAssembler { if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { readCharacter(state.inputOffset(), character); or32(Imm32(32), character); - state.jumpToBacktrack(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))), this); + state.jumpToBacktrack(this, branch32(NotEqual, character, Imm32(Unicode::toLower(ch)))); } else { ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch))); - state.jumpToBacktrack(jumpIfCharNotEquals(ch, state.inputOffset()), this); + state.jumpToBacktrack(this, jumpIfCharNotEquals(ch, state.inputOffset())); } } @@ -562,7 +1253,7 @@ class RegexGenerator : private MacroAssembler { int mask = 0; int chPair = ch1 | (ch2 << 16); - + if (m_pattern.m_ignoreCase) { if (isASCIIAlpha(ch1)) mask |= 32; @@ -573,9 +1264,9 @@ class RegexGenerator : private MacroAssembler { if (mask) { load32WithUnalignedHalfWords(BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), character); or32(Imm32(mask), character); - state.jumpToBacktrack(branch32(NotEqual, character, Imm32(chPair | mask)), this); + state.jumpToBacktrack(this, branch32(NotEqual, character, Imm32(chPair | mask))); } else - state.jumpToBacktrack(branch32WithUnalignedHalfWords(NotEqual, BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), Imm32(chPair)), this); + state.jumpToBacktrack(this, branch32WithUnalignedHalfWords(NotEqual, BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), Imm32(chPair))); } void generatePatternCharacterFixed(TermGenerationState& state) @@ -592,10 +1283,10 @@ class RegexGenerator : private MacroAssembler { if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character); or32(Imm32(32), character); - state.jumpToBacktrack(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))), this); + state.jumpToBacktrack(this, branch32(NotEqual, character, Imm32(Unicode::toLower(ch)))); } else { ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch))); - state.jumpToBacktrack(branch16(NotEqual, BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), Imm32(ch)), this); + state.jumpToBacktrack(this, branch16(NotEqual, BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), Imm32(ch))); } add32(Imm32(1), countRegister); branch32(NotEqual, countRegister, index).linkTo(loop, this); @@ -607,7 +1298,7 @@ class RegexGenerator : private MacroAssembler { const RegisterID countRegister = regT1; PatternTerm& term = state.term(); UChar ch = term.patternCharacter; - + move(Imm32(0), countRegister); JumpList failures; @@ -624,7 +1315,7 @@ class RegexGenerator : private MacroAssembler { add32(Imm32(1), countRegister); add32(Imm32(1), index); - if (term.quantityCount != 0xffffffff) { + if (term.quantityCount != quantifyInfinite) { branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this); failures.append(jump()); } else @@ -632,7 +1323,7 @@ class RegexGenerator : private MacroAssembler { Label backtrackBegin(this); loadFromFrame(term.frameLocation, countRegister); - state.jumpToBacktrack(branchTest32(Zero, countRegister), this); + state.jumpToBacktrack(this, branchTest32(Zero, countRegister)); sub32(Imm32(1), countRegister); sub32(Imm32(1), index); @@ -640,7 +1331,7 @@ class RegexGenerator : private MacroAssembler { storeToFrame(countRegister, term.frameLocation); - state.setBacktrackGenerated(backtrackBegin); + state.setBacktrackLabel(backtrackBegin); } void generatePatternCharacterNonGreedy(TermGenerationState& state) @@ -649,20 +1340,20 @@ class RegexGenerator : private MacroAssembler { const RegisterID countRegister = regT1; PatternTerm& term = state.term(); UChar ch = term.patternCharacter; - + move(Imm32(0), countRegister); Jump firstTimeDoNothing = jump(); Label hardFail(this); sub32(countRegister, index); - state.jumpToBacktrack(jump(), this); + state.jumpToBacktrack(this); Label backtrackBegin(this); loadFromFrame(term.frameLocation, countRegister); atEndOfInput().linkTo(hardFail, this); - if (term.quantityCount != 0xffffffff) + if (term.quantityCount != quantifyInfinite) branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail); if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { readCharacter(state.inputOffset(), character); @@ -679,7 +1370,7 @@ class RegexGenerator : private MacroAssembler { firstTimeDoNothing.link(this); storeToFrame(countRegister, term.frameLocation); - state.setBacktrackGenerated(backtrackBegin); + state.setBacktrackLabel(backtrackBegin); } void generateCharacterClassSingle(TermGenerationState& state) @@ -691,10 +1382,10 @@ class RegexGenerator : private MacroAssembler { readCharacter(state.inputOffset(), character); matchCharacterClass(character, matchDest, term.characterClass); - if (term.invertOrCapture) - state.jumpToBacktrack(matchDest, this); + if (term.invert()) + state.jumpToBacktrack(this, matchDest); else { - state.jumpToBacktrack(jump(), this); + state.jumpToBacktrack(this); matchDest.link(this); } } @@ -713,10 +1404,10 @@ class RegexGenerator : private MacroAssembler { load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character); matchCharacterClass(character, matchDest, term.characterClass); - if (term.invertOrCapture) - state.jumpToBacktrack(matchDest, this); + if (term.invert()) + state.jumpToBacktrack(this, matchDest); else { - state.jumpToBacktrack(jump(), this); + state.jumpToBacktrack(this); matchDest.link(this); } @@ -729,14 +1420,14 @@ class RegexGenerator : private MacroAssembler { const RegisterID character = regT0; const RegisterID countRegister = regT1; PatternTerm& term = state.term(); - + move(Imm32(0), countRegister); JumpList failures; Label loop(this); failures.append(atEndOfInput()); - if (term.invertOrCapture) { + if (term.invert()) { readCharacter(state.inputOffset(), character); matchCharacterClass(character, failures, term.characterClass); } else { @@ -749,7 +1440,7 @@ class RegexGenerator : private MacroAssembler { add32(Imm32(1), countRegister); add32(Imm32(1), index); - if (term.quantityCount != 0xffffffff) { + if (term.quantityCount != quantifyInfinite) { branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this); failures.append(jump()); } else @@ -757,7 +1448,7 @@ class RegexGenerator : private MacroAssembler { Label backtrackBegin(this); loadFromFrame(term.frameLocation, countRegister); - state.jumpToBacktrack(branchTest32(Zero, countRegister), this); + state.jumpToBacktrack(this, branchTest32(Zero, countRegister)); sub32(Imm32(1), countRegister); sub32(Imm32(1), index); @@ -765,7 +1456,7 @@ class RegexGenerator : private MacroAssembler { storeToFrame(countRegister, term.frameLocation); - state.setBacktrackGenerated(backtrackBegin); + state.setBacktrackLabel(backtrackBegin); } void generateCharacterClassNonGreedy(TermGenerationState& state) @@ -773,14 +1464,14 @@ class RegexGenerator : private MacroAssembler { const RegisterID character = regT0; const RegisterID countRegister = regT1; PatternTerm& term = state.term(); - + move(Imm32(0), countRegister); Jump firstTimeDoNothing = jump(); Label hardFail(this); sub32(countRegister, index); - state.jumpToBacktrack(jump(), this); + state.jumpToBacktrack(this); Label backtrackBegin(this); loadFromFrame(term.frameLocation, countRegister); @@ -792,7 +1483,7 @@ class RegexGenerator : private MacroAssembler { readCharacter(state.inputOffset(), character); matchCharacterClass(character, matchDest, term.characterClass); - if (term.invertOrCapture) + if (term.invert()) matchDest.linkTo(hardFail, this); else { jump(hardFail); @@ -805,14 +1496,14 @@ class RegexGenerator : private MacroAssembler { firstTimeDoNothing.link(this); storeToFrame(countRegister, term.frameLocation); - state.setBacktrackGenerated(backtrackBegin); + state.setBacktrackLabel(backtrackBegin); } void generateParenthesesDisjunction(PatternTerm& parenthesesTerm, TermGenerationState& state, unsigned alternativeFrameLocation) { ASSERT((parenthesesTerm.type == PatternTerm::TypeParenthesesSubpattern) || (parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion)); ASSERT(parenthesesTerm.quantityCount == 1); - + PatternDisjunction* disjunction = parenthesesTerm.parentheses.disjunction; unsigned preCheckedCount = ((parenthesesTerm.quantityType == QuantifierFixedCount) && (parenthesesTerm.type != PatternTerm::TypeParentheticalAssertion)) ? disjunction->m_minimumSize : 0; @@ -834,12 +1525,12 @@ class RegexGenerator : private MacroAssembler { Label backtrackBegin(this); sub32(Imm32(countToCheck), index); state.addBacktrackJump(jump()); - + skip.link(this); - state.setBacktrackGenerated(backtrackBegin); + state.setBacktrackLabel(backtrackBegin); - state.jumpToBacktrack(jumpIfNoAvailableInput(countToCheck), this); + state.jumpToBacktrack(this, jumpIfNoAvailableInput(countToCheck)); state.checkedTotal += countToCheck; } @@ -849,6 +1540,7 @@ class RegexGenerator : private MacroAssembler { state.checkedTotal -= countToCheck; } else { JumpList successes; + bool propogateBacktrack = false; for (state.resetAlternative(); state.alternativeValid(); state.nextAlternative()) { @@ -867,37 +1559,35 @@ class RegexGenerator : private MacroAssembler { // Matched an alternative. DataLabelPtr dataLabel = storeToFrameWithPatch(alternativeFrameLocation); - successes.append(jump()); + + if (!state.isLastAlternative() || countToCheck) + successes.append(jump()); // Alternative did not match. - Label backtrackLocation(this); - - // Can we backtrack the alternative? - if so, do so. If not, just fall through to the next one. - state.plantJumpToBacktrackIfExists(this); - - state.linkAlternativeBacktracks(this); + + state.setBacktrackDataLabel(dataLabel); + + // Do we have a backtrack destination? + // if so, link the data label to it. + state.linkDataLabelToBacktrackIfExists(this); + + if (!state.isLastAlternative() || countToCheck) + state.linkAlternativeBacktracks(this); if (countToCheck) { sub32(Imm32(countToCheck), index); state.checkedTotal -= countToCheck; - } - - m_backtrackRecords.append(AlternativeBacktrackRecord(dataLabel, backtrackLocation)); + } else if (state.isLastAlternative()) + propogateBacktrack = true; } // We fall through to here when the last alternative fails. // Add a backtrack out of here for the parenthese handling code to link up. - state.addBacktrackJump(jump()); + if (!propogateBacktrack) + state.addBacktrackJump(jump()); - // Generate a trampoline for the parens code to backtrack to, to retry the + // Save address on stack for the parens code to backtrack to, to retry the // next alternative. - state.setBacktrackGenerated(label()); - loadFromFrameAndJump(alternativeFrameLocation); - - // FIXME: both of the above hooks are a little inefficient, in that you - // may end up trampolining here, just to trampoline back out to the - // parentheses code, or vice versa. We can probably eliminate a jump - // by restructuring, but coding this way for now for simplicity during - // development. + state.setBackTrackStackOffset(alternativeFrameLocation * sizeof(void*)); successes.link(this); } @@ -918,13 +1608,19 @@ class RegexGenerator : private MacroAssembler { alternativeFrameLocation += RegexStackSpaceForBackTrackInfoParenthesesOnce; // optimized case - no capture & no quantifier can be handled in a light-weight manner. - if (!term.invertOrCapture && (term.quantityType == QuantifierFixedCount)) { + if (!term.capture() && (term.quantityType == QuantifierFixedCount)) { + m_expressionState.incrementParenNestingLevel(); + TermGenerationState parenthesesState(disjunction, state.checkedTotal); generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation); // this expects that any backtracks back out of the parentheses will be in the - // parenthesesState's backTrackJumps vector, and that if they need backtracking - // they will have set an entry point on the parenthesesState's backtrackLabel. - state.propagateBacktrackingFrom(parenthesesState, this); + // parenthesesState's m_backTrackJumps vector, and that if they need backtracking + // they will have set an entry point on the parenthesesState's m_backtrackLabel. + BacktrackDestination& parenthesesBacktrack = parenthesesState.getBacktrackDestination(); + state.propagateBacktrackingFrom(this, parenthesesBacktrack); + state.getBacktrackDestination().copyBacktrackToLabel(parenthesesBacktrack); + + m_expressionState.decrementParenNestingLevel(); } else { Jump nonGreedySkipParentheses; Label nonGreedyTryParentheses; @@ -938,7 +1634,7 @@ class RegexGenerator : private MacroAssembler { } // store the match start index - if (term.invertOrCapture) { + if (term.capture()) { int inputOffset = state.inputOffset() - preCheckedCount; if (inputOffset) { move(index, indexTemporary); @@ -948,45 +1644,24 @@ class RegexGenerator : private MacroAssembler { store32(index, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int))); } - // generate the body of the parentheses - TermGenerationState parenthesesState(disjunction, state.checkedTotal); - generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation); + ParenthesesTail* parenthesesTail = m_expressionState.addParenthesesTail(term, state.getParenthesesTail()); - Jump success = (term.quantityType == QuantifierFixedCount) ? - jump() : - branch32(NotEqual, index, Address(stackPointerRegister, (parenthesesFrameLocation * sizeof(void*)))); + m_expressionState.incrementParenNestingLevel(); - // A failure AFTER the parens jumps here - Label backtrackFromAfterParens(this); + TermGenerationState parenthesesState(disjunction, state.checkedTotal); - if (term.quantityType == QuantifierGreedy) { - // If this is -1 we have now tested with both with and without the parens. - loadFromFrame(parenthesesFrameLocation, indexTemporary); - state.jumpToBacktrack(branch32(Equal, indexTemporary, Imm32(-1)), this); - } else if (term.quantityType == QuantifierNonGreedy) { - // If this is -1 we have now tested without the parens, now test with. - loadFromFrame(parenthesesFrameLocation, indexTemporary); - branch32(Equal, indexTemporary, Imm32(-1)).linkTo(nonGreedyTryParentheses, this); - } + // Save the parenthesesTail for backtracking from nested parens to this one. + parenthesesState.setParenthesesTail(parenthesesTail); - parenthesesState.plantJumpToBacktrackIfExists(this); - // A failure WITHIN the parens jumps here - parenthesesState.linkAlternativeBacktracks(this); - if (term.invertOrCapture) - store32(Imm32(-1), Address(output, (term.parentheses.subpatternId << 1) * sizeof(int))); + // generate the body of the parentheses + generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation); - if (term.quantityType == QuantifierGreedy) - storeToFrame(Imm32(-1), parenthesesFrameLocation); - else - state.jumpToBacktrack(jump(), this); - - state.setBacktrackGenerated(backtrackFromAfterParens); - if (term.quantityType == QuantifierNonGreedy) - nonGreedySkipParentheses.link(this); - success.link(this); + // For non-fixed counts, backtrack if we didn't match anything. + if (term.quantityType != QuantifierFixedCount) + parenthesesTail->addAfterParenJump(branch32(Equal, index, Address(stackPointerRegister, (parenthesesFrameLocation * sizeof(void*))))); // store the match end index - if (term.invertOrCapture) { + if (term.capture()) { int inputOffset = state.inputOffset(); if (inputOffset) { move(index, indexTemporary); @@ -995,6 +1670,15 @@ class RegexGenerator : private MacroAssembler { } else store32(index, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int))); } + + m_expressionState.decrementParenNestingLevel(); + + parenthesesTail->processBacktracks(this, state, parenthesesState, nonGreedyTryParentheses, label()); + + parenthesesState.getBacktrackDestination().clear(); + + if (term.quantityType == QuantifierNonGreedy) + nonGreedySkipParentheses.link(this); } } @@ -1033,6 +1717,7 @@ class RegexGenerator : private MacroAssembler { parenthesesState.plantJumpToBacktrackIfExists(this); parenthesesState.linkAlternativeBacktracks(this); + // We get here if the alternative fails to match - fall through to the next iteration, or out of the loop. if (countToCheck) { @@ -1057,7 +1742,7 @@ class RegexGenerator : private MacroAssembler { int countCheckedAfterAssertion = state.checkedTotal - term.inputPosition; - if (term.invertOrCapture) { + if (term.invert()) { // Inverted case storeToFrame(index, parenthesesFrameLocation); @@ -1069,10 +1754,11 @@ class RegexGenerator : private MacroAssembler { generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation); // Success! - which means - Fail! loadFromFrame(parenthesesFrameLocation, index); - state.jumpToBacktrack(jump(), this); + state.jumpToBacktrack(this); // And fail means success. parenthesesState.linkAlternativeBacktracks(this); + loadFromFrame(parenthesesFrameLocation, index); state.checkedTotal += countCheckedAfterAssertion; @@ -1091,8 +1777,9 @@ class RegexGenerator : private MacroAssembler { Jump success = jump(); parenthesesState.linkAlternativeBacktracks(this); + loadFromFrame(parenthesesFrameLocation, index); - state.jumpToBacktrack(jump(), this); + state.jumpToBacktrack(this); success.link(this); @@ -1108,15 +1795,15 @@ class RegexGenerator : private MacroAssembler { case PatternTerm::TypeAssertionBOL: generateAssertionBOL(state); break; - + case PatternTerm::TypeAssertionEOL: generateAssertionEOL(state); break; - + case PatternTerm::TypeAssertionWordBoundary: generateAssertionWordBoundary(state); break; - + case PatternTerm::TypePatternCharacter: switch (term.quantityType) { case QuantifierFixedCount: @@ -1195,7 +1882,7 @@ class RegexGenerator : private MacroAssembler { // sufficient input available to run the first repeating alternative. // The label 'firstAlternativeInputChecked' will jump directly to matching // the first repeating alternative having skipped this check. - + if (state.alternativeValid()) { PatternAlternative* alternative = state.alternative(); if (!alternative->onceThrough()) { @@ -1219,7 +1906,7 @@ class RegexGenerator : private MacroAssembler { // Track whether any alternatives are shorter than the first one. if (!alternative->onceThrough()) hasShorterAlternatives = hasShorterAlternatives || (countCheckedForCurrentAlternative < countToCheckForFirstAlternative); - + for (state.resetTerm(); state.termValid(); state.nextTerm()) generateTerm(state); @@ -1242,6 +1929,8 @@ class RegexGenerator : private MacroAssembler { generateReturn(); state.nextAlternative(); + if (alternative->onceThrough() && state.alternativeValid()) + state.clearBacktrack(); // if there are any more alternatives, plant the check for input before looping. if (state.alternativeValid()) { @@ -1249,45 +1938,46 @@ class RegexGenerator : private MacroAssembler { if (!setRepeatAlternativeLabels && !nextAlternative->onceThrough()) { // We have handled non-repeating alternatives, jump to next iteration // and loop over repeating alternatives. - state.jumpToBacktrack(jump(), this); - + state.jumpToBacktrack(this); + countToCheckForFirstAlternative = nextAlternative->m_minimumSize; - + // If we get here, there the last input checked failed. notEnoughInputForPreviousAlternative.link(this); - + state.linkAlternativeBacktracks(this); // Back up to start the looping alternatives. if (countCheckedForCurrentAlternative) sub32(Imm32(countCheckedForCurrentAlternative), index); - + firstAlternative = Label(this); - + state.checkedTotal = countToCheckForFirstAlternative; if (countToCheckForFirstAlternative) notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForFirstAlternative)); - + countCheckedForCurrentAlternative = countToCheckForFirstAlternative; - + firstAlternativeInputChecked = Label(this); setRepeatAlternativeLabels = true; } else { int countToCheckForNextAlternative = nextAlternative->m_minimumSize; - + if (countCheckedForCurrentAlternative > countToCheckForNextAlternative) { // CASE 1: current alternative was longer than the next one. // If we get here, then the last input checked failed. notEnoughInputForPreviousAlternative.link(this); - + // Check if sufficent input available to run the next alternative notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative)); // We are now in the correct state to enter the next alternative; this add is only required // to mirror and revert operation of the sub32, just below. add32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index); - + // If we get here, then the last input checked passed. state.linkAlternativeBacktracks(this); + // No need to check if we can run the next alternative, since it is shorter - // just update index. sub32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index); @@ -1299,13 +1989,14 @@ class RegexGenerator : private MacroAssembler { notEnoughInputForPreviousAlternative.link(this); add32(Imm32(countToCheckForNextAlternative - countCheckedForCurrentAlternative), index); notEnoughInputForPreviousAlternative.append(jump()); - + // The next alternative is longer than the current one; check the difference. state.linkAlternativeBacktracks(this); + notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative)); } else { // CASE 3: Both alternatives are the same length. ASSERT(countCheckedForCurrentAlternative == countToCheckForNextAlternative); - + // If the next alterative is the same length as this one, then no need to check the input - // if there was sufficent input to run the current alternative then there is sufficient // input to run the next one; if not, there isn't. @@ -1317,7 +2008,7 @@ class RegexGenerator : private MacroAssembler { } } } - + // If we get here, all Alternatives failed... state.checkedTotal -= countCheckedForCurrentAlternative; @@ -1325,7 +2016,8 @@ class RegexGenerator : private MacroAssembler { if (!setRepeatAlternativeLabels) { // If there are no alternatives that need repeating (all are marked 'onceThrough') then just link // the match failures to this point, and fall through to the return below. - state.linkAlternativeBacktracks(this); + state.linkAlternativeBacktracks(this, true); + notEnoughInputForPreviousAlternative.link(this); } else { // How much more input need there be to be able to retry from the first alternative? @@ -1345,11 +2037,11 @@ class RegexGenerator : private MacroAssembler { // First, deal with the cases where there was sufficient input to try the last alternative. if (incrementForNextIter > 0) // We need to check for more input anyway, fall through to the checking below. - state.linkAlternativeBacktracks(this); + state.linkAlternativeBacktracks(this, true); else if (m_pattern.m_body->m_hasFixedSize && !incrementForNextIter) // No need to update anything, link these backtracks straight to the to pof the loop! - state.linkAlternativeBacktracksTo(firstAlternativeInputChecked, this); + state.linkAlternativeBacktracksTo(this, firstAlternativeInputChecked, true); else { // no need to check the input, but we do have some bookkeeping to do first. - state.linkAlternativeBacktracks(this); + state.linkAlternativeBacktracks(this, true); // Where necessary update our preserved start position. if (!m_pattern.m_body->m_hasFixedSize) { @@ -1374,7 +2066,7 @@ class RegexGenerator : private MacroAssembler { } else store32(index, Address(output)); } - + // Check if there is sufficent input to run the first alternative again. jumpIfAvailableInput(incrementForNextIter).linkTo(firstAlternativeInputChecked, this); // No - insufficent input to run the first alteranative, are there any other alternatives we @@ -1406,6 +2098,10 @@ class RegexGenerator : private MacroAssembler { move(Imm32(-1), returnRegister); generateReturn(); + + m_expressionState.emitParenthesesTail(this); + m_expressionState.emitIndirectJumpTable(this); + m_expressionState.linkToNextIteration(this); } void generateEnter() @@ -1490,47 +2186,28 @@ public: { generate(); - LinkBuffer patchBuffer(this, globalData->executableAllocator.poolForSize(size()), 0); + LinkBuffer patchBuffer(this, globalData->regexAllocator.poolForSize(size()), 0); - for (unsigned i = 0; i < m_backtrackRecords.size(); ++i) - patchBuffer.patch(m_backtrackRecords[i].dataLabel, patchBuffer.locationOf(m_backtrackRecords[i].backtrackLocation)); + for (unsigned i = 0; i < m_expressionState.m_backtrackRecords.size(); ++i) + patchBuffer.patch(m_expressionState.m_backtrackRecords[i].dataLabel, patchBuffer.locationOf(m_expressionState.m_backtrackRecords[i].backtrackLocation)); jitObject.set(patchBuffer.finalizeCode()); - } - - bool shouldFallBack() - { - return m_shouldFallBack; + jitObject.setFallBack(m_shouldFallBack); } private: RegexPattern& m_pattern; bool m_shouldFallBack; - Vector<AlternativeBacktrackRecord> m_backtrackRecords; + GenerationState m_expressionState; }; -void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& patternString, unsigned& numSubpatterns, const char*& error, BumpPointerAllocator* allocator, bool ignoreCase, bool multiline) +void jitCompileRegex(RegexPattern& pattern, JSGlobalData* globalData, RegexCodeBlock& jitObject) { - RegexPattern pattern(ignoreCase, multiline); - if ((error = compileRegex(patternString, pattern))) - return; - numSubpatterns = pattern.m_numSubpatterns; - - if (!pattern.m_containsBackreferences && globalData->canUseJIT()) { - RegexGenerator generator(pattern); - generator.compile(globalData, jitObject); - if (!generator.shouldFallBack()) - return; - } - - jitObject.setFallback(byteCompileRegex(pattern, allocator)); + RegexGenerator generator(pattern); + generator.compile(globalData, jitObject); } + }} #endif - - - - - diff --git a/JavaScriptCore/yarr/RegexJIT.h b/JavaScriptCore/yarr/RegexJIT.h index c4c382c..5e3dca1 100644 --- a/JavaScriptCore/yarr/RegexJIT.h +++ b/JavaScriptCore/yarr/RegexJIT.h @@ -29,7 +29,6 @@ #if ENABLE(YARR_JIT) #include "MacroAssembler.h" -#include "RegexInterpreter.h" // temporary, remove when fallback is removed. #include "RegexPattern.h" #include "UString.h" @@ -51,7 +50,7 @@ class RegexCodeBlock { public: RegexCodeBlock() - : m_needFallback(false) + : m_needFallBack(false) { } @@ -59,15 +58,8 @@ public: { } - BytecodePattern* getFallback() { return m_fallback.get(); } - bool isFallback() { return m_needFallback; } - void setFallback(PassOwnPtr<BytecodePattern> fallback) - { - m_fallback = fallback; - m_needFallback = true; - } - - bool operator!() { return (!m_ref.m_code.executableAddress() && !m_fallback); } + void setFallBack(bool fallback) { m_needFallBack = fallback; } + bool isFallBack() { return m_needFallBack; } void set(MacroAssembler::CodeRef ref) { m_ref = ref; } int execute(const UChar* input, unsigned start, unsigned length, int* output) @@ -81,17 +73,13 @@ public: private: MacroAssembler::CodeRef m_ref; - OwnPtr<Yarr::BytecodePattern> m_fallback; - bool m_needFallback; + bool m_needFallBack; }; -void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, BumpPointerAllocator* allocator, bool ignoreCase = false, bool multiline = false); +void jitCompileRegex(RegexPattern& pattern, JSGlobalData* globalData, RegexCodeBlock& jitObject); inline int executeRegex(RegexCodeBlock& jitObject, const UChar* input, unsigned start, unsigned length, int* output) { - if (jitObject.isFallback()) - return (interpretRegex(jitObject.getFallback(), input, start, length, output)); - return jitObject.execute(input, start, length, output); } diff --git a/JavaScriptCore/yarr/RegexParser.h b/JavaScriptCore/yarr/RegexParser.h index a5c0ba2..ec5f589 100644 --- a/JavaScriptCore/yarr/RegexParser.h +++ b/JavaScriptCore/yarr/RegexParser.h @@ -33,6 +33,8 @@ namespace JSC { namespace Yarr { +static const unsigned quantifyInfinite = UINT_MAX; + enum BuiltInCharacterClassID { DigitClassID, SpaceClassID, @@ -75,7 +77,7 @@ private: CharacterClassParserDelegate(Delegate& delegate, ErrorCode& err) : m_delegate(delegate) , m_err(err) - , m_state(empty) + , m_state(Empty) { } @@ -90,54 +92,67 @@ private: } /* - * atomPatternCharacterUnescaped(): + * atomPatternCharacter(): * - * This method is called directly from parseCharacterClass(), to report a new - * pattern character token. This method differs from atomPatternCharacter(), - * which will be called from parseEscape(), since a hypen provided via this - * method may be indicating a character range, but a hyphen parsed by - * parseEscape() cannot be interpreted as doing so. + * This method is called either from parseCharacterClass() (for an unescaped + * character in a character class), or from parseEscape(). In the former case + * the value true will be passed for the argument 'hyphenIsRange', and in this + * mode we will allow a hypen to be treated as indicating a range (i.e. /[a-z]/ + * is different to /[a\-z]/). */ - void atomPatternCharacterUnescaped(UChar ch) + void atomPatternCharacter(UChar ch, bool hyphenIsRange = false) { switch (m_state) { - case empty: + case AfterCharacterClass: + // Following a builtin character class we need look out for a hyphen. + // We're looking for invalid ranges, such as /[\d-x]/ or /[\d-\d]/. + // If we see a hyphen following a charater class then unlike usual + // we'll report it to the delegate immediately, and put ourself into + // a poisoned state. Any following calls to add another character or + // character class will result in an error. (A hypen following a + // character-class is itself valid, but only at the end of a regex). + if (hyphenIsRange && ch == '-') { + m_delegate.atomCharacterClassAtom('-'); + m_state = AfterCharacterClassHyphen; + return; + } + // Otherwise just fall through - cached character so treat this as Empty. + + case Empty: m_character = ch; - m_state = cachedCharacter; - break; + m_state = CachedCharacter; + return; - case cachedCharacter: - if (ch == '-') - m_state = cachedCharacterHyphen; + case CachedCharacter: + if (hyphenIsRange && ch == '-') + m_state = CachedCharacterHyphen; else { m_delegate.atomCharacterClassAtom(m_character); m_character = ch; } - break; + return; - case cachedCharacterHyphen: - if (ch >= m_character) - m_delegate.atomCharacterClassRange(m_character, ch); - else + case CachedCharacterHyphen: + if (ch < m_character) { m_err = CharacterClassOutOfOrder; - m_state = empty; - } - } - - /* - * atomPatternCharacter(): - * - * Adds a pattern character, called by parseEscape(), as such will not - * interpret a hyphen as indicating a character range. - */ - void atomPatternCharacter(UChar ch) - { - // Flush if a character is already pending to prevent the - // hyphen from begin interpreted as indicating a range. - if((ch == '-') && (m_state == cachedCharacter)) - flush(); + return; + } + m_delegate.atomCharacterClassRange(m_character, ch); + m_state = Empty; + return; - atomPatternCharacterUnescaped(ch); + // See coment in atomBuiltInCharacterClass below. + // This too is technically an error, per ECMA-262, and again we + // we chose to allow this. Note a subtlely here that while we + // diverge from the spec's definition of CharacterRange we do + // remain in compliance with the grammar. For example, consider + // the expression /[\d-a-z]/. We comply with the grammar in + // this case by not allowing a-z to be matched as a range. + case AfterCharacterClassHyphen: + m_delegate.atomCharacterClassAtom(ch); + m_state = Empty; + return; + } } /* @@ -147,8 +162,34 @@ private: */ void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert) { - flush(); - m_delegate.atomCharacterClassBuiltIn(classID, invert); + switch (m_state) { + case CachedCharacter: + // Flush the currently cached character, then fall through. + m_delegate.atomCharacterClassAtom(m_character); + + case Empty: + case AfterCharacterClass: + m_state = AfterCharacterClass; + m_delegate.atomCharacterClassBuiltIn(classID, invert); + return; + + // If we hit either of these cases, we have an invalid range that + // looks something like /[x-\d]/ or /[\d-\d]/. + // According to ECMA-262 this should be a syntax error, but + // empirical testing shows this to break teh webz. Instead we + // comply with to the ECMA-262 grammar, and assume the grammar to + // have matched the range correctly, but tweak our interpretation + // of CharacterRange. Effectively we implicitly handle the hyphen + // as if it were escaped, e.g. /[\w-_]/ is treated as /[\w\-_]/. + case CachedCharacterHyphen: + m_delegate.atomCharacterClassAtom(m_character); + m_delegate.atomCharacterClassAtom('-'); + // fall through + case AfterCharacterClassHyphen: + m_delegate.atomCharacterClassBuiltIn(classID, invert); + m_state = Empty; + return; + } } /* @@ -158,7 +199,12 @@ private: */ void end() { - flush(); + if (m_state == CachedCharacter) + m_delegate.atomCharacterClassAtom(m_character); + else if (m_state == CachedCharacterHyphen) { + m_delegate.atomCharacterClassAtom(m_character); + m_delegate.atomCharacterClassAtom('-'); + } m_delegate.atomCharacterClassEnd(); } @@ -168,21 +214,14 @@ private: void atomBackReference(unsigned) { ASSERT_NOT_REACHED(); } private: - void flush() - { - if (m_state != empty) // either cachedCharacter or cachedCharacterHyphen - m_delegate.atomCharacterClassAtom(m_character); - if (m_state == cachedCharacterHyphen) - m_delegate.atomCharacterClassAtom('-'); - m_state = empty; - } - Delegate& m_delegate; ErrorCode& m_err; enum CharacterClassConstructionState { - empty, - cachedCharacter, - cachedCharacterHyphen, + Empty, + CachedCharacter, + CachedCharacterHyphen, + AfterCharacterClass, + AfterCharacterClassHyphen, } m_state; UChar m_character; }; @@ -428,7 +467,7 @@ private: break; default: - characterClassConstructor.atomPatternCharacterUnescaped(consume()); + characterClassConstructor.atomPatternCharacter(consume(), true); } if (m_err) @@ -572,13 +611,13 @@ private: case '*': consume(); - parseQuantifier(lastTokenWasAnAtom, 0, UINT_MAX); + parseQuantifier(lastTokenWasAnAtom, 0, quantifyInfinite); lastTokenWasAnAtom = false; break; case '+': consume(); - parseQuantifier(lastTokenWasAnAtom, 1, UINT_MAX); + parseQuantifier(lastTokenWasAnAtom, 1, quantifyInfinite); lastTokenWasAnAtom = false; break; @@ -597,7 +636,7 @@ private: unsigned max = min; if (tryConsume(',')) - max = peekIsDigit() ? consumeNumber() : UINT_MAX; + max = peekIsDigit() ? consumeNumber() : quantifyInfinite; if (tryConsume('}')) { if (min <= max) @@ -838,7 +877,7 @@ private: */ template<class Delegate> -const char* parse(Delegate& delegate, const UString& pattern, unsigned backReferenceLimit = UINT_MAX) +const char* parse(Delegate& delegate, const UString& pattern, unsigned backReferenceLimit = quantifyInfinite) { return Parser<Delegate>(delegate, pattern, backReferenceLimit).parse(); } diff --git a/JavaScriptCore/yarr/RegexPattern.h b/JavaScriptCore/yarr/RegexPattern.h index c76c641..74ee897 100644 --- a/JavaScriptCore/yarr/RegexPattern.h +++ b/JavaScriptCore/yarr/RegexPattern.h @@ -39,7 +39,7 @@ namespace JSC { namespace Yarr { #define RegexStackSpaceForBackTrackInfoParentheticalAssertion 1 #define RegexStackSpaceForBackTrackInfoParenthesesOnce 1 // Only for !fixed quantifiers. #define RegexStackSpaceForBackTrackInfoParenthesesTerminal 1 -#define RegexStackSpaceForBackTrackInfoParentheses 4 +#define RegexStackSpaceForBackTrackInfoParentheses 2 struct PatternDisjunction; @@ -103,11 +103,12 @@ struct PatternTerm { TypeParenthesesSubpattern, TypeParentheticalAssertion, } type; - bool invertOrCapture; + bool m_capture :1; + bool m_invert :1; union { UChar patternCharacter; CharacterClass* characterClass; - unsigned subpatternId; + unsigned backReferenceSubpatternId; struct { PatternDisjunction* disjunction; unsigned subpatternId; @@ -123,6 +124,8 @@ struct PatternTerm { PatternTerm(UChar ch) : type(PatternTerm::TypePatternCharacter) + , m_capture(false) + , m_invert(false) { patternCharacter = ch; quantityType = QuantifierFixedCount; @@ -131,16 +134,18 @@ struct PatternTerm { PatternTerm(CharacterClass* charClass, bool invert) : type(PatternTerm::TypeCharacterClass) - , invertOrCapture(invert) + , m_capture(false) + , m_invert(invert) { characterClass = charClass; quantityType = QuantifierFixedCount; quantityCount = 1; } - PatternTerm(Type type, unsigned subpatternId, PatternDisjunction* disjunction, bool invertOrCapture) + PatternTerm(Type type, unsigned subpatternId, PatternDisjunction* disjunction, bool capture = false, bool invert = false) : type(type) - , invertOrCapture(invertOrCapture) + , m_capture(capture) + , m_invert(invert) { parentheses.disjunction = disjunction; parentheses.subpatternId = subpatternId; @@ -152,7 +157,8 @@ struct PatternTerm { PatternTerm(Type type, bool invert = false) : type(type) - , invertOrCapture(invert) + , m_capture(false) + , m_invert(invert) { quantityType = QuantifierFixedCount; quantityCount = 1; @@ -160,9 +166,10 @@ struct PatternTerm { PatternTerm(unsigned spatternId) : type(TypeBackReference) - , invertOrCapture(false) + , m_capture(false) + , m_invert(false) { - subpatternId = spatternId; + backReferenceSubpatternId = spatternId; quantityType = QuantifierFixedCount; quantityCount = 1; } @@ -189,12 +196,12 @@ struct PatternTerm { bool invert() { - return invertOrCapture; + return m_invert; } bool capture() { - return invertOrCapture; + return m_capture; } void quantify(unsigned count, QuantifierType type) |