summaryrefslogtreecommitdiffstats
path: root/JavaScriptCore/yarr
diff options
context:
space:
mode:
authorBen Murdoch <benm@google.com>2011-05-05 14:36:32 +0100
committerBen Murdoch <benm@google.com>2011-05-10 15:38:30 +0100
commitf05b935882198ccf7d81675736e3aeb089c5113a (patch)
tree4ea0ca838d9ef1b15cf17ddb3928efb427c7e5a1 /JavaScriptCore/yarr
parent60fbdcc62bced8db2cb1fd233cc4d1e4ea17db1b (diff)
downloadexternal_webkit-f05b935882198ccf7d81675736e3aeb089c5113a.zip
external_webkit-f05b935882198ccf7d81675736e3aeb089c5113a.tar.gz
external_webkit-f05b935882198ccf7d81675736e3aeb089c5113a.tar.bz2
Merge WebKit at r74534: Initial merge by git.
Change-Id: I6ccd1154fa1b19c2ec2a66878eb675738735f1eb
Diffstat (limited to 'JavaScriptCore/yarr')
-rw-r--r--JavaScriptCore/yarr/RegexCompiler.cpp23
-rw-r--r--JavaScriptCore/yarr/RegexInterpreter.cpp79
-rw-r--r--JavaScriptCore/yarr/RegexInterpreter.h30
-rw-r--r--JavaScriptCore/yarr/RegexJIT.cpp1071
-rw-r--r--JavaScriptCore/yarr/RegexJIT.h22
-rw-r--r--JavaScriptCore/yarr/RegexParser.h151
-rw-r--r--JavaScriptCore/yarr/RegexPattern.h29
7 files changed, 1048 insertions, 357 deletions
diff --git a/JavaScriptCore/yarr/RegexCompiler.cpp b/JavaScriptCore/yarr/RegexCompiler.cpp
index 06ecbad..9063359 100644
--- a/JavaScriptCore/yarr/RegexCompiler.cpp
+++ b/JavaScriptCore/yarr/RegexCompiler.cpp
@@ -459,7 +459,7 @@ public:
PatternDisjunction* parenthesesDisjunction = new PatternDisjunction(m_alternative);
m_pattern.m_disjunctions.append(parenthesesDisjunction);
- m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, capture));
+ m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, capture, false));
m_alternative = parenthesesDisjunction->addNewAlternative();
}
@@ -467,7 +467,7 @@ public:
{
PatternDisjunction* parenthesesDisjunction = new PatternDisjunction(m_alternative);
m_pattern.m_disjunctions.append(parenthesesDisjunction);
- m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParentheticalAssertion, m_pattern.m_numSubpatterns + 1, parenthesesDisjunction, invert));
+ m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParentheticalAssertion, m_pattern.m_numSubpatterns + 1, parenthesesDisjunction, false, invert));
m_alternative = parenthesesDisjunction->addNewAlternative();
m_invertParentheticalAssertion = invert;
}
@@ -477,16 +477,16 @@ public:
ASSERT(m_alternative->m_parent);
ASSERT(m_alternative->m_parent->m_parent);
- PatternDisjunction* parenthesisDisjunction = m_alternative->m_parent;
+ PatternDisjunction* parenthesesDisjunction = m_alternative->m_parent;
m_alternative = m_alternative->m_parent->m_parent;
PatternTerm& lastTerm = m_alternative->lastTerm();
- unsigned numParenAlternatives = parenthesisDisjunction->m_alternatives.size();
+ unsigned numParenAlternatives = parenthesesDisjunction->m_alternatives.size();
unsigned numBOLAnchoredAlts = 0;
// Bubble up BOL flags
for (unsigned i = 0; i < numParenAlternatives; i++) {
- if (parenthesisDisjunction->m_alternatives[i]->m_startsWithBOL)
+ if (parenthesesDisjunction->m_alternatives[i]->m_startsWithBOL)
numBOLAnchoredAlts++;
}
@@ -520,7 +520,7 @@ public:
PatternTerm& term = currentAlternative->lastTerm();
ASSERT((term.type == PatternTerm::TypeParenthesesSubpattern) || (term.type == PatternTerm::TypeParentheticalAssertion));
- if ((term.type == PatternTerm::TypeParenthesesSubpattern) && term.invertOrCapture && (subpatternId == term.subpatternId)) {
+ if ((term.type == PatternTerm::TypeParenthesesSubpattern) && term.capture() && (subpatternId == term.parentheses.subpatternId)) {
m_alternative->m_terms.append(PatternTerm::ForwardReference());
return;
}
@@ -595,7 +595,7 @@ public:
term.quantify(min, QuantifierFixedCount);
m_alternative->m_terms.append(copyTerm(term));
// NOTE: this term is interesting from an analysis perspective, in that it can be ignored.....
- m_alternative->lastTerm().quantify((max == UINT_MAX) ? max : max - min, greedy ? QuantifierGreedy : QuantifierNonGreedy);
+ m_alternative->lastTerm().quantify((max == quantifyInfinite) ? max : max - min, greedy ? QuantifierGreedy : QuantifierNonGreedy);
if (m_alternative->lastTerm().type == PatternTerm::TypeParenthesesSubpattern)
m_alternative->lastTerm().parentheses.isCopy = true;
}
@@ -734,7 +734,8 @@ public:
// This optimization identifies sets of parentheses that we will never need to backtrack.
// In these cases we do not need to store state from prior iterations.
// We can presently avoid backtracking for:
- // * a set of parens at the end of the regular expression (last term in any of the alternatives of the main body disjunction).
+ // * where the parens are at the end of the regular expression (last term in any of the
+ // alternatives of the main body disjunction).
// * where the parens are non-capturing, and quantified unbounded greedy (*).
// * where the parens do not contain any capturing subpatterns.
void checkForTerminalParentheses()
@@ -745,13 +746,13 @@ public:
return;
Vector<PatternAlternative*>& alternatives = m_pattern.m_body->m_alternatives;
- for (unsigned i =0; i < alternatives.size(); ++i) {
+ for (size_t i = 0; i < alternatives.size(); ++i) {
Vector<PatternTerm>& terms = alternatives[i]->m_terms;
if (terms.size()) {
PatternTerm& term = terms.last();
if (term.type == PatternTerm::TypeParenthesesSubpattern
&& term.quantityType == QuantifierGreedy
- && term.quantityCount == UINT_MAX
+ && term.quantityCount == quantifyInfinite
&& !term.capture())
term.parentheses.isTerminal = true;
}
@@ -844,7 +845,7 @@ public:
return false;
case PatternTerm::TypeParentheticalAssertion:
- if (term.invertOrCapture)
+ if (term.invert())
return false;
case PatternTerm::TypeParenthesesSubpattern:
diff --git a/JavaScriptCore/yarr/RegexInterpreter.cpp b/JavaScriptCore/yarr/RegexInterpreter.cpp
index 164158e..9900cbe 100644
--- a/JavaScriptCore/yarr/RegexInterpreter.cpp
+++ b/JavaScriptCore/yarr/RegexInterpreter.cpp
@@ -68,8 +68,6 @@ public:
struct BackTrackInfoParentheses {
uintptr_t matchAmount;
ParenthesesDisjunctionContext* lastContext;
- uintptr_t prevBegin;
- uintptr_t prevEnd;
};
static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context)
@@ -515,8 +513,7 @@ public:
if (matchEnd == -1)
return true;
- ASSERT((matchBegin == -1) == (matchEnd == -1));
- ASSERT(matchBegin <= matchEnd);
+ ASSERT((matchBegin == -1) || (matchBegin <= matchEnd));
if (matchBegin == matchEnd)
return true;
@@ -558,8 +555,7 @@ public:
int matchBegin = output[(term.atom.subpatternId << 1)];
int matchEnd = output[(term.atom.subpatternId << 1) + 1];
- ASSERT((matchBegin == -1) == (matchEnd == -1));
- ASSERT(matchBegin <= matchEnd);
+ ASSERT((matchBegin == -1) || (matchBegin <= matchEnd));
if (matchBegin == matchEnd)
return false;
@@ -719,7 +715,7 @@ public:
if (term.capture()) {
// Technically this access to inputPosition should be accessing the begin term's
// inputPosition, but for repeats other than fixed these values should be
- // the same anyway! (we don't pre-check for greedy or non-greedy matches.)
+ // the same anyway! (We don't pre-check for greedy or non-greedy matches.)
ASSERT((&term - term.atom.parenthesesWidth)->type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
ASSERT((&term - term.atom.parenthesesWidth)->inputPosition == term.inputPosition);
unsigned subpatternId = term.atom.subpatternId;
@@ -739,7 +735,7 @@ public:
{
ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
ASSERT(term.atom.quantityType == QuantifierGreedy);
- ASSERT(term.atom.quantityCount == UINT_MAX);
+ ASSERT(term.atom.quantityCount == quantifyInfinite);
ASSERT(!term.capture());
BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
@@ -765,7 +761,7 @@ public:
{
ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
ASSERT(term.atom.quantityType == QuantifierGreedy);
- ASSERT(term.atom.quantityCount == UINT_MAX);
+ ASSERT(term.atom.quantityCount == quantifyInfinite);
ASSERT(!term.capture());
// If we backtrack to this point, we have failed to match this iteration of the parens.
@@ -777,7 +773,7 @@ public:
bool backtrackParenthesesTerminalEnd(ByteTerm&, DisjunctionContext*)
{
// 'Terminal' parentheses are at the end of the regex, and as such a match past end
- // should always be returned as a successful match - we should never becktrack to here.
+ // should always be returned as a successful match - we should never backtrack to here.
ASSERT_NOT_REACHED();
return false;
}
@@ -843,13 +839,8 @@ public:
ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
-
- unsigned subpatternId = term.atom.subpatternId;
ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
- backTrack->prevBegin = output[(subpatternId << 1)];
- backTrack->prevEnd = output[(subpatternId << 1) + 1];
-
backTrack->matchAmount = 0;
backTrack->lastContext = 0;
@@ -929,13 +920,6 @@ public:
ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
-
- if (term.capture()) {
- unsigned subpatternId = term.atom.subpatternId;
- output[(subpatternId << 1)] = backTrack->prevBegin;
- output[(subpatternId << 1) + 1] = backTrack->prevEnd;
- }
-
ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
switch (term.atom.quantityType) {
@@ -1524,7 +1508,7 @@ public:
{
int beginTerm = m_bodyDisjunction->terms.size();
- m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, inputPosition));
+ m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
@@ -1537,7 +1521,7 @@ public:
{
int beginTerm = m_bodyDisjunction->terms.size();
- m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalBegin, subpatternId, capture, inputPosition));
+ m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalBegin, subpatternId, capture, false, inputPosition));
m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
@@ -1554,7 +1538,7 @@ public:
int beginTerm = m_bodyDisjunction->terms.size();
- m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, inputPosition));
+ m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
@@ -1567,7 +1551,7 @@ public:
{
int beginTerm = m_bodyDisjunction->terms.size();
- m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, invert, 0));
+ m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, false, invert, 0));
m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
@@ -1584,10 +1568,10 @@ public:
ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin);
- bool invertOrCapture = m_bodyDisjunction->terms[beginTerm].invertOrCapture;
+ bool invert = m_bodyDisjunction->terms[beginTerm].invert();
unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
- m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionEnd, subpatternId, invertOrCapture, inputPosition));
+ m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionEnd, subpatternId, false, invert, inputPosition));
m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
@@ -1679,7 +1663,7 @@ public:
ByteTerm& parenthesesBegin = m_bodyDisjunction->terms[beginTerm];
- bool invertOrCapture = parenthesesBegin.invertOrCapture;
+ bool capture = parenthesesBegin.capture();
unsigned subpatternId = parenthesesBegin.atom.subpatternId;
unsigned numSubpatterns = lastSubpatternId - subpatternId + 1;
@@ -1693,7 +1677,7 @@ public:
m_bodyDisjunction->terms.shrink(beginTerm);
m_allParenthesesInfo.append(parenthesesDisjunction);
- m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, invertOrCapture, inputPosition));
+ m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, capture, inputPosition));
m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
@@ -1708,10 +1692,10 @@ public:
ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
- bool invertOrCapture = m_bodyDisjunction->terms[beginTerm].invertOrCapture;
+ bool capture = m_bodyDisjunction->terms[beginTerm].capture();
unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
- m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, invertOrCapture, inputPosition));
+ m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, capture, false, inputPosition));
m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
@@ -1730,10 +1714,10 @@ public:
ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
- bool invertOrCapture = m_bodyDisjunction->terms[beginTerm].invertOrCapture;
+ bool capture = m_bodyDisjunction->terms[beginTerm].capture();
unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
- m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalEnd, subpatternId, invertOrCapture, inputPosition));
+ m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalEnd, subpatternId, capture, false, inputPosition));
m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
@@ -1816,7 +1800,7 @@ public:
break;
case PatternTerm::TypeAssertionWordBoundary:
- assertionWordBoundary(term.invertOrCapture, term.inputPosition - currentCountAlreadyChecked);
+ assertionWordBoundary(term.invert(), term.inputPosition - currentCountAlreadyChecked);
break;
case PatternTerm::TypePatternCharacter:
@@ -1824,11 +1808,11 @@ public:
break;
case PatternTerm::TypeCharacterClass:
- atomCharacterClass(term.characterClass, term.invertOrCapture, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
+ atomCharacterClass(term.characterClass, term.invert(), term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
break;
case PatternTerm::TypeBackReference:
- atomBackReference(term.subpatternId, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
+ atomBackReference(term.backReferenceSubpatternId, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
break;
case PatternTerm::TypeForwardReference:
@@ -1844,17 +1828,17 @@ public:
else
alternativeFrameLocation += RegexStackSpaceForBackTrackInfoParenthesesOnce;
unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
- atomParenthesesOnceBegin(term.parentheses.subpatternId, term.invertOrCapture, delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, alternativeFrameLocation);
+ atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, alternativeFrameLocation);
emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
} else if (term.parentheses.isTerminal) {
unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
- atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.invertOrCapture, delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, term.frameLocation + RegexStackSpaceForBackTrackInfoParenthesesOnce);
+ atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, term.frameLocation + RegexStackSpaceForBackTrackInfoParenthesesOnce);
emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
} else {
unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
- atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.invertOrCapture, delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, 0);
+ atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, 0);
emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
}
@@ -1867,7 +1851,7 @@ public:
ASSERT(currentCountAlreadyChecked >= (unsigned)term.inputPosition);
int positiveInputOffset = currentCountAlreadyChecked - term.inputPosition;
- atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invertOrCapture, term.frameLocation, alternativeFrameLocation);
+ atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invert(), term.frameLocation, alternativeFrameLocation);
emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, positiveInputOffset, true);
atomParentheticalAssertionEnd(0, term.frameLocation, term.quantityCount, term.quantityType);
break;
@@ -1885,19 +1869,6 @@ private:
Vector<ByteDisjunction*> m_allParenthesesInfo;
};
-
-PassOwnPtr<BytecodePattern> byteCompileRegex(const UString& patternString, unsigned& numSubpatterns, const char*& error, BumpPointerAllocator* allocator, bool ignoreCase, bool multiline)
-{
- RegexPattern pattern(ignoreCase, multiline);
-
- if ((error = compileRegex(patternString, pattern)))
- return PassOwnPtr<BytecodePattern>();
-
- numSubpatterns = pattern.m_numSubpatterns;
-
- return ByteCompiler(pattern).compile(allocator);
-}
-
PassOwnPtr<BytecodePattern> byteCompileRegex(RegexPattern& pattern, BumpPointerAllocator* allocator)
{
return ByteCompiler(pattern).compile(allocator);
diff --git a/JavaScriptCore/yarr/RegexInterpreter.h b/JavaScriptCore/yarr/RegexInterpreter.h
index 2e23472..0fd8a57 100644
--- a/JavaScriptCore/yarr/RegexInterpreter.h
+++ b/JavaScriptCore/yarr/RegexInterpreter.h
@@ -87,7 +87,6 @@ struct ByteTerm {
TypeParentheticalAssertionEnd,
TypeCheckInput,
} type;
- bool invertOrCapture;
union {
struct {
union {
@@ -114,10 +113,14 @@ struct ByteTerm {
unsigned checkInputCount;
};
unsigned frameLocation;
+ bool m_capture : 1;
+ bool m_invert : 1;
int inputPosition;
ByteTerm(UChar ch, int inputPos, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
: frameLocation(frameLocation)
+ , m_capture(false)
+ , m_invert(false)
{
switch (quantityType) {
case QuantifierFixedCount:
@@ -139,6 +142,8 @@ struct ByteTerm {
ByteTerm(UChar lo, UChar hi, int inputPos, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
: frameLocation(frameLocation)
+ , m_capture(false)
+ , m_invert(false)
{
switch (quantityType) {
case QuantifierFixedCount:
@@ -161,7 +166,8 @@ struct ByteTerm {
ByteTerm(CharacterClass* characterClass, bool invert, int inputPos)
: type(ByteTerm::TypeCharacterClass)
- , invertOrCapture(invert)
+ , m_capture(false)
+ , m_invert(invert)
{
atom.characterClass = characterClass;
atom.quantityType = QuantifierFixedCount;
@@ -169,9 +175,10 @@ struct ByteTerm {
inputPosition = inputPos;
}
- ByteTerm(Type type, unsigned subpatternId, ByteDisjunction* parenthesesInfo, bool invertOrCapture, int inputPos)
+ ByteTerm(Type type, unsigned subpatternId, ByteDisjunction* parenthesesInfo, bool capture, int inputPos)
: type(type)
- , invertOrCapture(invertOrCapture)
+ , m_capture(capture)
+ , m_invert(false)
{
atom.subpatternId = subpatternId;
atom.parenthesesDisjunction = parenthesesInfo;
@@ -182,15 +189,17 @@ struct ByteTerm {
ByteTerm(Type type, bool invert = false)
: type(type)
- , invertOrCapture(invert)
+ , m_capture(false)
+ , m_invert(invert)
{
atom.quantityType = QuantifierFixedCount;
atom.quantityCount = 1;
}
- ByteTerm(Type type, unsigned subpatternId, bool invertOrCapture, int inputPos)
+ ByteTerm(Type type, unsigned subpatternId, bool capture, bool invert, int inputPos)
: type(type)
- , invertOrCapture(invertOrCapture)
+ , m_capture(capture)
+ , m_invert(invert)
{
atom.subpatternId = subpatternId;
atom.quantityType = QuantifierFixedCount;
@@ -228,7 +237,7 @@ struct ByteTerm {
static ByteTerm BackReference(unsigned subpatternId, int inputPos)
{
- return ByteTerm(TypeBackReference, subpatternId, false, inputPos);
+ return ByteTerm(TypeBackReference, subpatternId, false, false, inputPos);
}
static ByteTerm BodyAlternativeBegin(bool onceThrough)
@@ -297,12 +306,12 @@ struct ByteTerm {
bool invert()
{
- return invertOrCapture;
+ return m_invert;
}
bool capture()
{
- return invertOrCapture;
+ return m_capture;
}
};
@@ -364,7 +373,6 @@ private:
Vector<CharacterClass*> m_userCharacterClasses;
};
-PassOwnPtr<BytecodePattern> byteCompileRegex(const UString& pattern, unsigned& numSubpatterns, const char*& error, BumpPointerAllocator*, bool ignoreCase = false, bool multiline = false);
PassOwnPtr<BytecodePattern> byteCompileRegex(RegexPattern& pattern, BumpPointerAllocator*);
int interpretRegex(BytecodePattern* v_regex, const UChar* input, unsigned start, unsigned length, int* output);
diff --git a/JavaScriptCore/yarr/RegexJIT.cpp b/JavaScriptCore/yarr/RegexJIT.cpp
index acbd458..9e6756d 100644
--- a/JavaScriptCore/yarr/RegexJIT.cpp
+++ b/JavaScriptCore/yarr/RegexJIT.cpp
@@ -31,7 +31,6 @@
#include "LinkBuffer.h"
#include "MacroAssembler.h"
#include "RegexCompiler.h"
-#include "RegexInterpreter.h" // temporary, remove when fallback is removed.
#if ENABLE(YARR_JIT)
@@ -111,16 +110,16 @@ class RegexGenerator : private MacroAssembler {
int which = count >> 1;
char lo = ranges[which].begin;
char hi = ranges[which].end;
-
+
// check if there are any ranges or matches below lo. If not, just jl to failure -
// if there is anything else to check, check that first, if it falls through jmp to failure.
if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
-
+
// generate code for all ranges before this one
if (which)
matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
-
+
while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex])));
++*matchIndex;
@@ -155,25 +154,25 @@ class RegexGenerator : private MacroAssembler {
{
if (charClass->m_table) {
ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table->m_table));
- matchDest.append(branchTest8(charClass->m_table->m_inverted ? Zero : NonZero, tableEntry));
+ matchDest.append(branchTest8(charClass->m_table->m_inverted ? Zero : NonZero, tableEntry));
return;
}
Jump unicodeFail;
if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
Jump isAscii = branch32(LessThanOrEqual, character, Imm32(0x7f));
-
+
if (charClass->m_matchesUnicode.size()) {
for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) {
UChar ch = charClass->m_matchesUnicode[i];
matchDest.append(branch32(Equal, character, Imm32(ch)));
}
}
-
+
if (charClass->m_rangesUnicode.size()) {
for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) {
UChar lo = charClass->m_rangesUnicode[i].begin;
UChar hi = charClass->m_rangesUnicode[i].end;
-
+
Jump below = branch32(LessThan, character, Imm32(lo));
matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi)));
below.link(this);
@@ -186,7 +185,7 @@ class RegexGenerator : private MacroAssembler {
if (charClass->m_ranges.size()) {
unsigned matchIndex = 0;
- JumpList failures;
+ JumpList failures;
matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size());
while (matchIndex < charClass->m_matches.size())
matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++])));
@@ -288,6 +287,27 @@ class RegexGenerator : private MacroAssembler {
jump(Address(stackPointerRegister, frameLocation * sizeof(void*)));
}
+ struct IndirectJumpEntry {
+ IndirectJumpEntry(int32_t stackOffset)
+ : m_stackOffset(stackOffset)
+ {
+ }
+
+ IndirectJumpEntry(int32_t stackOffset, Jump jump)
+ : m_stackOffset(stackOffset)
+ {
+ addJump(jump);
+ }
+
+ void addJump(Jump jump)
+ {
+ m_relJumps.append(jump);
+ }
+
+ int32_t m_stackOffset;
+ JumpList m_relJumps;
+ };
+
struct AlternativeBacktrackRecord {
DataLabelPtr dataLabel;
Label backtrackLocation;
@@ -299,16 +319,469 @@ class RegexGenerator : private MacroAssembler {
}
};
+ struct ParenthesesTail;
+ struct TermGenerationState;
+
+ struct GenerationState {
+ typedef HashMap<int, IndirectJumpEntry*, WTF::IntHash<uint32_t>, UnsignedWithZeroKeyHashTraits<uint32_t> > IndirectJumpHashMap;
+
+ GenerationState()
+ : m_parenNestingLevel(0)
+ {
+ }
+
+ void addIndirectJumpEntry(int32_t stackOffset, Jump jump)
+ {
+ IndirectJumpHashMap::iterator result = m_indirectJumpMap.find(stackOffset);
+
+ ASSERT(stackOffset >= 0);
+
+ uint32_t offset = static_cast<uint32_t>(stackOffset);
+
+ if (result == m_indirectJumpMap.end())
+ m_indirectJumpMap.add(offset, new IndirectJumpEntry(stackOffset, jump));
+ else
+ result->second->addJump(jump);
+ }
+
+ void addIndirectJumpEntry(int32_t stackOffset, JumpList jumps)
+ {
+ JumpList::JumpVector jumpVector = jumps.jumps();
+ size_t size = jumpVector.size();
+ for (size_t i = 0; i < size; ++i)
+ addIndirectJumpEntry(stackOffset, jumpVector[i]);
+
+ jumps.empty();
+ }
+
+ void emitIndirectJumpTable(MacroAssembler* masm)
+ {
+ for (IndirectJumpHashMap::iterator iter = m_indirectJumpMap.begin(); iter != m_indirectJumpMap.end(); ++iter) {
+ IndirectJumpEntry* indJumpEntry = iter->second;
+ indJumpEntry->m_relJumps.link(masm);
+ masm->jump(Address(stackPointerRegister, indJumpEntry->m_stackOffset));
+ delete indJumpEntry;
+ }
+ }
+
+ void incrementParenNestingLevel()
+ {
+ ++m_parenNestingLevel;
+ }
+
+ void decrementParenNestingLevel()
+ {
+ --m_parenNestingLevel;
+ }
+
+ ParenthesesTail* addParenthesesTail(PatternTerm& term, ParenthesesTail* nextOuterParenTail)
+ {
+ ParenthesesTail* parenthesesTail = new ParenthesesTail(term, m_parenNestingLevel, nextOuterParenTail);
+ m_parenTails.append(parenthesesTail);
+ m_parenTailsForIteration.append(parenthesesTail);
+
+ return parenthesesTail;
+ }
+
+ void emitParenthesesTail(RegexGenerator* generator)
+ {
+ unsigned vectorSize = m_parenTails.size();
+ bool priorBacktrackFallThrough = false;
+
+ // Emit in reverse order so parentTail N can fall through to N-1
+ for (unsigned index = vectorSize; index > 0; --index) {
+ JumpList jumpsToNext;
+ priorBacktrackFallThrough = m_parenTails[index-1].get()->generateCode(generator, jumpsToNext, priorBacktrackFallThrough, index > 1);
+ if (index > 1)
+ jumpsToNext.linkTo(generator->label(), generator);
+ else
+ addJumpsToNextInteration(jumpsToNext);
+ }
+ m_parenTails.clear();
+ }
+
+ void addJumpToNextInteration(Jump jump)
+ {
+ m_jumpsToNextInteration.append(jump);
+ }
+
+ void addJumpsToNextInteration(JumpList jumps)
+ {
+ m_jumpsToNextInteration.append(jumps);
+ }
+
+ void addDataLabelToNextIteration(DataLabelPtr dataLabel)
+ {
+ m_dataPtrsToNextIteration.append(dataLabel);
+ }
+
+ void linkToNextIteration(Label label)
+ {
+ m_nextIteration = label;
+
+ for (unsigned i = 0; i < m_dataPtrsToNextIteration.size(); ++i)
+ m_backtrackRecords.append(AlternativeBacktrackRecord(m_dataPtrsToNextIteration[i], m_nextIteration));
+
+ m_dataPtrsToNextIteration.clear();
+
+ for (unsigned i = 0; i < m_parenTailsForIteration.size(); ++i)
+ m_parenTailsForIteration[i]->setNextIteration(m_nextIteration);
+
+ m_parenTailsForIteration.clear();
+ }
+
+ void linkToNextIteration(RegexGenerator* generator)
+ {
+ m_jumpsToNextInteration.linkTo(m_nextIteration, generator);
+ }
+
+ int m_parenNestingLevel;
+ Vector<AlternativeBacktrackRecord> m_backtrackRecords;
+ IndirectJumpHashMap m_indirectJumpMap;
+ Label m_nextIteration;
+ Vector<OwnPtr<ParenthesesTail> > m_parenTails;
+ JumpList m_jumpsToNextInteration;
+ Vector<DataLabelPtr> m_dataPtrsToNextIteration;
+ Vector<ParenthesesTail*> m_parenTailsForIteration;
+ };
+
+ struct BacktrackDestination {
+ typedef enum {
+ NoBacktrack,
+ BacktrackLabel,
+ BacktrackStackOffset,
+ BacktrackJumpList,
+ BacktrackLinked
+ } BacktrackType;
+
+ BacktrackDestination()
+ : m_backtrackType(NoBacktrack)
+ , m_backtrackToLabel(0)
+ , m_subDataLabelPtr(0)
+ , m_nextBacktrack(0)
+ , m_backtrackSourceLabel(0)
+ , m_backtrackSourceJumps(0)
+ {
+ }
+
+ BacktrackDestination(int32_t stackOffset)
+ : m_backtrackType(BacktrackStackOffset)
+ , m_backtrackStackOffset(stackOffset)
+ , m_backtrackToLabel(0)
+ , m_subDataLabelPtr(0)
+ , m_nextBacktrack(0)
+ , m_backtrackSourceLabel(0)
+ , m_backtrackSourceJumps(0)
+ {
+ }
+
+ BacktrackDestination(Label label)
+ : m_backtrackType(BacktrackLabel)
+ , m_backtrackLabel(label)
+ , m_backtrackToLabel(0)
+ , m_subDataLabelPtr(0)
+ , m_nextBacktrack(0)
+ , m_backtrackSourceLabel(0)
+ , m_backtrackSourceJumps(0)
+ {
+ }
+
+ void clear(bool doDataLabelClear = true)
+ {
+ m_backtrackType = NoBacktrack;
+ if (doDataLabelClear)
+ clearDataLabel();
+ m_nextBacktrack = 0;
+ }
+
+ void clearDataLabel()
+ {
+ m_dataLabelPtr = DataLabelPtr();
+ }
+
+ bool hasDestination()
+ {
+ return (m_backtrackType != NoBacktrack);
+ }
+
+ bool isStackOffset()
+ {
+ return (m_backtrackType == BacktrackStackOffset);
+ }
+
+ bool isLabel()
+ {
+ return (m_backtrackType == BacktrackLabel);
+ }
+
+ bool isJumpList()
+ {
+ return (m_backtrackType == BacktrackJumpList);
+ }
+
+ bool hasDataLabel()
+ {
+ return m_dataLabelPtr.isSet();
+ }
+
+ void copyTarget(BacktrackDestination& rhs, bool copyDataLabel = true)
+ {
+ m_backtrackType = rhs.m_backtrackType;
+ if (m_backtrackType == BacktrackStackOffset)
+ m_backtrackStackOffset = rhs.m_backtrackStackOffset;
+ else if (m_backtrackType == BacktrackLabel)
+ m_backtrackLabel = rhs.m_backtrackLabel;
+ if (copyDataLabel)
+ m_dataLabelPtr = rhs.m_dataLabelPtr;
+ m_backtrackSourceJumps = rhs.m_backtrackSourceJumps;
+ m_backtrackSourceLabel = rhs.m_backtrackSourceLabel;
+ }
+
+ void copyTo(BacktrackDestination& lhs)
+ {
+ lhs.m_backtrackType = m_backtrackType;
+ if (m_backtrackType == BacktrackStackOffset)
+ lhs.m_backtrackStackOffset = m_backtrackStackOffset;
+ else if (m_backtrackType == BacktrackLabel)
+ lhs.m_backtrackLabel = m_backtrackLabel;
+ lhs.m_backtrackSourceJumps = m_backtrackSourceJumps;
+ lhs.m_backtrackSourceLabel = m_backtrackSourceLabel;
+ lhs.m_dataLabelPtr = m_dataLabelPtr;
+ lhs.m_backTrackJumps = m_backTrackJumps;
+ }
+
+ void addBacktrackJump(Jump jump)
+ {
+ m_backTrackJumps.append(jump);
+ }
+
+ void setStackOffset(int32_t stackOffset)
+ {
+ m_backtrackType = BacktrackStackOffset;
+ m_backtrackStackOffset = stackOffset;
+ }
+
+ void setLabel(Label label)
+ {
+ m_backtrackType = BacktrackLabel;
+ m_backtrackLabel = label;
+ }
+
+ void setNextBacktrackLabel(Label label)
+ {
+ if (m_nextBacktrack)
+ m_nextBacktrack->setLabel(label);
+ }
+
+ void copyBacktrackToLabel(BacktrackDestination& rhs)
+ {
+ if (rhs.m_backtrackToLabel)
+ m_backtrackToLabel = rhs.m_backtrackToLabel;
+ }
+
+ void setBacktrackToLabel(Label* backtrackToLabel)
+ {
+ if (!m_backtrackToLabel)
+ m_backtrackToLabel = backtrackToLabel;
+ }
+
+ void setBacktrackJumpList(JumpList* jumpList)
+ {
+ m_backtrackType = BacktrackJumpList;
+ m_backtrackSourceJumps = jumpList;
+ }
+
+ void setBacktrackSourceLabel(Label* backtrackSourceLabel)
+ {
+ m_backtrackSourceLabel = backtrackSourceLabel;
+ }
+
+ void setDataLabel(DataLabelPtr dp)
+ {
+ if (m_subDataLabelPtr) {
+ *m_subDataLabelPtr = dp;
+ m_subDataLabelPtr = 0;
+ } else
+ m_dataLabelPtr = dp;
+ }
+
+ void setSubDataLabelPtr(DataLabelPtr* subDataLabelPtr)
+ {
+ m_subDataLabelPtr = subDataLabelPtr;
+ }
+
+ void linkToNextBacktrack(BacktrackDestination* nextBacktrack)
+ {
+ m_nextBacktrack = nextBacktrack;
+ }
+
+ int32_t getStackOffset()
+ {
+ ASSERT(m_backtrackType == BacktrackStackOffset);
+ return m_backtrackStackOffset;
+ }
+
+ Label getLabel()
+ {
+ ASSERT(m_backtrackType == BacktrackLabel);
+ return m_backtrackLabel;
+ }
+
+ JumpList& getBacktrackJumps()
+ {
+ return m_backTrackJumps;
+ }
+
+ DataLabelPtr& getDataLabel()
+ {
+ return m_dataLabelPtr;
+ }
+
+ void jumpToBacktrack(MacroAssembler* masm)
+ {
+ if (isJumpList()) {
+ if (m_backtrackSourceLabel && (m_backtrackSourceLabel->isSet()))
+ masm->jump().linkTo(*m_backtrackSourceLabel, masm);
+ else
+ m_backtrackSourceJumps->append(masm->jump());
+ } else if (isStackOffset())
+ masm->jump(Address(stackPointerRegister, m_backtrackStackOffset));
+ else if (isLabel())
+ masm->jump().linkTo(m_backtrackLabel, masm);
+ else
+ m_backTrackJumps.append(masm->jump());
+ }
+
+ void jumpToBacktrack(RegexGenerator* generator, Jump jump)
+ {
+ if (isJumpList()) {
+ if (m_backtrackSourceLabel && (m_backtrackSourceLabel->isSet()))
+ jump.linkTo(*m_backtrackSourceLabel, generator);
+ else
+ m_backtrackSourceJumps->append(jump);
+ } else if (isStackOffset())
+ generator->m_expressionState.addIndirectJumpEntry(getStackOffset(), jump);
+ else if (isLabel())
+ jump.linkTo(getLabel(), generator);
+ else
+ m_backTrackJumps.append(jump);
+ }
+
+ void jumpToBacktrack(RegexGenerator* generator, JumpList& jumps)
+ {
+ if (isJumpList()) {
+ if (m_backtrackSourceLabel && (m_backtrackSourceLabel->isSet()))
+ jumps.linkTo(*m_backtrackSourceLabel, generator);
+ else
+ m_backtrackSourceJumps->append(jumps);
+ } else if (isStackOffset())
+ generator->m_expressionState.addIndirectJumpEntry(getStackOffset(), jumps);
+ else if (isLabel())
+ jumps.linkTo(getLabel(), generator);
+ else
+ m_backTrackJumps.append(jumps);
+ }
+
+ bool linkDataLabelToHereIfExists(RegexGenerator* generator)
+ {
+ if (hasDataLabel()) {
+ generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(getDataLabel(), generator->label()));
+ clearDataLabel();
+ return true;
+ }
+
+ return false;
+ }
+
+ bool plantJumpToBacktrackIfExists(RegexGenerator* generator)
+ {
+ if (isJumpList()) {
+ if (m_backtrackSourceLabel && (m_backtrackSourceLabel->isSet()))
+ generator->jump(*m_backtrackSourceLabel);
+ else
+ m_backtrackSourceJumps->append(generator->jump());
+
+ return true;
+ }
+
+ if (isStackOffset()) {
+ generator->jump(Address(stackPointerRegister, getStackOffset()));
+ return true;
+ }
+
+ if (isLabel()) {
+ generator->jump(getLabel());
+ if (hasDataLabel()) {
+ generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(getDataLabel(), getLabel()));
+ clearDataLabel();
+ }
+ return true;
+ }
+
+ return false;
+ }
+
+ void linkAlternativeBacktracks(RegexGenerator* generator, bool nextIteration = false)
+ {
+ Label hereLabel = generator->label();
+
+ if (m_backtrackToLabel) {
+ *m_backtrackToLabel = hereLabel;
+ m_backtrackToLabel = 0;
+ }
+
+ m_backTrackJumps.link(generator);
+
+ if (nextIteration)
+ generator->m_expressionState.linkToNextIteration(hereLabel);
+
+ if (hasDataLabel()) {
+ generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(getDataLabel(), hereLabel));
+ // data label cleared as a result of the clear() below
+ }
+
+ clear();
+ }
+
+ void linkAlternativeBacktracksTo(RegexGenerator* generator, Label label, bool nextIteration = false)
+ {
+ m_backTrackJumps.linkTo(label, generator);
+
+ if (nextIteration)
+ generator->m_expressionState.linkToNextIteration(label);
+
+ if (hasDataLabel()) {
+ generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(getDataLabel(), label));
+ clearDataLabel();
+ }
+ }
+
+ private:
+ BacktrackType m_backtrackType;
+ int32_t m_backtrackStackOffset;
+ Label m_backtrackLabel;
+ DataLabelPtr m_dataLabelPtr;
+ Label* m_backtrackToLabel;
+ DataLabelPtr* m_subDataLabelPtr;
+ BacktrackDestination* m_nextBacktrack;
+ Label* m_backtrackSourceLabel;
+ JumpList* m_backtrackSourceJumps;
+ JumpList m_backTrackJumps;
+ };
+
struct TermGenerationState {
TermGenerationState(PatternDisjunction* disjunction, unsigned checkedTotal)
: disjunction(disjunction)
, checkedTotal(checkedTotal)
+ , m_subParenNum(0)
+ , m_linkedBacktrack(0)
+ , m_parenthesesTail(0)
{
}
void resetAlternative()
{
- isBackTrackGenerated = false;
+ m_backtrack.clear();
alt = 0;
}
bool alternativeValid()
@@ -323,11 +796,16 @@ class RegexGenerator : private MacroAssembler {
{
return disjunction->m_alternatives[alt];
}
+ bool isLastAlternative()
+ {
+ return (alt + 1) == disjunction->m_alternatives.size();
+ }
void resetTerm()
{
ASSERT(alternativeValid());
t = 0;
+ m_subParenNum = 0;
}
bool termValid()
{
@@ -349,11 +827,25 @@ class RegexGenerator : private MacroAssembler {
ASSERT(alternativeValid());
return (t + 1) == alternative()->m_terms.size();
}
+ unsigned getSubParenNum()
+ {
+ return m_subParenNum++;
+ }
bool isMainDisjunction()
{
return !disjunction->m_parent;
}
+ void setParenthesesTail(ParenthesesTail* parenthesesTail)
+ {
+ m_parenthesesTail = parenthesesTail;
+ }
+
+ ParenthesesTail* getParenthesesTail()
+ {
+ return m_parenthesesTail;
+ }
+
PatternTerm& lookaheadTerm()
{
ASSERT(alternativeValid());
@@ -374,52 +866,106 @@ class RegexGenerator : private MacroAssembler {
return term().inputPosition - checkedTotal;
}
- void jumpToBacktrack(Jump jump, MacroAssembler* masm)
+ void clearBacktrack()
{
- if (isBackTrackGenerated)
- jump.linkTo(backtrackLabel, masm);
- else
- backTrackJumps.append(jump);
+ m_backtrack.clear(false);
+ m_linkedBacktrack = 0;
}
- void jumpToBacktrack(JumpList& jumps, MacroAssembler* masm)
+
+ void jumpToBacktrack(MacroAssembler* masm)
{
- if (isBackTrackGenerated)
- jumps.linkTo(backtrackLabel, masm);
- else
- backTrackJumps.append(jumps);
+ m_backtrack.jumpToBacktrack(masm);
+ }
+
+ void jumpToBacktrack(RegexGenerator* generator, Jump jump)
+ {
+ m_backtrack.jumpToBacktrack(generator, jump);
+ }
+
+ void jumpToBacktrack(RegexGenerator* generator, JumpList& jumps)
+ {
+ m_backtrack.jumpToBacktrack(generator, jumps);
+ }
+
+ bool plantJumpToBacktrackIfExists(RegexGenerator* generator)
+ {
+ return m_backtrack.plantJumpToBacktrackIfExists(generator);
}
- bool plantJumpToBacktrackIfExists(MacroAssembler* masm)
+
+ bool linkDataLabelToBacktrackIfExists(RegexGenerator* generator)
{
- if (isBackTrackGenerated) {
- masm->jump(backtrackLabel);
+ if ((m_backtrack.isLabel()) && (m_backtrack.hasDataLabel())) {
+ generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(m_backtrack.getDataLabel(), m_backtrack.getLabel()));
+ m_backtrack.clearDataLabel();
return true;
}
+
return false;
}
+
void addBacktrackJump(Jump jump)
{
- backTrackJumps.append(jump);
+ m_backtrack.addBacktrackJump(jump);
}
- void setBacktrackGenerated(Label label)
+
+ void setBacktrackDataLabel(DataLabelPtr dp)
{
- isBackTrackGenerated = true;
- backtrackLabel = label;
+ m_backtrack.setDataLabel(dp);
}
- void linkAlternativeBacktracks(MacroAssembler* masm)
+
+ void setBackTrackStackOffset(int32_t stackOffset)
+ {
+ m_backtrack.setStackOffset(stackOffset);
+ }
+
+ void setBacktrackLabel(Label label)
+ {
+ m_backtrack.setLabel(label);
+ }
+
+ void linkAlternativeBacktracks(RegexGenerator* generator, bool nextIteration = false)
+ {
+ m_backtrack.linkAlternativeBacktracks(generator, nextIteration);
+ m_linkedBacktrack = 0;
+ }
+
+ void linkAlternativeBacktracksTo(RegexGenerator* generator, Label label, bool nextIteration = false)
+ {
+ m_backtrack.linkAlternativeBacktracksTo(generator, label, nextIteration);
+ }
+
+ void setBacktrackLink(BacktrackDestination* linkedBacktrack)
+ {
+ m_linkedBacktrack = linkedBacktrack;
+ }
+
+ void chainBacktracks(BacktrackDestination* followonBacktrack)
+ {
+ if (m_linkedBacktrack)
+ m_linkedBacktrack->linkToNextBacktrack(followonBacktrack);
+ }
+
+ void chainBacktrackJumps(JumpList* jumpList)
{
- isBackTrackGenerated = false;
- backTrackJumps.link(masm);
+ if (m_linkedBacktrack && !(m_linkedBacktrack->hasDestination()))
+ m_linkedBacktrack->setBacktrackJumpList(jumpList);
}
- void linkAlternativeBacktracksTo(Label label, MacroAssembler* masm)
+
+ BacktrackDestination& getBacktrackDestination()
{
- isBackTrackGenerated = false;
- backTrackJumps.linkTo(label, masm);
+ return m_backtrack;
}
- void propagateBacktrackingFrom(TermGenerationState& nestedParenthesesState, MacroAssembler* masm)
+
+ void propagateBacktrackingFrom(RegexGenerator* generator, BacktrackDestination& backtrack, bool doJump = true)
{
- jumpToBacktrack(nestedParenthesesState.backTrackJumps, masm);
- if (nestedParenthesesState.isBackTrackGenerated)
- setBacktrackGenerated(nestedParenthesesState.backtrackLabel);
+ if (doJump)
+ m_backtrack.jumpToBacktrack(generator, backtrack.getBacktrackJumps());
+ if (backtrack.hasDestination()) {
+ if (m_backtrack.hasDataLabel())
+ generator->m_expressionState.addDataLabelToNextIteration(m_backtrack.getDataLabel());
+
+ m_backtrack.copyTarget(backtrack, doJump);
+ }
}
PatternDisjunction* disjunction;
@@ -427,9 +973,154 @@ class RegexGenerator : private MacroAssembler {
private:
unsigned alt;
unsigned t;
- JumpList backTrackJumps;
- Label backtrackLabel;
- bool isBackTrackGenerated;
+ unsigned m_subParenNum;
+ BacktrackDestination m_backtrack;
+ BacktrackDestination* m_linkedBacktrack;
+ ParenthesesTail* m_parenthesesTail;
+
+ };
+
+ struct ParenthesesTail {
+ ParenthesesTail(PatternTerm& term, int nestingLevel, ParenthesesTail* nextOuterParenTail)
+ : m_term(term)
+ , m_nestingLevel(nestingLevel)
+ , m_subParenIndex(0)
+ , m_nextOuterParenTail(nextOuterParenTail)
+ {
+ }
+
+ void processBacktracks(RegexGenerator* generator, TermGenerationState& state, TermGenerationState& parenthesesState, Label nonGreedyTryParentheses, Label fallThrough)
+ {
+ m_nonGreedyTryParentheses = nonGreedyTryParentheses;
+ m_fallThrough = fallThrough;
+
+ m_subParenIndex = state.getSubParenNum();
+ parenthesesState.getBacktrackDestination().copyTo(m_parenBacktrack);
+ state.chainBacktracks(&m_backtrack);
+ BacktrackDestination& stateBacktrack = state.getBacktrackDestination();
+ stateBacktrack.copyTo(m_backtrack);
+ stateBacktrack.setBacktrackToLabel(&m_backtrackToLabel);
+ state.setBacktrackLink(&m_backtrack);
+ stateBacktrack.setSubDataLabelPtr(&m_dataAfterLabelPtr);
+
+ m_doDirectBacktrack = m_parenBacktrack.hasDestination();
+
+ if ((m_term.quantityType == QuantifierGreedy) || (m_term.quantityType == QuantifierNonGreedy))
+ m_doDirectBacktrack = false;
+
+ if (m_doDirectBacktrack)
+ state.propagateBacktrackingFrom(generator, m_parenBacktrack, false);
+ else {
+ stateBacktrack.setBacktrackJumpList(&m_pattBacktrackJumps);
+ stateBacktrack.setBacktrackSourceLabel(&m_backtrackFromAfterParens);
+ }
+
+ parenthesesState.chainBacktrackJumps(&m_pattBacktrackJumps);
+ }
+
+ void setNextIteration(Label nextIteration)
+ {
+ if (!m_nestingLevel && !m_backtrackToLabel.isSet())
+ m_backtrackToLabel = nextIteration;
+ }
+
+ void addAfterParenJump(Jump jump)
+ {
+ m_pattBacktrackJumps.append(jump);
+ }
+
+ bool generateCode(RegexGenerator* generator, JumpList& jumpsToNext, bool priorBackTrackFallThrough, bool nextBacktrackFallThrough)
+ {
+ const RegisterID indexTemporary = regT0;
+ unsigned parenthesesFrameLocation = m_term.frameLocation;
+ Jump fromPriorBacktrack;
+ bool needJumpForPriorParenTail = false;
+
+ if (priorBackTrackFallThrough
+ && ((m_term.quantityType == QuantifierGreedy)
+ || (m_term.quantityType == QuantifierNonGreedy)
+ || (!m_doDirectBacktrack && m_parenBacktrack.hasDestination()))) {
+ // If the prior paren tail code assumed that it could fall through,
+ // but we need to generate after paren backtrack code, then provide
+ // a jump around that code for the prior paren tail code.
+ // A regular expressing like ((xxx)...)? needs this.
+ fromPriorBacktrack = generator->jump();
+ needJumpForPriorParenTail = true;
+ }
+
+ if (!m_backtrack.hasDestination()) {
+ if (m_backtrackToLabel.isSet()) {
+ m_backtrack.setLabel(m_backtrackToLabel);
+ nextBacktrackFallThrough = false;
+ } else if (!m_subParenIndex && m_nextOuterParenTail) {
+ // If we don't have a destination and we are the first term of a nested paren, go
+ // back to the outer paren.
+ // There is an optimization if the next outer paren is the next paren to be emitted.
+ // In that case we really want the else clause.
+ m_backtrack.setBacktrackJumpList(&m_nextOuterParenTail->m_withinBacktrackJumps);
+ nextBacktrackFallThrough = false;
+ } else
+ m_backtrack.setBacktrackJumpList(&jumpsToNext);
+ } else
+ nextBacktrackFallThrough = false;
+
+ // A failure AFTER the parens jumps here - Backtrack to this paren
+ m_backtrackFromAfterParens = generator->label();
+
+ if (m_dataAfterLabelPtr.isSet())
+ generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(m_dataAfterLabelPtr, m_backtrackFromAfterParens));
+
+ m_pattBacktrackJumps.link(generator);
+
+ if (m_term.quantityType == QuantifierGreedy) {
+ // If this is -1 we have now tested with both with and without the parens.
+ generator->loadFromFrame(parenthesesFrameLocation, indexTemporary);
+ m_backtrack.jumpToBacktrack(generator, generator->branch32(Equal, indexTemporary, Imm32(-1)));
+ } else if (m_term.quantityType == QuantifierNonGreedy) {
+ // If this is -1 we have now tested with both with and without the parens.
+ generator->loadFromFrame(parenthesesFrameLocation, indexTemporary);
+ generator->branch32(Equal, indexTemporary, Imm32(-1)).linkTo(m_nonGreedyTryParentheses, generator);
+ }
+
+ if (!m_doDirectBacktrack)
+ m_parenBacktrack.plantJumpToBacktrackIfExists(generator);
+
+ // A failure WITHIN the parens jumps here
+ if (needJumpForPriorParenTail)
+ fromPriorBacktrack.link(generator);
+ m_parenBacktrack.linkAlternativeBacktracks(generator);
+ m_withinBacktrackJumps.link(generator);
+
+ if (m_term.capture())
+ generator->store32(Imm32(-1), Address(output, (m_term.parentheses.subpatternId << 1) * sizeof(int)));
+
+ if (m_term.quantityType == QuantifierGreedy) {
+ generator->storeToFrame(Imm32(-1), parenthesesFrameLocation);
+ generator->jump().linkTo(m_fallThrough, generator);
+ nextBacktrackFallThrough = false;
+ } else if (!nextBacktrackFallThrough)
+ m_backtrack.jumpToBacktrack(generator);
+
+ if (!m_doDirectBacktrack)
+ m_backtrack.setNextBacktrackLabel(m_backtrackFromAfterParens);
+
+ return nextBacktrackFallThrough;
+ }
+
+ PatternTerm& m_term;
+ int m_nestingLevel;
+ unsigned m_subParenIndex;
+ ParenthesesTail* m_nextOuterParenTail;
+ Label m_nonGreedyTryParentheses;
+ Label m_fallThrough;
+ Label m_backtrackToLabel;
+ Label m_backtrackFromAfterParens;
+ DataLabelPtr m_dataAfterLabelPtr;
+ JumpList m_pattBacktrackJumps;
+ JumpList m_withinBacktrackJumps;
+ BacktrackDestination m_parenBacktrack;
+ BacktrackDestination m_backtrack;
+ bool m_doDirectBacktrack;
};
void generateAssertionBOL(TermGenerationState& state)
@@ -445,15 +1136,15 @@ class RegexGenerator : private MacroAssembler {
readCharacter(state.inputOffset() - 1, character);
matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
- state.jumpToBacktrack(jump(), this);
+ state.jumpToBacktrack(this);
matchDest.link(this);
} else {
// Erk, really should poison out these alternatives early. :-/
if (term.inputPosition)
- state.jumpToBacktrack(jump(), this);
+ state.jumpToBacktrack(this);
else
- state.jumpToBacktrack(branch32(NotEqual, index, Imm32(state.checkedTotal)), this);
+ state.jumpToBacktrack(this, branch32(NotEqual, index, Imm32(state.checkedTotal)));
}
}
@@ -470,15 +1161,15 @@ class RegexGenerator : private MacroAssembler {
readCharacter(state.inputOffset(), character);
matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
- state.jumpToBacktrack(jump(), this);
+ state.jumpToBacktrack(this);
matchDest.link(this);
} else {
if (term.inputPosition == state.checkedTotal)
- state.jumpToBacktrack(notAtEndOfInput(), this);
+ state.jumpToBacktrack(this, notAtEndOfInput());
// Erk, really should poison out these alternatives early. :-/
else
- state.jumpToBacktrack(jump(), this);
+ state.jumpToBacktrack(this);
}
}
@@ -512,20 +1203,20 @@ class RegexGenerator : private MacroAssembler {
// We fall through to here if the last character was not a wordchar.
JumpList nonWordCharThenWordChar;
JumpList nonWordCharThenNonWordChar;
- if (term.invertOrCapture) {
+ if (term.invert()) {
matchAssertionWordchar(state, nonWordCharThenNonWordChar, nonWordCharThenWordChar);
nonWordCharThenWordChar.append(jump());
} else {
matchAssertionWordchar(state, nonWordCharThenWordChar, nonWordCharThenNonWordChar);
nonWordCharThenNonWordChar.append(jump());
}
- state.jumpToBacktrack(nonWordCharThenNonWordChar, this);
+ state.jumpToBacktrack(this, nonWordCharThenNonWordChar);
// We jump here if the last character was a wordchar.
matchDest.link(this);
JumpList wordCharThenWordChar;
JumpList wordCharThenNonWordChar;
- if (term.invertOrCapture) {
+ if (term.invert()) {
matchAssertionWordchar(state, wordCharThenNonWordChar, wordCharThenWordChar);
wordCharThenWordChar.append(jump());
} else {
@@ -533,8 +1224,8 @@ class RegexGenerator : private MacroAssembler {
// This can fall-though!
}
- state.jumpToBacktrack(wordCharThenWordChar, this);
-
+ state.jumpToBacktrack(this, wordCharThenWordChar);
+
nonWordCharThenWordChar.link(this);
wordCharThenNonWordChar.link(this);
}
@@ -547,10 +1238,10 @@ class RegexGenerator : private MacroAssembler {
if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
readCharacter(state.inputOffset(), character);
or32(Imm32(32), character);
- state.jumpToBacktrack(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))), this);
+ state.jumpToBacktrack(this, branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
} else {
ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
- state.jumpToBacktrack(jumpIfCharNotEquals(ch, state.inputOffset()), this);
+ state.jumpToBacktrack(this, jumpIfCharNotEquals(ch, state.inputOffset()));
}
}
@@ -562,7 +1253,7 @@ class RegexGenerator : private MacroAssembler {
int mask = 0;
int chPair = ch1 | (ch2 << 16);
-
+
if (m_pattern.m_ignoreCase) {
if (isASCIIAlpha(ch1))
mask |= 32;
@@ -573,9 +1264,9 @@ class RegexGenerator : private MacroAssembler {
if (mask) {
load32WithUnalignedHalfWords(BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), character);
or32(Imm32(mask), character);
- state.jumpToBacktrack(branch32(NotEqual, character, Imm32(chPair | mask)), this);
+ state.jumpToBacktrack(this, branch32(NotEqual, character, Imm32(chPair | mask)));
} else
- state.jumpToBacktrack(branch32WithUnalignedHalfWords(NotEqual, BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), Imm32(chPair)), this);
+ state.jumpToBacktrack(this, branch32WithUnalignedHalfWords(NotEqual, BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), Imm32(chPair)));
}
void generatePatternCharacterFixed(TermGenerationState& state)
@@ -592,10 +1283,10 @@ class RegexGenerator : private MacroAssembler {
if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character);
or32(Imm32(32), character);
- state.jumpToBacktrack(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))), this);
+ state.jumpToBacktrack(this, branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
} else {
ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
- state.jumpToBacktrack(branch16(NotEqual, BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), Imm32(ch)), this);
+ state.jumpToBacktrack(this, branch16(NotEqual, BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), Imm32(ch)));
}
add32(Imm32(1), countRegister);
branch32(NotEqual, countRegister, index).linkTo(loop, this);
@@ -607,7 +1298,7 @@ class RegexGenerator : private MacroAssembler {
const RegisterID countRegister = regT1;
PatternTerm& term = state.term();
UChar ch = term.patternCharacter;
-
+
move(Imm32(0), countRegister);
JumpList failures;
@@ -624,7 +1315,7 @@ class RegexGenerator : private MacroAssembler {
add32(Imm32(1), countRegister);
add32(Imm32(1), index);
- if (term.quantityCount != 0xffffffff) {
+ if (term.quantityCount != quantifyInfinite) {
branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
failures.append(jump());
} else
@@ -632,7 +1323,7 @@ class RegexGenerator : private MacroAssembler {
Label backtrackBegin(this);
loadFromFrame(term.frameLocation, countRegister);
- state.jumpToBacktrack(branchTest32(Zero, countRegister), this);
+ state.jumpToBacktrack(this, branchTest32(Zero, countRegister));
sub32(Imm32(1), countRegister);
sub32(Imm32(1), index);
@@ -640,7 +1331,7 @@ class RegexGenerator : private MacroAssembler {
storeToFrame(countRegister, term.frameLocation);
- state.setBacktrackGenerated(backtrackBegin);
+ state.setBacktrackLabel(backtrackBegin);
}
void generatePatternCharacterNonGreedy(TermGenerationState& state)
@@ -649,20 +1340,20 @@ class RegexGenerator : private MacroAssembler {
const RegisterID countRegister = regT1;
PatternTerm& term = state.term();
UChar ch = term.patternCharacter;
-
+
move(Imm32(0), countRegister);
Jump firstTimeDoNothing = jump();
Label hardFail(this);
sub32(countRegister, index);
- state.jumpToBacktrack(jump(), this);
+ state.jumpToBacktrack(this);
Label backtrackBegin(this);
loadFromFrame(term.frameLocation, countRegister);
atEndOfInput().linkTo(hardFail, this);
- if (term.quantityCount != 0xffffffff)
+ if (term.quantityCount != quantifyInfinite)
branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
readCharacter(state.inputOffset(), character);
@@ -679,7 +1370,7 @@ class RegexGenerator : private MacroAssembler {
firstTimeDoNothing.link(this);
storeToFrame(countRegister, term.frameLocation);
- state.setBacktrackGenerated(backtrackBegin);
+ state.setBacktrackLabel(backtrackBegin);
}
void generateCharacterClassSingle(TermGenerationState& state)
@@ -691,10 +1382,10 @@ class RegexGenerator : private MacroAssembler {
readCharacter(state.inputOffset(), character);
matchCharacterClass(character, matchDest, term.characterClass);
- if (term.invertOrCapture)
- state.jumpToBacktrack(matchDest, this);
+ if (term.invert())
+ state.jumpToBacktrack(this, matchDest);
else {
- state.jumpToBacktrack(jump(), this);
+ state.jumpToBacktrack(this);
matchDest.link(this);
}
}
@@ -713,10 +1404,10 @@ class RegexGenerator : private MacroAssembler {
load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character);
matchCharacterClass(character, matchDest, term.characterClass);
- if (term.invertOrCapture)
- state.jumpToBacktrack(matchDest, this);
+ if (term.invert())
+ state.jumpToBacktrack(this, matchDest);
else {
- state.jumpToBacktrack(jump(), this);
+ state.jumpToBacktrack(this);
matchDest.link(this);
}
@@ -729,14 +1420,14 @@ class RegexGenerator : private MacroAssembler {
const RegisterID character = regT0;
const RegisterID countRegister = regT1;
PatternTerm& term = state.term();
-
+
move(Imm32(0), countRegister);
JumpList failures;
Label loop(this);
failures.append(atEndOfInput());
- if (term.invertOrCapture) {
+ if (term.invert()) {
readCharacter(state.inputOffset(), character);
matchCharacterClass(character, failures, term.characterClass);
} else {
@@ -749,7 +1440,7 @@ class RegexGenerator : private MacroAssembler {
add32(Imm32(1), countRegister);
add32(Imm32(1), index);
- if (term.quantityCount != 0xffffffff) {
+ if (term.quantityCount != quantifyInfinite) {
branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
failures.append(jump());
} else
@@ -757,7 +1448,7 @@ class RegexGenerator : private MacroAssembler {
Label backtrackBegin(this);
loadFromFrame(term.frameLocation, countRegister);
- state.jumpToBacktrack(branchTest32(Zero, countRegister), this);
+ state.jumpToBacktrack(this, branchTest32(Zero, countRegister));
sub32(Imm32(1), countRegister);
sub32(Imm32(1), index);
@@ -765,7 +1456,7 @@ class RegexGenerator : private MacroAssembler {
storeToFrame(countRegister, term.frameLocation);
- state.setBacktrackGenerated(backtrackBegin);
+ state.setBacktrackLabel(backtrackBegin);
}
void generateCharacterClassNonGreedy(TermGenerationState& state)
@@ -773,14 +1464,14 @@ class RegexGenerator : private MacroAssembler {
const RegisterID character = regT0;
const RegisterID countRegister = regT1;
PatternTerm& term = state.term();
-
+
move(Imm32(0), countRegister);
Jump firstTimeDoNothing = jump();
Label hardFail(this);
sub32(countRegister, index);
- state.jumpToBacktrack(jump(), this);
+ state.jumpToBacktrack(this);
Label backtrackBegin(this);
loadFromFrame(term.frameLocation, countRegister);
@@ -792,7 +1483,7 @@ class RegexGenerator : private MacroAssembler {
readCharacter(state.inputOffset(), character);
matchCharacterClass(character, matchDest, term.characterClass);
- if (term.invertOrCapture)
+ if (term.invert())
matchDest.linkTo(hardFail, this);
else {
jump(hardFail);
@@ -805,14 +1496,14 @@ class RegexGenerator : private MacroAssembler {
firstTimeDoNothing.link(this);
storeToFrame(countRegister, term.frameLocation);
- state.setBacktrackGenerated(backtrackBegin);
+ state.setBacktrackLabel(backtrackBegin);
}
void generateParenthesesDisjunction(PatternTerm& parenthesesTerm, TermGenerationState& state, unsigned alternativeFrameLocation)
{
ASSERT((parenthesesTerm.type == PatternTerm::TypeParenthesesSubpattern) || (parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion));
ASSERT(parenthesesTerm.quantityCount == 1);
-
+
PatternDisjunction* disjunction = parenthesesTerm.parentheses.disjunction;
unsigned preCheckedCount = ((parenthesesTerm.quantityType == QuantifierFixedCount) && (parenthesesTerm.type != PatternTerm::TypeParentheticalAssertion)) ? disjunction->m_minimumSize : 0;
@@ -834,12 +1525,12 @@ class RegexGenerator : private MacroAssembler {
Label backtrackBegin(this);
sub32(Imm32(countToCheck), index);
state.addBacktrackJump(jump());
-
+
skip.link(this);
- state.setBacktrackGenerated(backtrackBegin);
+ state.setBacktrackLabel(backtrackBegin);
- state.jumpToBacktrack(jumpIfNoAvailableInput(countToCheck), this);
+ state.jumpToBacktrack(this, jumpIfNoAvailableInput(countToCheck));
state.checkedTotal += countToCheck;
}
@@ -849,6 +1540,7 @@ class RegexGenerator : private MacroAssembler {
state.checkedTotal -= countToCheck;
} else {
JumpList successes;
+ bool propogateBacktrack = false;
for (state.resetAlternative(); state.alternativeValid(); state.nextAlternative()) {
@@ -867,37 +1559,35 @@ class RegexGenerator : private MacroAssembler {
// Matched an alternative.
DataLabelPtr dataLabel = storeToFrameWithPatch(alternativeFrameLocation);
- successes.append(jump());
+
+ if (!state.isLastAlternative() || countToCheck)
+ successes.append(jump());
// Alternative did not match.
- Label backtrackLocation(this);
-
- // Can we backtrack the alternative? - if so, do so. If not, just fall through to the next one.
- state.plantJumpToBacktrackIfExists(this);
-
- state.linkAlternativeBacktracks(this);
+
+ state.setBacktrackDataLabel(dataLabel);
+
+ // Do we have a backtrack destination?
+ // if so, link the data label to it.
+ state.linkDataLabelToBacktrackIfExists(this);
+
+ if (!state.isLastAlternative() || countToCheck)
+ state.linkAlternativeBacktracks(this);
if (countToCheck) {
sub32(Imm32(countToCheck), index);
state.checkedTotal -= countToCheck;
- }
-
- m_backtrackRecords.append(AlternativeBacktrackRecord(dataLabel, backtrackLocation));
+ } else if (state.isLastAlternative())
+ propogateBacktrack = true;
}
// We fall through to here when the last alternative fails.
// Add a backtrack out of here for the parenthese handling code to link up.
- state.addBacktrackJump(jump());
+ if (!propogateBacktrack)
+ state.addBacktrackJump(jump());
- // Generate a trampoline for the parens code to backtrack to, to retry the
+ // Save address on stack for the parens code to backtrack to, to retry the
// next alternative.
- state.setBacktrackGenerated(label());
- loadFromFrameAndJump(alternativeFrameLocation);
-
- // FIXME: both of the above hooks are a little inefficient, in that you
- // may end up trampolining here, just to trampoline back out to the
- // parentheses code, or vice versa. We can probably eliminate a jump
- // by restructuring, but coding this way for now for simplicity during
- // development.
+ state.setBackTrackStackOffset(alternativeFrameLocation * sizeof(void*));
successes.link(this);
}
@@ -918,13 +1608,19 @@ class RegexGenerator : private MacroAssembler {
alternativeFrameLocation += RegexStackSpaceForBackTrackInfoParenthesesOnce;
// optimized case - no capture & no quantifier can be handled in a light-weight manner.
- if (!term.invertOrCapture && (term.quantityType == QuantifierFixedCount)) {
+ if (!term.capture() && (term.quantityType == QuantifierFixedCount)) {
+ m_expressionState.incrementParenNestingLevel();
+
TermGenerationState parenthesesState(disjunction, state.checkedTotal);
generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
// this expects that any backtracks back out of the parentheses will be in the
- // parenthesesState's backTrackJumps vector, and that if they need backtracking
- // they will have set an entry point on the parenthesesState's backtrackLabel.
- state.propagateBacktrackingFrom(parenthesesState, this);
+ // parenthesesState's m_backTrackJumps vector, and that if they need backtracking
+ // they will have set an entry point on the parenthesesState's m_backtrackLabel.
+ BacktrackDestination& parenthesesBacktrack = parenthesesState.getBacktrackDestination();
+ state.propagateBacktrackingFrom(this, parenthesesBacktrack);
+ state.getBacktrackDestination().copyBacktrackToLabel(parenthesesBacktrack);
+
+ m_expressionState.decrementParenNestingLevel();
} else {
Jump nonGreedySkipParentheses;
Label nonGreedyTryParentheses;
@@ -938,7 +1634,7 @@ class RegexGenerator : private MacroAssembler {
}
// store the match start index
- if (term.invertOrCapture) {
+ if (term.capture()) {
int inputOffset = state.inputOffset() - preCheckedCount;
if (inputOffset) {
move(index, indexTemporary);
@@ -948,45 +1644,24 @@ class RegexGenerator : private MacroAssembler {
store32(index, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
}
- // generate the body of the parentheses
- TermGenerationState parenthesesState(disjunction, state.checkedTotal);
- generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
+ ParenthesesTail* parenthesesTail = m_expressionState.addParenthesesTail(term, state.getParenthesesTail());
- Jump success = (term.quantityType == QuantifierFixedCount) ?
- jump() :
- branch32(NotEqual, index, Address(stackPointerRegister, (parenthesesFrameLocation * sizeof(void*))));
+ m_expressionState.incrementParenNestingLevel();
- // A failure AFTER the parens jumps here
- Label backtrackFromAfterParens(this);
+ TermGenerationState parenthesesState(disjunction, state.checkedTotal);
- if (term.quantityType == QuantifierGreedy) {
- // If this is -1 we have now tested with both with and without the parens.
- loadFromFrame(parenthesesFrameLocation, indexTemporary);
- state.jumpToBacktrack(branch32(Equal, indexTemporary, Imm32(-1)), this);
- } else if (term.quantityType == QuantifierNonGreedy) {
- // If this is -1 we have now tested without the parens, now test with.
- loadFromFrame(parenthesesFrameLocation, indexTemporary);
- branch32(Equal, indexTemporary, Imm32(-1)).linkTo(nonGreedyTryParentheses, this);
- }
+ // Save the parenthesesTail for backtracking from nested parens to this one.
+ parenthesesState.setParenthesesTail(parenthesesTail);
- parenthesesState.plantJumpToBacktrackIfExists(this);
- // A failure WITHIN the parens jumps here
- parenthesesState.linkAlternativeBacktracks(this);
- if (term.invertOrCapture)
- store32(Imm32(-1), Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
+ // generate the body of the parentheses
+ generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
- if (term.quantityType == QuantifierGreedy)
- storeToFrame(Imm32(-1), parenthesesFrameLocation);
- else
- state.jumpToBacktrack(jump(), this);
-
- state.setBacktrackGenerated(backtrackFromAfterParens);
- if (term.quantityType == QuantifierNonGreedy)
- nonGreedySkipParentheses.link(this);
- success.link(this);
+ // For non-fixed counts, backtrack if we didn't match anything.
+ if (term.quantityType != QuantifierFixedCount)
+ parenthesesTail->addAfterParenJump(branch32(Equal, index, Address(stackPointerRegister, (parenthesesFrameLocation * sizeof(void*)))));
// store the match end index
- if (term.invertOrCapture) {
+ if (term.capture()) {
int inputOffset = state.inputOffset();
if (inputOffset) {
move(index, indexTemporary);
@@ -995,6 +1670,15 @@ class RegexGenerator : private MacroAssembler {
} else
store32(index, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
}
+
+ m_expressionState.decrementParenNestingLevel();
+
+ parenthesesTail->processBacktracks(this, state, parenthesesState, nonGreedyTryParentheses, label());
+
+ parenthesesState.getBacktrackDestination().clear();
+
+ if (term.quantityType == QuantifierNonGreedy)
+ nonGreedySkipParentheses.link(this);
}
}
@@ -1033,6 +1717,7 @@ class RegexGenerator : private MacroAssembler {
parenthesesState.plantJumpToBacktrackIfExists(this);
parenthesesState.linkAlternativeBacktracks(this);
+
// We get here if the alternative fails to match - fall through to the next iteration, or out of the loop.
if (countToCheck) {
@@ -1057,7 +1742,7 @@ class RegexGenerator : private MacroAssembler {
int countCheckedAfterAssertion = state.checkedTotal - term.inputPosition;
- if (term.invertOrCapture) {
+ if (term.invert()) {
// Inverted case
storeToFrame(index, parenthesesFrameLocation);
@@ -1069,10 +1754,11 @@ class RegexGenerator : private MacroAssembler {
generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
// Success! - which means - Fail!
loadFromFrame(parenthesesFrameLocation, index);
- state.jumpToBacktrack(jump(), this);
+ state.jumpToBacktrack(this);
// And fail means success.
parenthesesState.linkAlternativeBacktracks(this);
+
loadFromFrame(parenthesesFrameLocation, index);
state.checkedTotal += countCheckedAfterAssertion;
@@ -1091,8 +1777,9 @@ class RegexGenerator : private MacroAssembler {
Jump success = jump();
parenthesesState.linkAlternativeBacktracks(this);
+
loadFromFrame(parenthesesFrameLocation, index);
- state.jumpToBacktrack(jump(), this);
+ state.jumpToBacktrack(this);
success.link(this);
@@ -1108,15 +1795,15 @@ class RegexGenerator : private MacroAssembler {
case PatternTerm::TypeAssertionBOL:
generateAssertionBOL(state);
break;
-
+
case PatternTerm::TypeAssertionEOL:
generateAssertionEOL(state);
break;
-
+
case PatternTerm::TypeAssertionWordBoundary:
generateAssertionWordBoundary(state);
break;
-
+
case PatternTerm::TypePatternCharacter:
switch (term.quantityType) {
case QuantifierFixedCount:
@@ -1195,7 +1882,7 @@ class RegexGenerator : private MacroAssembler {
// sufficient input available to run the first repeating alternative.
// The label 'firstAlternativeInputChecked' will jump directly to matching
// the first repeating alternative having skipped this check.
-
+
if (state.alternativeValid()) {
PatternAlternative* alternative = state.alternative();
if (!alternative->onceThrough()) {
@@ -1219,7 +1906,7 @@ class RegexGenerator : private MacroAssembler {
// Track whether any alternatives are shorter than the first one.
if (!alternative->onceThrough())
hasShorterAlternatives = hasShorterAlternatives || (countCheckedForCurrentAlternative < countToCheckForFirstAlternative);
-
+
for (state.resetTerm(); state.termValid(); state.nextTerm())
generateTerm(state);
@@ -1242,6 +1929,8 @@ class RegexGenerator : private MacroAssembler {
generateReturn();
state.nextAlternative();
+ if (alternative->onceThrough() && state.alternativeValid())
+ state.clearBacktrack();
// if there are any more alternatives, plant the check for input before looping.
if (state.alternativeValid()) {
@@ -1249,45 +1938,46 @@ class RegexGenerator : private MacroAssembler {
if (!setRepeatAlternativeLabels && !nextAlternative->onceThrough()) {
// We have handled non-repeating alternatives, jump to next iteration
// and loop over repeating alternatives.
- state.jumpToBacktrack(jump(), this);
-
+ state.jumpToBacktrack(this);
+
countToCheckForFirstAlternative = nextAlternative->m_minimumSize;
-
+
// If we get here, there the last input checked failed.
notEnoughInputForPreviousAlternative.link(this);
-
+
state.linkAlternativeBacktracks(this);
// Back up to start the looping alternatives.
if (countCheckedForCurrentAlternative)
sub32(Imm32(countCheckedForCurrentAlternative), index);
-
+
firstAlternative = Label(this);
-
+
state.checkedTotal = countToCheckForFirstAlternative;
if (countToCheckForFirstAlternative)
notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForFirstAlternative));
-
+
countCheckedForCurrentAlternative = countToCheckForFirstAlternative;
-
+
firstAlternativeInputChecked = Label(this);
setRepeatAlternativeLabels = true;
} else {
int countToCheckForNextAlternative = nextAlternative->m_minimumSize;
-
+
if (countCheckedForCurrentAlternative > countToCheckForNextAlternative) { // CASE 1: current alternative was longer than the next one.
// If we get here, then the last input checked failed.
notEnoughInputForPreviousAlternative.link(this);
-
+
// Check if sufficent input available to run the next alternative
notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative));
// We are now in the correct state to enter the next alternative; this add is only required
// to mirror and revert operation of the sub32, just below.
add32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index);
-
+
// If we get here, then the last input checked passed.
state.linkAlternativeBacktracks(this);
+
// No need to check if we can run the next alternative, since it is shorter -
// just update index.
sub32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index);
@@ -1299,13 +1989,14 @@ class RegexGenerator : private MacroAssembler {
notEnoughInputForPreviousAlternative.link(this);
add32(Imm32(countToCheckForNextAlternative - countCheckedForCurrentAlternative), index);
notEnoughInputForPreviousAlternative.append(jump());
-
+
// The next alternative is longer than the current one; check the difference.
state.linkAlternativeBacktracks(this);
+
notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative));
} else { // CASE 3: Both alternatives are the same length.
ASSERT(countCheckedForCurrentAlternative == countToCheckForNextAlternative);
-
+
// If the next alterative is the same length as this one, then no need to check the input -
// if there was sufficent input to run the current alternative then there is sufficient
// input to run the next one; if not, there isn't.
@@ -1317,7 +2008,7 @@ class RegexGenerator : private MacroAssembler {
}
}
}
-
+
// If we get here, all Alternatives failed...
state.checkedTotal -= countCheckedForCurrentAlternative;
@@ -1325,7 +2016,8 @@ class RegexGenerator : private MacroAssembler {
if (!setRepeatAlternativeLabels) {
// If there are no alternatives that need repeating (all are marked 'onceThrough') then just link
// the match failures to this point, and fall through to the return below.
- state.linkAlternativeBacktracks(this);
+ state.linkAlternativeBacktracks(this, true);
+
notEnoughInputForPreviousAlternative.link(this);
} else {
// How much more input need there be to be able to retry from the first alternative?
@@ -1345,11 +2037,11 @@ class RegexGenerator : private MacroAssembler {
// First, deal with the cases where there was sufficient input to try the last alternative.
if (incrementForNextIter > 0) // We need to check for more input anyway, fall through to the checking below.
- state.linkAlternativeBacktracks(this);
+ state.linkAlternativeBacktracks(this, true);
else if (m_pattern.m_body->m_hasFixedSize && !incrementForNextIter) // No need to update anything, link these backtracks straight to the to pof the loop!
- state.linkAlternativeBacktracksTo(firstAlternativeInputChecked, this);
+ state.linkAlternativeBacktracksTo(this, firstAlternativeInputChecked, true);
else { // no need to check the input, but we do have some bookkeeping to do first.
- state.linkAlternativeBacktracks(this);
+ state.linkAlternativeBacktracks(this, true);
// Where necessary update our preserved start position.
if (!m_pattern.m_body->m_hasFixedSize) {
@@ -1374,7 +2066,7 @@ class RegexGenerator : private MacroAssembler {
} else
store32(index, Address(output));
}
-
+
// Check if there is sufficent input to run the first alternative again.
jumpIfAvailableInput(incrementForNextIter).linkTo(firstAlternativeInputChecked, this);
// No - insufficent input to run the first alteranative, are there any other alternatives we
@@ -1406,6 +2098,10 @@ class RegexGenerator : private MacroAssembler {
move(Imm32(-1), returnRegister);
generateReturn();
+
+ m_expressionState.emitParenthesesTail(this);
+ m_expressionState.emitIndirectJumpTable(this);
+ m_expressionState.linkToNextIteration(this);
}
void generateEnter()
@@ -1490,47 +2186,28 @@ public:
{
generate();
- LinkBuffer patchBuffer(this, globalData->executableAllocator.poolForSize(size()), 0);
+ LinkBuffer patchBuffer(this, globalData->regexAllocator.poolForSize(size()), 0);
- for (unsigned i = 0; i < m_backtrackRecords.size(); ++i)
- patchBuffer.patch(m_backtrackRecords[i].dataLabel, patchBuffer.locationOf(m_backtrackRecords[i].backtrackLocation));
+ for (unsigned i = 0; i < m_expressionState.m_backtrackRecords.size(); ++i)
+ patchBuffer.patch(m_expressionState.m_backtrackRecords[i].dataLabel, patchBuffer.locationOf(m_expressionState.m_backtrackRecords[i].backtrackLocation));
jitObject.set(patchBuffer.finalizeCode());
- }
-
- bool shouldFallBack()
- {
- return m_shouldFallBack;
+ jitObject.setFallBack(m_shouldFallBack);
}
private:
RegexPattern& m_pattern;
bool m_shouldFallBack;
- Vector<AlternativeBacktrackRecord> m_backtrackRecords;
+ GenerationState m_expressionState;
};
-void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& patternString, unsigned& numSubpatterns, const char*& error, BumpPointerAllocator* allocator, bool ignoreCase, bool multiline)
+void jitCompileRegex(RegexPattern& pattern, JSGlobalData* globalData, RegexCodeBlock& jitObject)
{
- RegexPattern pattern(ignoreCase, multiline);
- if ((error = compileRegex(patternString, pattern)))
- return;
- numSubpatterns = pattern.m_numSubpatterns;
-
- if (!pattern.m_containsBackreferences && globalData->canUseJIT()) {
- RegexGenerator generator(pattern);
- generator.compile(globalData, jitObject);
- if (!generator.shouldFallBack())
- return;
- }
-
- jitObject.setFallback(byteCompileRegex(pattern, allocator));
+ RegexGenerator generator(pattern);
+ generator.compile(globalData, jitObject);
}
+
}}
#endif
-
-
-
-
-
diff --git a/JavaScriptCore/yarr/RegexJIT.h b/JavaScriptCore/yarr/RegexJIT.h
index c4c382c..5e3dca1 100644
--- a/JavaScriptCore/yarr/RegexJIT.h
+++ b/JavaScriptCore/yarr/RegexJIT.h
@@ -29,7 +29,6 @@
#if ENABLE(YARR_JIT)
#include "MacroAssembler.h"
-#include "RegexInterpreter.h" // temporary, remove when fallback is removed.
#include "RegexPattern.h"
#include "UString.h"
@@ -51,7 +50,7 @@ class RegexCodeBlock {
public:
RegexCodeBlock()
- : m_needFallback(false)
+ : m_needFallBack(false)
{
}
@@ -59,15 +58,8 @@ public:
{
}
- BytecodePattern* getFallback() { return m_fallback.get(); }
- bool isFallback() { return m_needFallback; }
- void setFallback(PassOwnPtr<BytecodePattern> fallback)
- {
- m_fallback = fallback;
- m_needFallback = true;
- }
-
- bool operator!() { return (!m_ref.m_code.executableAddress() && !m_fallback); }
+ void setFallBack(bool fallback) { m_needFallBack = fallback; }
+ bool isFallBack() { return m_needFallBack; }
void set(MacroAssembler::CodeRef ref) { m_ref = ref; }
int execute(const UChar* input, unsigned start, unsigned length, int* output)
@@ -81,17 +73,13 @@ public:
private:
MacroAssembler::CodeRef m_ref;
- OwnPtr<Yarr::BytecodePattern> m_fallback;
- bool m_needFallback;
+ bool m_needFallBack;
};
-void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, BumpPointerAllocator* allocator, bool ignoreCase = false, bool multiline = false);
+void jitCompileRegex(RegexPattern& pattern, JSGlobalData* globalData, RegexCodeBlock& jitObject);
inline int executeRegex(RegexCodeBlock& jitObject, const UChar* input, unsigned start, unsigned length, int* output)
{
- if (jitObject.isFallback())
- return (interpretRegex(jitObject.getFallback(), input, start, length, output));
-
return jitObject.execute(input, start, length, output);
}
diff --git a/JavaScriptCore/yarr/RegexParser.h b/JavaScriptCore/yarr/RegexParser.h
index a5c0ba2..ec5f589 100644
--- a/JavaScriptCore/yarr/RegexParser.h
+++ b/JavaScriptCore/yarr/RegexParser.h
@@ -33,6 +33,8 @@
namespace JSC { namespace Yarr {
+static const unsigned quantifyInfinite = UINT_MAX;
+
enum BuiltInCharacterClassID {
DigitClassID,
SpaceClassID,
@@ -75,7 +77,7 @@ private:
CharacterClassParserDelegate(Delegate& delegate, ErrorCode& err)
: m_delegate(delegate)
, m_err(err)
- , m_state(empty)
+ , m_state(Empty)
{
}
@@ -90,54 +92,67 @@ private:
}
/*
- * atomPatternCharacterUnescaped():
+ * atomPatternCharacter():
*
- * This method is called directly from parseCharacterClass(), to report a new
- * pattern character token. This method differs from atomPatternCharacter(),
- * which will be called from parseEscape(), since a hypen provided via this
- * method may be indicating a character range, but a hyphen parsed by
- * parseEscape() cannot be interpreted as doing so.
+ * This method is called either from parseCharacterClass() (for an unescaped
+ * character in a character class), or from parseEscape(). In the former case
+ * the value true will be passed for the argument 'hyphenIsRange', and in this
+ * mode we will allow a hypen to be treated as indicating a range (i.e. /[a-z]/
+ * is different to /[a\-z]/).
*/
- void atomPatternCharacterUnescaped(UChar ch)
+ void atomPatternCharacter(UChar ch, bool hyphenIsRange = false)
{
switch (m_state) {
- case empty:
+ case AfterCharacterClass:
+ // Following a builtin character class we need look out for a hyphen.
+ // We're looking for invalid ranges, such as /[\d-x]/ or /[\d-\d]/.
+ // If we see a hyphen following a charater class then unlike usual
+ // we'll report it to the delegate immediately, and put ourself into
+ // a poisoned state. Any following calls to add another character or
+ // character class will result in an error. (A hypen following a
+ // character-class is itself valid, but only at the end of a regex).
+ if (hyphenIsRange && ch == '-') {
+ m_delegate.atomCharacterClassAtom('-');
+ m_state = AfterCharacterClassHyphen;
+ return;
+ }
+ // Otherwise just fall through - cached character so treat this as Empty.
+
+ case Empty:
m_character = ch;
- m_state = cachedCharacter;
- break;
+ m_state = CachedCharacter;
+ return;
- case cachedCharacter:
- if (ch == '-')
- m_state = cachedCharacterHyphen;
+ case CachedCharacter:
+ if (hyphenIsRange && ch == '-')
+ m_state = CachedCharacterHyphen;
else {
m_delegate.atomCharacterClassAtom(m_character);
m_character = ch;
}
- break;
+ return;
- case cachedCharacterHyphen:
- if (ch >= m_character)
- m_delegate.atomCharacterClassRange(m_character, ch);
- else
+ case CachedCharacterHyphen:
+ if (ch < m_character) {
m_err = CharacterClassOutOfOrder;
- m_state = empty;
- }
- }
-
- /*
- * atomPatternCharacter():
- *
- * Adds a pattern character, called by parseEscape(), as such will not
- * interpret a hyphen as indicating a character range.
- */
- void atomPatternCharacter(UChar ch)
- {
- // Flush if a character is already pending to prevent the
- // hyphen from begin interpreted as indicating a range.
- if((ch == '-') && (m_state == cachedCharacter))
- flush();
+ return;
+ }
+ m_delegate.atomCharacterClassRange(m_character, ch);
+ m_state = Empty;
+ return;
- atomPatternCharacterUnescaped(ch);
+ // See coment in atomBuiltInCharacterClass below.
+ // This too is technically an error, per ECMA-262, and again we
+ // we chose to allow this. Note a subtlely here that while we
+ // diverge from the spec's definition of CharacterRange we do
+ // remain in compliance with the grammar. For example, consider
+ // the expression /[\d-a-z]/. We comply with the grammar in
+ // this case by not allowing a-z to be matched as a range.
+ case AfterCharacterClassHyphen:
+ m_delegate.atomCharacterClassAtom(ch);
+ m_state = Empty;
+ return;
+ }
}
/*
@@ -147,8 +162,34 @@ private:
*/
void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert)
{
- flush();
- m_delegate.atomCharacterClassBuiltIn(classID, invert);
+ switch (m_state) {
+ case CachedCharacter:
+ // Flush the currently cached character, then fall through.
+ m_delegate.atomCharacterClassAtom(m_character);
+
+ case Empty:
+ case AfterCharacterClass:
+ m_state = AfterCharacterClass;
+ m_delegate.atomCharacterClassBuiltIn(classID, invert);
+ return;
+
+ // If we hit either of these cases, we have an invalid range that
+ // looks something like /[x-\d]/ or /[\d-\d]/.
+ // According to ECMA-262 this should be a syntax error, but
+ // empirical testing shows this to break teh webz. Instead we
+ // comply with to the ECMA-262 grammar, and assume the grammar to
+ // have matched the range correctly, but tweak our interpretation
+ // of CharacterRange. Effectively we implicitly handle the hyphen
+ // as if it were escaped, e.g. /[\w-_]/ is treated as /[\w\-_]/.
+ case CachedCharacterHyphen:
+ m_delegate.atomCharacterClassAtom(m_character);
+ m_delegate.atomCharacterClassAtom('-');
+ // fall through
+ case AfterCharacterClassHyphen:
+ m_delegate.atomCharacterClassBuiltIn(classID, invert);
+ m_state = Empty;
+ return;
+ }
}
/*
@@ -158,7 +199,12 @@ private:
*/
void end()
{
- flush();
+ if (m_state == CachedCharacter)
+ m_delegate.atomCharacterClassAtom(m_character);
+ else if (m_state == CachedCharacterHyphen) {
+ m_delegate.atomCharacterClassAtom(m_character);
+ m_delegate.atomCharacterClassAtom('-');
+ }
m_delegate.atomCharacterClassEnd();
}
@@ -168,21 +214,14 @@ private:
void atomBackReference(unsigned) { ASSERT_NOT_REACHED(); }
private:
- void flush()
- {
- if (m_state != empty) // either cachedCharacter or cachedCharacterHyphen
- m_delegate.atomCharacterClassAtom(m_character);
- if (m_state == cachedCharacterHyphen)
- m_delegate.atomCharacterClassAtom('-');
- m_state = empty;
- }
-
Delegate& m_delegate;
ErrorCode& m_err;
enum CharacterClassConstructionState {
- empty,
- cachedCharacter,
- cachedCharacterHyphen,
+ Empty,
+ CachedCharacter,
+ CachedCharacterHyphen,
+ AfterCharacterClass,
+ AfterCharacterClassHyphen,
} m_state;
UChar m_character;
};
@@ -428,7 +467,7 @@ private:
break;
default:
- characterClassConstructor.atomPatternCharacterUnescaped(consume());
+ characterClassConstructor.atomPatternCharacter(consume(), true);
}
if (m_err)
@@ -572,13 +611,13 @@ private:
case '*':
consume();
- parseQuantifier(lastTokenWasAnAtom, 0, UINT_MAX);
+ parseQuantifier(lastTokenWasAnAtom, 0, quantifyInfinite);
lastTokenWasAnAtom = false;
break;
case '+':
consume();
- parseQuantifier(lastTokenWasAnAtom, 1, UINT_MAX);
+ parseQuantifier(lastTokenWasAnAtom, 1, quantifyInfinite);
lastTokenWasAnAtom = false;
break;
@@ -597,7 +636,7 @@ private:
unsigned max = min;
if (tryConsume(','))
- max = peekIsDigit() ? consumeNumber() : UINT_MAX;
+ max = peekIsDigit() ? consumeNumber() : quantifyInfinite;
if (tryConsume('}')) {
if (min <= max)
@@ -838,7 +877,7 @@ private:
*/
template<class Delegate>
-const char* parse(Delegate& delegate, const UString& pattern, unsigned backReferenceLimit = UINT_MAX)
+const char* parse(Delegate& delegate, const UString& pattern, unsigned backReferenceLimit = quantifyInfinite)
{
return Parser<Delegate>(delegate, pattern, backReferenceLimit).parse();
}
diff --git a/JavaScriptCore/yarr/RegexPattern.h b/JavaScriptCore/yarr/RegexPattern.h
index c76c641..74ee897 100644
--- a/JavaScriptCore/yarr/RegexPattern.h
+++ b/JavaScriptCore/yarr/RegexPattern.h
@@ -39,7 +39,7 @@ namespace JSC { namespace Yarr {
#define RegexStackSpaceForBackTrackInfoParentheticalAssertion 1
#define RegexStackSpaceForBackTrackInfoParenthesesOnce 1 // Only for !fixed quantifiers.
#define RegexStackSpaceForBackTrackInfoParenthesesTerminal 1
-#define RegexStackSpaceForBackTrackInfoParentheses 4
+#define RegexStackSpaceForBackTrackInfoParentheses 2
struct PatternDisjunction;
@@ -103,11 +103,12 @@ struct PatternTerm {
TypeParenthesesSubpattern,
TypeParentheticalAssertion,
} type;
- bool invertOrCapture;
+ bool m_capture :1;
+ bool m_invert :1;
union {
UChar patternCharacter;
CharacterClass* characterClass;
- unsigned subpatternId;
+ unsigned backReferenceSubpatternId;
struct {
PatternDisjunction* disjunction;
unsigned subpatternId;
@@ -123,6 +124,8 @@ struct PatternTerm {
PatternTerm(UChar ch)
: type(PatternTerm::TypePatternCharacter)
+ , m_capture(false)
+ , m_invert(false)
{
patternCharacter = ch;
quantityType = QuantifierFixedCount;
@@ -131,16 +134,18 @@ struct PatternTerm {
PatternTerm(CharacterClass* charClass, bool invert)
: type(PatternTerm::TypeCharacterClass)
- , invertOrCapture(invert)
+ , m_capture(false)
+ , m_invert(invert)
{
characterClass = charClass;
quantityType = QuantifierFixedCount;
quantityCount = 1;
}
- PatternTerm(Type type, unsigned subpatternId, PatternDisjunction* disjunction, bool invertOrCapture)
+ PatternTerm(Type type, unsigned subpatternId, PatternDisjunction* disjunction, bool capture = false, bool invert = false)
: type(type)
- , invertOrCapture(invertOrCapture)
+ , m_capture(capture)
+ , m_invert(invert)
{
parentheses.disjunction = disjunction;
parentheses.subpatternId = subpatternId;
@@ -152,7 +157,8 @@ struct PatternTerm {
PatternTerm(Type type, bool invert = false)
: type(type)
- , invertOrCapture(invert)
+ , m_capture(false)
+ , m_invert(invert)
{
quantityType = QuantifierFixedCount;
quantityCount = 1;
@@ -160,9 +166,10 @@ struct PatternTerm {
PatternTerm(unsigned spatternId)
: type(TypeBackReference)
- , invertOrCapture(false)
+ , m_capture(false)
+ , m_invert(false)
{
- subpatternId = spatternId;
+ backReferenceSubpatternId = spatternId;
quantityType = QuantifierFixedCount;
quantityCount = 1;
}
@@ -189,12 +196,12 @@ struct PatternTerm {
bool invert()
{
- return invertOrCapture;
+ return m_invert;
}
bool capture()
{
- return invertOrCapture;
+ return m_capture;
}
void quantify(unsigned count, QuantifierType type)