diff options
Diffstat (limited to 'JavaScriptCore/yarr/RegexInterpreter.cpp')
-rw-r--r-- | JavaScriptCore/yarr/RegexInterpreter.cpp | 211 |
1 files changed, 161 insertions, 50 deletions
diff --git a/JavaScriptCore/yarr/RegexInterpreter.cpp b/JavaScriptCore/yarr/RegexInterpreter.cpp index ec96636..dc3024a 100644 --- a/JavaScriptCore/yarr/RegexInterpreter.cpp +++ b/JavaScriptCore/yarr/RegexInterpreter.cpp @@ -1,5 +1,6 @@ /* * Copyright (C) 2009 Apple Inc. All rights reserved. + * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -195,6 +196,12 @@ public: return -1; } + int readPair() + { + ASSERT(pos + 1 < length); + return input[pos] | input[pos + 1] << 16; + } + int readChecked(int position) { ASSERT(position < 0); @@ -262,6 +269,11 @@ public: return (pos + position) == length; } + bool isNotAvailableInput(int position) + { + return (pos + position) > length; + } + private: const UChar* input; unsigned pos; @@ -591,20 +603,24 @@ public: unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns; context->restoreOutput(output, firstSubpatternId, count); } - bool parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack) + JSRegExpResult parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack) { while (backTrack->matchAmount) { ParenthesesDisjunctionContext* context = backTrack->lastContext; - if (matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true)) - return true; + JSRegExpResult result = matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true); + if (result == JSRegExpMatch) + return JSRegExpMatch; resetMatches(term, context); popParenthesesDisjunctionContext(backTrack); freeParenthesesDisjunctionContext(context); + + if (result != JSRegExpNoMatch) + return result; } - return false; + return JSRegExpNoMatch; } bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context) @@ -765,7 +781,7 @@ public: return false; } - bool matchParentheses(ByteTerm& term, DisjunctionContext* context) + JSRegExpResult matchParentheses(ByteTerm& term, DisjunctionContext* context) { ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern); @@ -786,31 +802,42 @@ public: while (backTrack->matchAmount < term.atom.quantityCount) { // Try to do a match, and it it succeeds, add it to the list. ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); - if (matchDisjunction(disjunctionBody, context->getDisjunctionContext(term))) + JSRegExpResult result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term)); + if (result == JSRegExpMatch) appendParenthesesDisjunctionContext(backTrack, context); else { // The match failed; try to find an alternate point to carry on from. resetMatches(term, context); freeParenthesesDisjunctionContext(context); - if (!parenthesesDoBacktrack(term, backTrack)) - return false; + + if (result == JSRegExpNoMatch) { + JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack); + if (backtrackResult != JSRegExpMatch) + return backtrackResult; + } else + return result; } } ASSERT(backTrack->matchAmount == term.atom.quantityCount); ParenthesesDisjunctionContext* context = backTrack->lastContext; recordParenthesesMatch(term, context); - return true; + return JSRegExpMatch; } case QuantifierGreedy: { while (backTrack->matchAmount < term.atom.quantityCount) { ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); - if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term))) + JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)); + if (result == JSRegExpMatch) appendParenthesesDisjunctionContext(backTrack, context); else { resetMatches(term, context); freeParenthesesDisjunctionContext(context); + + if (result != JSRegExpNoMatch) + return result; + break; } } @@ -819,15 +846,15 @@ public: ParenthesesDisjunctionContext* context = backTrack->lastContext; recordParenthesesMatch(term, context); } - return true; + return JSRegExpMatch; } case QuantifierNonGreedy: - return true; + return JSRegExpMatch; } ASSERT_NOT_REACHED(); - return false; + return JSRegExpErrorNoMatch; } // Rules for backtracking differ depending on whether this is greedy or non-greedy. @@ -840,7 +867,7 @@ public: // Non-greedy, we've already done the one less case, so don't match on popping. // We haven't done the one more case, so always try to add that. // - bool backtrackParentheses(ByteTerm& term, DisjunctionContext* context) + JSRegExpResult backtrackParentheses(ByteTerm& term, DisjunctionContext* context) { ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern); @@ -859,44 +886,58 @@ public: ASSERT(backTrack->matchAmount == term.atom.quantityCount); ParenthesesDisjunctionContext* context = 0; + JSRegExpResult result = parenthesesDoBacktrack(term, backTrack); - if (!parenthesesDoBacktrack(term, backTrack)) - return false; + if (result != JSRegExpMatch) + return result; // While we haven't yet reached our fixed limit, while (backTrack->matchAmount < term.atom.quantityCount) { // Try to do a match, and it it succeeds, add it to the list. context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); - if (matchDisjunction(disjunctionBody, context->getDisjunctionContext(term))) + result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term)); + + if (result == JSRegExpMatch) appendParenthesesDisjunctionContext(backTrack, context); else { // The match failed; try to find an alternate point to carry on from. resetMatches(term, context); freeParenthesesDisjunctionContext(context); - if (!parenthesesDoBacktrack(term, backTrack)) - return false; + + if (result == JSRegExpNoMatch) { + JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack); + if (backtrackResult != JSRegExpMatch) + return backtrackResult; + } else + return result; } } ASSERT(backTrack->matchAmount == term.atom.quantityCount); context = backTrack->lastContext; recordParenthesesMatch(term, context); - return true; + return JSRegExpMatch; } case QuantifierGreedy: { if (!backTrack->matchAmount) - return false; + return JSRegExpNoMatch; ParenthesesDisjunctionContext* context = backTrack->lastContext; - if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true)) { + JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true); + if (result == JSRegExpMatch) { while (backTrack->matchAmount < term.atom.quantityCount) { ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); - if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term))) + JSRegExpResult parenthesesResult = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)); + if (parenthesesResult == JSRegExpMatch) appendParenthesesDisjunctionContext(backTrack, context); else { resetMatches(term, context); freeParenthesesDisjunctionContext(context); + + if (parenthesesResult != JSRegExpNoMatch) + return parenthesesResult; + break; } } @@ -904,63 +945,108 @@ public: resetMatches(term, context); popParenthesesDisjunctionContext(backTrack); freeParenthesesDisjunctionContext(context); + + if (result != JSRegExpNoMatch) + return result; } if (backTrack->matchAmount) { ParenthesesDisjunctionContext* context = backTrack->lastContext; recordParenthesesMatch(term, context); } - return true; + return JSRegExpMatch; } case QuantifierNonGreedy: { // If we've not reached the limit, try to add one more match. if (backTrack->matchAmount < term.atom.quantityCount) { ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); - if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term))) { + JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)); + if (result == JSRegExpMatch) { appendParenthesesDisjunctionContext(backTrack, context); recordParenthesesMatch(term, context); - return true; - } else { - resetMatches(term, context); - freeParenthesesDisjunctionContext(context); + return JSRegExpMatch; } + + resetMatches(term, context); + freeParenthesesDisjunctionContext(context); + + if (result != JSRegExpNoMatch) + return result; } // Nope - okay backtrack looking for an alternative. while (backTrack->matchAmount) { ParenthesesDisjunctionContext* context = backTrack->lastContext; - if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true)) { + JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true); + if (result == JSRegExpMatch) { // successful backtrack! we're back in the game! if (backTrack->matchAmount) { context = backTrack->lastContext; recordParenthesesMatch(term, context); } - return true; + return JSRegExpMatch; } // pop a match off the stack resetMatches(term, context); popParenthesesDisjunctionContext(backTrack); freeParenthesesDisjunctionContext(context); + + return result; } - return false; + return JSRegExpNoMatch; } } ASSERT_NOT_REACHED(); - return false; + return JSRegExpErrorNoMatch; + } + + void lookupForBeginChars() + { + int character; + bool firstSingleCharFound; + + while (true) { + if (input.isNotAvailableInput(2)) + return; + + firstSingleCharFound = false; + + character = input.readPair(); + + for (unsigned i = 0; i < pattern->m_beginChars.size(); ++i) { + BeginChar bc = pattern->m_beginChars[i]; + + if (!firstSingleCharFound && bc.value <= 0xFFFF) { + firstSingleCharFound = true; + character &= 0xFFFF; + } + + if ((character | bc.mask) == bc.value) + return; + } + + input.next(); + } } #define MATCH_NEXT() { ++context->term; goto matchAgain; } #define BACKTRACK() { --context->term; goto backtrack; } #define currentTerm() (disjunction->terms[context->term]) - bool matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false) + JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false, bool isBody = false) { + if (!--remainingMatchCount) + return JSRegExpErrorHitLimit; + if (btrack) BACKTRACK(); + if (pattern->m_containsBeginChars && isBody) + lookupForBeginChars(); + context->matchBegin = input.getPos(); context->term = 0; @@ -972,14 +1058,14 @@ public: MATCH_NEXT(); case ByteTerm::TypeSubpatternEnd: context->matchEnd = input.getPos(); - return true; + return JSRegExpMatch; case ByteTerm::TypeBodyAlternativeBegin: MATCH_NEXT(); case ByteTerm::TypeBodyAlternativeDisjunction: case ByteTerm::TypeBodyAlternativeEnd: context->matchEnd = input.getPos(); - return true; + return JSRegExpMatch; case ByteTerm::TypeAlternativeBegin: MATCH_NEXT(); @@ -1069,10 +1155,16 @@ public: if (matchBackReference(currentTerm(), context)) MATCH_NEXT(); BACKTRACK(); - case ByteTerm::TypeParenthesesSubpattern: - if (matchParentheses(currentTerm(), context)) + case ByteTerm::TypeParenthesesSubpattern: { + JSRegExpResult result = matchParentheses(currentTerm(), context); + + if (result == JSRegExpMatch) { MATCH_NEXT(); + } else if (result != JSRegExpNoMatch) + return result; + BACKTRACK(); + } case ByteTerm::TypeParenthesesSubpatternOnceBegin: if (matchParenthesesOnceBegin(currentTerm(), context)) MATCH_NEXT(); @@ -1104,7 +1196,7 @@ public: switch (currentTerm().type) { case ByteTerm::TypeSubpatternBegin: - return false; + return JSRegExpNoMatch; case ByteTerm::TypeSubpatternEnd: ASSERT_NOT_REACHED(); @@ -1116,9 +1208,13 @@ public: MATCH_NEXT(); if (input.atEnd()) - return false; + return JSRegExpNoMatch; input.next(); + + if (pattern->m_containsBeginChars && isBody) + lookupForBeginChars(); + context->matchBegin = input.getPos(); if (currentTerm().alternative.onceThrough) @@ -1172,10 +1268,16 @@ public: if (backtrackBackReference(currentTerm(), context)) MATCH_NEXT(); BACKTRACK(); - case ByteTerm::TypeParenthesesSubpattern: - if (backtrackParentheses(currentTerm(), context)) + case ByteTerm::TypeParenthesesSubpattern: { + JSRegExpResult result = backtrackParentheses(currentTerm(), context); + + if (result == JSRegExpMatch) { MATCH_NEXT(); + } else if (result != JSRegExpNoMatch) + return result; + BACKTRACK(); + } case ByteTerm::TypeParenthesesSubpatternOnceBegin: if (backtrackParenthesesOnceBegin(currentTerm(), context)) MATCH_NEXT(); @@ -1199,20 +1301,23 @@ public: } ASSERT_NOT_REACHED(); - return false; + return JSRegExpErrorNoMatch; } - bool matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false) + JSRegExpResult matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false) { - if (matchDisjunction(disjunction, context, btrack)) { + JSRegExpResult result = matchDisjunction(disjunction, context, btrack); + + if (result == JSRegExpMatch) { while (context->matchBegin == context->matchEnd) { - if (!matchDisjunction(disjunction, context, true)) - return false; + result = matchDisjunction(disjunction, context, true); + if (result != JSRegExpMatch) + return result; } - return true; + return JSRegExpMatch; } - return false; + return result; } int interpret() @@ -1226,7 +1331,8 @@ public: DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get()); - if (matchDisjunction(pattern->m_body.get(), context)) { + JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false, true); + if (result == JSRegExpMatch) { output[0] = context->matchBegin; output[1] = context->matchEnd; } @@ -1235,6 +1341,9 @@ public: pattern->m_allocator->stopAllocator(); + if (output[0] == -1 && result != JSRegExpNoMatch) + return result; + return output[0]; } @@ -1243,6 +1352,7 @@ public: , output(output) , input(inputChar, start, length) , allocatorPool(0) + , remainingMatchCount(matchLimit) { } @@ -1251,6 +1361,7 @@ private: int* output; InputStream input; BumpPointerPool* allocatorPool; + unsigned remainingMatchCount; }; |