summaryrefslogtreecommitdiffstats
path: root/Source/JavaScriptCore/pcre/pcre_exec.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'Source/JavaScriptCore/pcre/pcre_exec.cpp')
-rw-r--r--Source/JavaScriptCore/pcre/pcre_exec.cpp2179
1 files changed, 0 insertions, 2179 deletions
diff --git a/Source/JavaScriptCore/pcre/pcre_exec.cpp b/Source/JavaScriptCore/pcre/pcre_exec.cpp
deleted file mode 100644
index b7018aa..0000000
--- a/Source/JavaScriptCore/pcre/pcre_exec.cpp
+++ /dev/null
@@ -1,2179 +0,0 @@
-/* This is JavaScriptCore's variant of the PCRE library. While this library
-started out as a copy of PCRE, many of the features of PCRE have been
-removed. This library now supports only the regular expression features
-required by the JavaScript language specification, and has only the functions
-needed by JavaScriptCore and the rest of WebKit.
-
- Originally written by Philip Hazel
- Copyright (c) 1997-2006 University of Cambridge
- Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
- Copyright (C) 2007 Eric Seidel <eric@webkit.org>
-
------------------------------------------------------------------------------
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the name of the University of Cambridge nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
------------------------------------------------------------------------------
-*/
-
-/* This module contains jsRegExpExecute(), the externally visible function
-that does pattern matching using an NFA algorithm, following the rules from
-the JavaScript specification. There are also some supporting functions. */
-
-#include "config.h"
-#include "pcre_internal.h"
-
-#include <limits.h>
-#include <wtf/ASCIICType.h>
-#include <wtf/Vector.h>
-
-#if REGEXP_HISTOGRAM
-#include <wtf/DateMath.h>
-#include <runtime/UString.h>
-#endif
-
-using namespace WTF;
-
-#if HAVE(COMPUTED_GOTO)
-#define USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
-//#define USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
-#endif
-
-/* Avoid warnings on Windows. */
-#undef min
-#undef max
-
-#ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
-typedef int ReturnLocation;
-#else
-typedef void* ReturnLocation;
-#endif
-
-#if !REGEXP_HISTOGRAM
-
-class HistogramTimeLogger {
-public:
- HistogramTimeLogger(const JSRegExp*) { }
-};
-
-#else
-
-using namespace JSC;
-
-class Histogram {
-public:
- ~Histogram();
- void add(const JSRegExp*, double);
-
-private:
- typedef HashMap<RefPtr<StringImpl>, double> Map;
- Map times;
-};
-
-class HistogramTimeLogger {
-public:
- HistogramTimeLogger(const JSRegExp*);
- ~HistogramTimeLogger();
-
-private:
- const JSRegExp* m_re;
- double m_startTime;
-};
-
-#endif
-
-/* Structure for building a chain of data for holding the values of
-the subject pointer at the start of each bracket, used to detect when
-an empty string has been matched by a bracket to break infinite loops. */
-struct BracketChainNode {
- BracketChainNode* previousBracket;
- const UChar* bracketStart;
-};
-
-struct MatchFrame {
- WTF_MAKE_FAST_ALLOCATED;
-public:
- ReturnLocation returnLocation;
- struct MatchFrame* previousFrame;
-
- /* Function arguments that may change */
- struct {
- const UChar* subjectPtr;
- const unsigned char* instructionPtr;
- int offsetTop;
- BracketChainNode* bracketChain;
- } args;
-
-
- /* PCRE uses "fake" recursion built off of gotos, thus
- stack-based local variables are not safe to use. Instead we have to
- store local variables on the current MatchFrame. */
- struct {
- const unsigned char* data;
- const unsigned char* startOfRepeatingBracket;
- const UChar* subjectPtrAtStartOfInstruction; // Several instrutions stash away a subjectPtr here for later compare
- const unsigned char* instructionPtrAtStartOfOnce;
-
- int repeatOthercase;
-
- int ctype;
- int fc;
- int fi;
- int length;
- int max;
- int number;
- int offset;
- int saveOffset1;
- int saveOffset2;
- int saveOffset3;
-
- BracketChainNode bracketChainNode;
- } locals;
-};
-
-/* Structure for passing "static" information around between the functions
-doing traditional NFA matching, so that they are thread-safe. */
-
-struct MatchData {
- int* offsetVector; /* Offset vector */
- int offsetEnd; /* One past the end */
- int offsetMax; /* The maximum usable for return data */
- bool offsetOverflow; /* Set if too many extractions */
- const UChar* startSubject; /* Start of the subject string */
- const UChar* endSubject; /* End of the subject string */
- const UChar* endMatchPtr; /* Subject position at end match */
- int endOffsetTop; /* Highwater mark at end of match */
- bool multiline;
- bool ignoreCase;
-};
-
-/* The maximum remaining length of subject we are prepared to search for a
-reqByte match. */
-
-#define REQ_BYTE_MAX 1000
-
-/* The below limit restricts the number of "recursive" match calls in order to
-avoid spending exponential time on complex regular expressions. */
-
-static const unsigned matchLimit = 1000000;
-
-#ifdef DEBUG
-/*************************************************
-* Debugging function to print chars *
-*************************************************/
-
-/* Print a sequence of chars in printable format, stopping at the end of the
-subject if the requested.
-
-Arguments:
- p points to characters
- length number to print
- isSubject true if printing from within md.startSubject
- md pointer to matching data block, if isSubject is true
-*/
-
-static void pchars(const UChar* p, int length, bool isSubject, const MatchData& md)
-{
- if (isSubject && length > md.endSubject - p)
- length = md.endSubject - p;
- while (length-- > 0) {
- int c;
- if (isASCIIPrintable(c = *(p++)))
- printf("%c", c);
- else if (c < 256)
- printf("\\x%02x", c);
- else
- printf("\\x{%x}", c);
- }
-}
-#endif
-
-/*************************************************
-* Match a back-reference *
-*************************************************/
-
-/* If a back reference hasn't been set, the length that is passed is greater
-than the number of characters left in the string, so the match fails.
-
-Arguments:
- offset index into the offset vector
- subjectPtr points into the subject
- length length to be matched
- md points to match data block
-
-Returns: true if matched
-*/
-
-static bool matchRef(int offset, const UChar* subjectPtr, int length, const MatchData& md)
-{
- const UChar* p = md.startSubject + md.offsetVector[offset];
-
-#ifdef DEBUG
- if (subjectPtr >= md.endSubject)
- printf("matching subject <null>");
- else {
- printf("matching subject ");
- pchars(subjectPtr, length, true, md);
- }
- printf(" against backref ");
- pchars(p, length, false, md);
- printf("\n");
-#endif
-
- /* Always fail if not enough characters left */
-
- if (length > md.endSubject - subjectPtr)
- return false;
-
- /* Separate the caselesss case for speed */
-
- if (md.ignoreCase) {
- while (length-- > 0) {
- UChar c = *p++;
- int othercase = jsc_pcre_ucp_othercase(c);
- UChar d = *subjectPtr++;
- if (c != d && othercase != d)
- return false;
- }
- }
- else {
- while (length-- > 0)
- if (*p++ != *subjectPtr++)
- return false;
- }
-
- return true;
-}
-
-#ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
-
-/* Use numbered labels and switch statement at the bottom of the match function. */
-
-#define RMATCH_WHERE(num) num
-#define RRETURN_LABEL RRETURN_SWITCH
-
-#else
-
-/* Use GCC's computed goto extension. */
-
-/* For one test case this is more than 40% faster than the switch statement.
-We could avoid the use of the num argument entirely by using local labels,
-but using it for the GCC case as well as the non-GCC case allows us to share
-a bit more code and notice if we use conflicting numbers.*/
-
-#define RMATCH_WHERE(num) &&RRETURN_##num
-#define RRETURN_LABEL *stack.currentFrame->returnLocation
-
-#endif
-
-#define RECURSIVE_MATCH_COMMON(num) \
- goto RECURSE;\
- RRETURN_##num: \
- stack.popCurrentFrame();
-
-#define RECURSIVE_MATCH(num, ra, rb) \
- do { \
- stack.pushNewFrame((ra), (rb), RMATCH_WHERE(num)); \
- RECURSIVE_MATCH_COMMON(num) \
- } while (0)
-
-#define RECURSIVE_MATCH_NEW_GROUP(num, ra, rb) \
- do { \
- stack.pushNewFrame((ra), (rb), RMATCH_WHERE(num)); \
- startNewGroup(stack.currentFrame); \
- RECURSIVE_MATCH_COMMON(num) \
- } while (0)
-
-#define RRETURN goto RRETURN_LABEL
-
-#define RRETURN_NO_MATCH do { isMatch = false; RRETURN; } while (0)
-
-/*************************************************
-* Match from current position *
-*************************************************/
-
-/* On entry instructionPtr points to the first opcode, and subjectPtr to the first character
-in the subject string, while substringStart holds the value of subjectPtr at the start of the
-last bracketed group - used for breaking infinite loops matching zero-length
-strings. This function is called recursively in many circumstances. Whenever it
-returns a negative (error) response, the outer match() call must also return the
-same response.
-
-Arguments:
- subjectPtr pointer in subject
- instructionPtr position in code
- offsetTop current top pointer
- md pointer to "static" info for the match
-
-Returns: 1 if matched ) these values are >= 0
- 0 if failed to match )
- a negative error value if aborted by an error condition
- (e.g. stopped by repeated call or recursion limit)
-*/
-
-static const unsigned numFramesOnStack = 16;
-
-struct MatchStack {
- MatchStack()
- : framesEnd(frames + numFramesOnStack)
- , currentFrame(frames)
- , size(1) // match() creates accesses the first frame w/o calling pushNewFrame
- {
- ASSERT((sizeof(frames) / sizeof(frames[0])) == numFramesOnStack);
- }
-
- MatchFrame frames[numFramesOnStack];
- MatchFrame* framesEnd;
- MatchFrame* currentFrame;
- unsigned size;
-
- inline bool canUseStackBufferForNextFrame()
- {
- return size < numFramesOnStack;
- }
-
- inline MatchFrame* allocateNextFrame()
- {
- if (canUseStackBufferForNextFrame())
- return currentFrame + 1;
- return new MatchFrame;
- }
-
- inline void pushNewFrame(const unsigned char* instructionPtr, BracketChainNode* bracketChain, ReturnLocation returnLocation)
- {
- MatchFrame* newframe = allocateNextFrame();
- newframe->previousFrame = currentFrame;
-
- newframe->args.subjectPtr = currentFrame->args.subjectPtr;
- newframe->args.offsetTop = currentFrame->args.offsetTop;
- newframe->args.instructionPtr = instructionPtr;
- newframe->args.bracketChain = bracketChain;
- newframe->returnLocation = returnLocation;
- size++;
-
- currentFrame = newframe;
- }
-
- inline void popCurrentFrame()
- {
- MatchFrame* oldFrame = currentFrame;
- currentFrame = currentFrame->previousFrame;
- if (size > numFramesOnStack)
- delete oldFrame;
- size--;
- }
-
- void popAllFrames()
- {
- while (size)
- popCurrentFrame();
- }
-};
-
-static int matchError(int errorCode, MatchStack& stack)
-{
- stack.popAllFrames();
- return errorCode;
-}
-
-/* Get the next UTF-8 character, not advancing the pointer, incrementing length
- if there are extra bytes. This is called when we know we are in UTF-8 mode. */
-
-static inline void getUTF8CharAndIncrementLength(int& c, const unsigned char* subjectPtr, int& len)
-{
- c = *subjectPtr;
- if ((c & 0xc0) == 0xc0) {
- int gcaa = jsc_pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
- int gcss = 6 * gcaa;
- c = (c & jsc_pcre_utf8_table3[gcaa]) << gcss;
- for (int gcii = 1; gcii <= gcaa; gcii++) {
- gcss -= 6;
- c |= (subjectPtr[gcii] & 0x3f) << gcss;
- }
- len += gcaa;
- }
-}
-
-static inline void startNewGroup(MatchFrame* currentFrame)
-{
- /* At the start of a bracketed group, add the current subject pointer to the
- stack of such pointers, to be re-instated at the end of the group when we hit
- the closing ket. When match() is called in other circumstances, we don't add to
- this stack. */
-
- currentFrame->locals.bracketChainNode.previousBracket = currentFrame->args.bracketChain;
- currentFrame->locals.bracketChainNode.bracketStart = currentFrame->args.subjectPtr;
- currentFrame->args.bracketChain = &currentFrame->locals.bracketChainNode;
-}
-
-// FIXME: "minimize" means "not greedy", we should invert the callers to ask for "greedy" to be less confusing
-static inline void repeatInformationFromInstructionOffset(int instructionOffset, bool& minimize, int& minimumRepeats, int& maximumRepeats)
-{
- // Instruction offsets are based off of OP_CRSTAR, OP_STAR, OP_TYPESTAR, OP_NOTSTAR
- static const char minimumRepeatsFromInstructionOffset[] = { 0, 0, 1, 1, 0, 0 };
- static const int maximumRepeatsFromInstructionOffset[] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX, 1, 1 };
-
- ASSERT(instructionOffset >= 0);
- ASSERT(instructionOffset <= (OP_CRMINQUERY - OP_CRSTAR));
-
- minimize = (instructionOffset & 1); // this assumes ordering: Instruction, MinimizeInstruction, Instruction2, MinimizeInstruction2
- minimumRepeats = minimumRepeatsFromInstructionOffset[instructionOffset];
- maximumRepeats = maximumRepeatsFromInstructionOffset[instructionOffset];
-}
-
-static int match(const UChar* subjectPtr, const unsigned char* instructionPtr, int offsetTop, MatchData& md)
-{
- bool isMatch = false;
- int min;
- bool minimize = false; /* Initialization not really needed, but some compilers think so. */
- unsigned remainingMatchCount = matchLimit;
- int othercase; /* Declare here to avoid errors during jumps */
-
- MatchStack stack;
-
- /* The opcode jump table. */
-#ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
-#define EMIT_JUMP_TABLE_ENTRY(opcode) &&LABEL_OP_##opcode,
- static void* opcodeJumpTable[256] = { FOR_EACH_OPCODE(EMIT_JUMP_TABLE_ENTRY) };
-#undef EMIT_JUMP_TABLE_ENTRY
-#endif
-
- /* One-time setup of the opcode jump table. */
-#ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
- for (int i = 255; !opcodeJumpTable[i]; i--)
- opcodeJumpTable[i] = &&CAPTURING_BRACKET;
-#endif
-
-#ifdef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
- // Shark shows this as a hot line
- // Using a static const here makes this line disappear, but makes later access hotter (not sure why)
- stack.currentFrame->returnLocation = &&RETURN;
-#else
- stack.currentFrame->returnLocation = 0;
-#endif
- stack.currentFrame->args.subjectPtr = subjectPtr;
- stack.currentFrame->args.instructionPtr = instructionPtr;
- stack.currentFrame->args.offsetTop = offsetTop;
- stack.currentFrame->args.bracketChain = 0;
- startNewGroup(stack.currentFrame);
-
- /* This is where control jumps back to to effect "recursion" */
-
-RECURSE:
- if (!--remainingMatchCount)
- return matchError(JSRegExpErrorHitLimit, stack);
-
- /* Now start processing the operations. */
-
-#ifndef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
- while (true)
-#endif
- {
-
-#ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
-#define BEGIN_OPCODE(opcode) LABEL_OP_##opcode
-#define NEXT_OPCODE goto *opcodeJumpTable[*stack.currentFrame->args.instructionPtr]
-#else
-#define BEGIN_OPCODE(opcode) case OP_##opcode
-#define NEXT_OPCODE continue
-#endif
-
-#ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
- NEXT_OPCODE;
-#else
- switch (*stack.currentFrame->args.instructionPtr)
-#endif
- {
- /* Non-capturing bracket: optimized */
-
- BEGIN_OPCODE(BRA):
- NON_CAPTURING_BRACKET:
- DPRINTF(("start bracket 0\n"));
- do {
- RECURSIVE_MATCH_NEW_GROUP(2, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
- if (isMatch)
- RRETURN;
- stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
- } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
- DPRINTF(("bracket 0 failed\n"));
- RRETURN;
-
- /* Skip over large extraction number data if encountered. */
-
- BEGIN_OPCODE(BRANUMBER):
- stack.currentFrame->args.instructionPtr += 3;
- NEXT_OPCODE;
-
- /* End of the pattern. */
-
- BEGIN_OPCODE(END):
- md.endMatchPtr = stack.currentFrame->args.subjectPtr; /* Record where we ended */
- md.endOffsetTop = stack.currentFrame->args.offsetTop; /* and how many extracts were taken */
- isMatch = true;
- RRETURN;
-
- /* Assertion brackets. Check the alternative branches in turn - the
- matching won't pass the KET for an assertion. If any one branch matches,
- the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
- start of each branch to move the current point backwards, so the code at
- this level is identical to the lookahead case. */
-
- BEGIN_OPCODE(ASSERT):
- do {
- RECURSIVE_MATCH_NEW_GROUP(6, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, NULL);
- if (isMatch)
- break;
- stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
- } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
- if (*stack.currentFrame->args.instructionPtr == OP_KET)
- RRETURN_NO_MATCH;
-
- /* Continue from after the assertion, updating the offsets high water
- mark, since extracts may have been taken during the assertion. */
-
- advanceToEndOfBracket(stack.currentFrame->args.instructionPtr);
- stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
- stack.currentFrame->args.offsetTop = md.endOffsetTop;
- NEXT_OPCODE;
-
- /* Negative assertion: all branches must fail to match */
-
- BEGIN_OPCODE(ASSERT_NOT):
- do {
- RECURSIVE_MATCH_NEW_GROUP(7, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, NULL);
- if (isMatch)
- RRETURN_NO_MATCH;
- stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
- } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
-
- stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
- NEXT_OPCODE;
-
- /* An alternation is the end of a branch; scan along to find the end of the
- bracketed group and go to there. */
-
- BEGIN_OPCODE(ALT):
- advanceToEndOfBracket(stack.currentFrame->args.instructionPtr);
- NEXT_OPCODE;
-
- /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
- that it may occur zero times. It may repeat infinitely, or not at all -
- i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
- repeat limits are compiled as a number of copies, with the optional ones
- preceded by BRAZERO or BRAMINZERO. */
-
- BEGIN_OPCODE(BRAZERO): {
- stack.currentFrame->locals.startOfRepeatingBracket = stack.currentFrame->args.instructionPtr + 1;
- RECURSIVE_MATCH_NEW_GROUP(14, stack.currentFrame->locals.startOfRepeatingBracket, stack.currentFrame->args.bracketChain);
- if (isMatch)
- RRETURN;
- advanceToEndOfBracket(stack.currentFrame->locals.startOfRepeatingBracket);
- stack.currentFrame->args.instructionPtr = stack.currentFrame->locals.startOfRepeatingBracket + 1 + LINK_SIZE;
- NEXT_OPCODE;
- }
-
- BEGIN_OPCODE(BRAMINZERO): {
- stack.currentFrame->locals.startOfRepeatingBracket = stack.currentFrame->args.instructionPtr + 1;
- advanceToEndOfBracket(stack.currentFrame->locals.startOfRepeatingBracket);
- RECURSIVE_MATCH_NEW_GROUP(15, stack.currentFrame->locals.startOfRepeatingBracket + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
- if (isMatch)
- RRETURN;
- stack.currentFrame->args.instructionPtr++;
- NEXT_OPCODE;
- }
-
- /* End of a group, repeated or non-repeating. If we are at the end of
- an assertion "group", stop matching and return 1, but record the
- current high water mark for use by positive assertions. Do this also
- for the "once" (not-backup up) groups. */
-
- BEGIN_OPCODE(KET):
- BEGIN_OPCODE(KETRMIN):
- BEGIN_OPCODE(KETRMAX):
- stack.currentFrame->locals.instructionPtrAtStartOfOnce = stack.currentFrame->args.instructionPtr - getLinkValue(stack.currentFrame->args.instructionPtr + 1);
- stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.bracketChain->bracketStart;
-
- /* Back up the stack of bracket start pointers. */
-
- stack.currentFrame->args.bracketChain = stack.currentFrame->args.bracketChain->previousBracket;
-
- if (*stack.currentFrame->locals.instructionPtrAtStartOfOnce == OP_ASSERT || *stack.currentFrame->locals.instructionPtrAtStartOfOnce == OP_ASSERT_NOT) {
- md.endOffsetTop = stack.currentFrame->args.offsetTop;
- isMatch = true;
- RRETURN;
- }
-
- /* In all other cases except a conditional group we have to check the
- group number back at the start and if necessary complete handling an
- extraction by setting the offsets and bumping the high water mark. */
-
- stack.currentFrame->locals.number = *stack.currentFrame->locals.instructionPtrAtStartOfOnce - OP_BRA;
-
- /* For extended extraction brackets (large number), we have to fish out
- the number from a dummy opcode at the start. */
-
- if (stack.currentFrame->locals.number > EXTRACT_BASIC_MAX)
- stack.currentFrame->locals.number = get2ByteValue(stack.currentFrame->locals.instructionPtrAtStartOfOnce + 2 + LINK_SIZE);
- stack.currentFrame->locals.offset = stack.currentFrame->locals.number << 1;
-
-#ifdef DEBUG
- printf("end bracket %d", stack.currentFrame->locals.number);
- printf("\n");
-#endif
-
- /* Test for a numbered group. This includes groups called as a result
- of recursion. Note that whole-pattern recursion is coded as a recurse
- into group 0, so it won't be picked up here. Instead, we catch it when
- the OP_END is reached. */
-
- if (stack.currentFrame->locals.number > 0) {
- if (stack.currentFrame->locals.offset >= md.offsetMax)
- md.offsetOverflow = true;
- else {
- md.offsetVector[stack.currentFrame->locals.offset] =
- md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number];
- md.offsetVector[stack.currentFrame->locals.offset+1] = stack.currentFrame->args.subjectPtr - md.startSubject;
- if (stack.currentFrame->args.offsetTop <= stack.currentFrame->locals.offset)
- stack.currentFrame->args.offsetTop = stack.currentFrame->locals.offset + 2;
- }
- }
-
- /* For a non-repeating ket, just continue at this level. This also
- happens for a repeating ket if no characters were matched in the group.
- This is the forcible breaking of infinite loops as implemented in Perl
- 5.005. If there is an options reset, it will get obeyed in the normal
- course of events. */
-
- if (*stack.currentFrame->args.instructionPtr == OP_KET || stack.currentFrame->args.subjectPtr == stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
- stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
- NEXT_OPCODE;
- }
-
- /* The repeating kets try the rest of the pattern or restart from the
- preceding bracket, in the appropriate order. */
-
- if (*stack.currentFrame->args.instructionPtr == OP_KETRMIN) {
- RECURSIVE_MATCH(16, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
- if (isMatch)
- RRETURN;
- RECURSIVE_MATCH_NEW_GROUP(17, stack.currentFrame->locals.instructionPtrAtStartOfOnce, stack.currentFrame->args.bracketChain);
- if (isMatch)
- RRETURN;
- } else { /* OP_KETRMAX */
- RECURSIVE_MATCH_NEW_GROUP(18, stack.currentFrame->locals.instructionPtrAtStartOfOnce, stack.currentFrame->args.bracketChain);
- if (isMatch)
- RRETURN;
- RECURSIVE_MATCH(19, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
- if (isMatch)
- RRETURN;
- }
- RRETURN;
-
- /* Start of subject. */
-
- BEGIN_OPCODE(CIRC):
- if (stack.currentFrame->args.subjectPtr != md.startSubject)
- RRETURN_NO_MATCH;
- stack.currentFrame->args.instructionPtr++;
- NEXT_OPCODE;
-
- /* After internal newline if multiline. */
-
- BEGIN_OPCODE(BOL):
- if (stack.currentFrame->args.subjectPtr != md.startSubject && !isNewline(stack.currentFrame->args.subjectPtr[-1]))
- RRETURN_NO_MATCH;
- stack.currentFrame->args.instructionPtr++;
- NEXT_OPCODE;
-
- /* End of subject. */
-
- BEGIN_OPCODE(DOLL):
- if (stack.currentFrame->args.subjectPtr < md.endSubject)
- RRETURN_NO_MATCH;
- stack.currentFrame->args.instructionPtr++;
- NEXT_OPCODE;
-
- /* Before internal newline if multiline. */
-
- BEGIN_OPCODE(EOL):
- if (stack.currentFrame->args.subjectPtr < md.endSubject && !isNewline(*stack.currentFrame->args.subjectPtr))
- RRETURN_NO_MATCH;
- stack.currentFrame->args.instructionPtr++;
- NEXT_OPCODE;
-
- /* Word boundary assertions */
-
- BEGIN_OPCODE(NOT_WORD_BOUNDARY):
- BEGIN_OPCODE(WORD_BOUNDARY): {
- bool currentCharIsWordChar = false;
- bool previousCharIsWordChar = false;
-
- if (stack.currentFrame->args.subjectPtr > md.startSubject)
- previousCharIsWordChar = isWordChar(stack.currentFrame->args.subjectPtr[-1]);
- if (stack.currentFrame->args.subjectPtr < md.endSubject)
- currentCharIsWordChar = isWordChar(*stack.currentFrame->args.subjectPtr);
-
- /* Now see if the situation is what we want */
- bool wordBoundaryDesired = (*stack.currentFrame->args.instructionPtr++ == OP_WORD_BOUNDARY);
- if (wordBoundaryDesired ? currentCharIsWordChar == previousCharIsWordChar : currentCharIsWordChar != previousCharIsWordChar)
- RRETURN_NO_MATCH;
- NEXT_OPCODE;
- }
-
- /* Match a single character type; inline for speed */
-
- BEGIN_OPCODE(NOT_NEWLINE):
- if (stack.currentFrame->args.subjectPtr >= md.endSubject)
- RRETURN_NO_MATCH;
- if (isNewline(*stack.currentFrame->args.subjectPtr++))
- RRETURN_NO_MATCH;
- stack.currentFrame->args.instructionPtr++;
- NEXT_OPCODE;
-
- BEGIN_OPCODE(NOT_DIGIT):
- if (stack.currentFrame->args.subjectPtr >= md.endSubject)
- RRETURN_NO_MATCH;
- if (isASCIIDigit(*stack.currentFrame->args.subjectPtr++))
- RRETURN_NO_MATCH;
- stack.currentFrame->args.instructionPtr++;
- NEXT_OPCODE;
-
- BEGIN_OPCODE(DIGIT):
- if (stack.currentFrame->args.subjectPtr >= md.endSubject)
- RRETURN_NO_MATCH;
- if (!isASCIIDigit(*stack.currentFrame->args.subjectPtr++))
- RRETURN_NO_MATCH;
- stack.currentFrame->args.instructionPtr++;
- NEXT_OPCODE;
-
- BEGIN_OPCODE(NOT_WHITESPACE):
- if (stack.currentFrame->args.subjectPtr >= md.endSubject)
- RRETURN_NO_MATCH;
- if (isSpaceChar(*stack.currentFrame->args.subjectPtr++))
- RRETURN_NO_MATCH;
- stack.currentFrame->args.instructionPtr++;
- NEXT_OPCODE;
-
- BEGIN_OPCODE(WHITESPACE):
- if (stack.currentFrame->args.subjectPtr >= md.endSubject)
- RRETURN_NO_MATCH;
- if (!isSpaceChar(*stack.currentFrame->args.subjectPtr++))
- RRETURN_NO_MATCH;
- stack.currentFrame->args.instructionPtr++;
- NEXT_OPCODE;
-
- BEGIN_OPCODE(NOT_WORDCHAR):
- if (stack.currentFrame->args.subjectPtr >= md.endSubject)
- RRETURN_NO_MATCH;
- if (isWordChar(*stack.currentFrame->args.subjectPtr++))
- RRETURN_NO_MATCH;
- stack.currentFrame->args.instructionPtr++;
- NEXT_OPCODE;
-
- BEGIN_OPCODE(WORDCHAR):
- if (stack.currentFrame->args.subjectPtr >= md.endSubject)
- RRETURN_NO_MATCH;
- if (!isWordChar(*stack.currentFrame->args.subjectPtr++))
- RRETURN_NO_MATCH;
- stack.currentFrame->args.instructionPtr++;
- NEXT_OPCODE;
-
- /* Match a back reference, possibly repeatedly. Look past the end of the
- item to see if there is repeat information following. The code is similar
- to that for character classes, but repeated for efficiency. Then obey
- similar code to character type repeats - written out again for speed.
- However, if the referenced string is the empty string, always treat
- it as matched, any number of times (otherwise there could be infinite
- loops). */
-
- BEGIN_OPCODE(REF):
- stack.currentFrame->locals.offset = get2ByteValue(stack.currentFrame->args.instructionPtr + 1) << 1; /* Doubled ref number */
- stack.currentFrame->args.instructionPtr += 3; /* Advance past item */
-
- /* If the reference is unset, set the length to be longer than the amount
- of subject left; this ensures that every attempt at a match fails. We
- can't just fail here, because of the possibility of quantifiers with zero
- minima. */
-
- if (stack.currentFrame->locals.offset >= stack.currentFrame->args.offsetTop || md.offsetVector[stack.currentFrame->locals.offset] < 0)
- stack.currentFrame->locals.length = 0;
- else
- stack.currentFrame->locals.length = md.offsetVector[stack.currentFrame->locals.offset+1] - md.offsetVector[stack.currentFrame->locals.offset];
-
- /* Set up for repetition, or handle the non-repeated case */
-
- switch (*stack.currentFrame->args.instructionPtr) {
- case OP_CRSTAR:
- case OP_CRMINSTAR:
- case OP_CRPLUS:
- case OP_CRMINPLUS:
- case OP_CRQUERY:
- case OP_CRMINQUERY:
- repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max);
- break;
-
- case OP_CRRANGE:
- case OP_CRMINRANGE:
- minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
- min = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
- stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 3);
- if (stack.currentFrame->locals.max == 0)
- stack.currentFrame->locals.max = INT_MAX;
- stack.currentFrame->args.instructionPtr += 5;
- break;
-
- default: /* No repeat follows */
- if (!matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
- RRETURN_NO_MATCH;
- stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
- NEXT_OPCODE;
- }
-
- /* If the length of the reference is zero, just continue with the
- main loop. */
-
- if (stack.currentFrame->locals.length == 0)
- NEXT_OPCODE;
-
- /* First, ensure the minimum number of matches are present. */
-
- for (int i = 1; i <= min; i++) {
- if (!matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
- RRETURN_NO_MATCH;
- stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
- }
-
- /* If min = max, continue at the same level without recursion.
- They are not both allowed to be zero. */
-
- if (min == stack.currentFrame->locals.max)
- NEXT_OPCODE;
-
- /* If minimizing, keep trying and advancing the pointer */
-
- if (minimize) {
- for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
- RECURSIVE_MATCH(20, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
- if (isMatch)
- RRETURN;
- if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || !matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
- RRETURN;
- stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
- }
- /* Control never reaches here */
- }
-
- /* If maximizing, find the longest string and work backwards */
-
- else {
- stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
- for (int i = min; i < stack.currentFrame->locals.max; i++) {
- if (!matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
- break;
- stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
- }
- while (stack.currentFrame->args.subjectPtr >= stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
- RECURSIVE_MATCH(21, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
- if (isMatch)
- RRETURN;
- stack.currentFrame->args.subjectPtr -= stack.currentFrame->locals.length;
- }
- RRETURN_NO_MATCH;
- }
- /* Control never reaches here */
-
- /* Match a bit-mapped character class, possibly repeatedly. This op code is
- used when all the characters in the class have values in the range 0-255,
- and either the matching is caseful, or the characters are in the range
- 0-127 when UTF-8 processing is enabled. The only difference between
- OP_CLASS and OP_NCLASS occurs when a data character outside the range is
- encountered.
-
- First, look past the end of the item to see if there is repeat information
- following. Then obey similar code to character type repeats - written out
- again for speed. */
-
- BEGIN_OPCODE(NCLASS):
- BEGIN_OPCODE(CLASS):
- stack.currentFrame->locals.data = stack.currentFrame->args.instructionPtr + 1; /* Save for matching */
- stack.currentFrame->args.instructionPtr += 33; /* Advance past the item */
-
- switch (*stack.currentFrame->args.instructionPtr) {
- case OP_CRSTAR:
- case OP_CRMINSTAR:
- case OP_CRPLUS:
- case OP_CRMINPLUS:
- case OP_CRQUERY:
- case OP_CRMINQUERY:
- repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max);
- break;
-
- case OP_CRRANGE:
- case OP_CRMINRANGE:
- minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
- min = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
- stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 3);
- if (stack.currentFrame->locals.max == 0)
- stack.currentFrame->locals.max = INT_MAX;
- stack.currentFrame->args.instructionPtr += 5;
- break;
-
- default: /* No repeat follows */
- min = stack.currentFrame->locals.max = 1;
- break;
- }
-
- /* First, ensure the minimum number of matches are present. */
-
- for (int i = 1; i <= min; i++) {
- if (stack.currentFrame->args.subjectPtr >= md.endSubject)
- RRETURN_NO_MATCH;
- int c = *stack.currentFrame->args.subjectPtr++;
- if (c > 255) {
- if (stack.currentFrame->locals.data[-1] == OP_CLASS)
- RRETURN_NO_MATCH;
- } else {
- if (!(stack.currentFrame->locals.data[c / 8] & (1 << (c & 7))))
- RRETURN_NO_MATCH;
- }
- }
-
- /* If max == min we can continue with the main loop without the
- need to recurse. */
-
- if (min == stack.currentFrame->locals.max)
- NEXT_OPCODE;
-
- /* If minimizing, keep testing the rest of the expression and advancing
- the pointer while it matches the class. */
- if (minimize) {
- for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
- RECURSIVE_MATCH(22, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
- if (isMatch)
- RRETURN;
- if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
- RRETURN;
- int c = *stack.currentFrame->args.subjectPtr++;
- if (c > 255) {
- if (stack.currentFrame->locals.data[-1] == OP_CLASS)
- RRETURN;
- } else {
- if ((stack.currentFrame->locals.data[c/8] & (1 << (c&7))) == 0)
- RRETURN;
- }
- }
- /* Control never reaches here */
- }
- /* If maximizing, find the longest possible run, then work backwards. */
- else {
- stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
-
- for (int i = min; i < stack.currentFrame->locals.max; i++) {
- if (stack.currentFrame->args.subjectPtr >= md.endSubject)
- break;
- int c = *stack.currentFrame->args.subjectPtr;
- if (c > 255) {
- if (stack.currentFrame->locals.data[-1] == OP_CLASS)
- break;
- } else {
- if (!(stack.currentFrame->locals.data[c / 8] & (1 << (c & 7))))
- break;
- }
- ++stack.currentFrame->args.subjectPtr;
- }
- for (;;) {
- RECURSIVE_MATCH(24, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
- if (isMatch)
- RRETURN;
- if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
- break; /* Stop if tried at original pos */
- }
-
- RRETURN;
- }
- /* Control never reaches here */
-
- /* Match an extended character class. */
-
- BEGIN_OPCODE(XCLASS):
- stack.currentFrame->locals.data = stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE; /* Save for matching */
- stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1); /* Advance past the item */
-
- switch (*stack.currentFrame->args.instructionPtr) {
- case OP_CRSTAR:
- case OP_CRMINSTAR:
- case OP_CRPLUS:
- case OP_CRMINPLUS:
- case OP_CRQUERY:
- case OP_CRMINQUERY:
- repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max);
- break;
-
- case OP_CRRANGE:
- case OP_CRMINRANGE:
- minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
- min = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
- stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 3);
- if (stack.currentFrame->locals.max == 0)
- stack.currentFrame->locals.max = INT_MAX;
- stack.currentFrame->args.instructionPtr += 5;
- break;
-
- default: /* No repeat follows */
- min = stack.currentFrame->locals.max = 1;
- }
-
- /* First, ensure the minimum number of matches are present. */
-
- for (int i = 1; i <= min; i++) {
- if (stack.currentFrame->args.subjectPtr >= md.endSubject)
- RRETURN_NO_MATCH;
- int c = *stack.currentFrame->args.subjectPtr++;
- if (!jsc_pcre_xclass(c, stack.currentFrame->locals.data))
- RRETURN_NO_MATCH;
- }
-
- /* If max == min we can continue with the main loop without the
- need to recurse. */
-
- if (min == stack.currentFrame->locals.max)
- NEXT_OPCODE;
-
- /* If minimizing, keep testing the rest of the expression and advancing
- the pointer while it matches the class. */
-
- if (minimize) {
- for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
- RECURSIVE_MATCH(26, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
- if (isMatch)
- RRETURN;
- if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
- RRETURN;
- int c = *stack.currentFrame->args.subjectPtr++;
- if (!jsc_pcre_xclass(c, stack.currentFrame->locals.data))
- RRETURN;
- }
- /* Control never reaches here */
- }
-
- /* If maximizing, find the longest possible run, then work backwards. */
-
- else {
- stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
- for (int i = min; i < stack.currentFrame->locals.max; i++) {
- if (stack.currentFrame->args.subjectPtr >= md.endSubject)
- break;
- int c = *stack.currentFrame->args.subjectPtr;
- if (!jsc_pcre_xclass(c, stack.currentFrame->locals.data))
- break;
- ++stack.currentFrame->args.subjectPtr;
- }
- for(;;) {
- RECURSIVE_MATCH(27, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
- if (isMatch)
- RRETURN;
- if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
- break; /* Stop if tried at original pos */
- }
- RRETURN;
- }
-
- /* Control never reaches here */
-
- /* Match a single character, casefully */
-
- BEGIN_OPCODE(CHAR):
- stack.currentFrame->locals.length = 1;
- stack.currentFrame->args.instructionPtr++;
- getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, stack.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
- stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.length;
- if (stack.currentFrame->args.subjectPtr >= md.endSubject)
- RRETURN_NO_MATCH;
- if (stack.currentFrame->locals.fc != *stack.currentFrame->args.subjectPtr++)
- RRETURN_NO_MATCH;
- NEXT_OPCODE;
-
- /* Match a single character, caselessly */
-
- BEGIN_OPCODE(CHAR_IGNORING_CASE): {
- stack.currentFrame->locals.length = 1;
- stack.currentFrame->args.instructionPtr++;
- getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, stack.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
- stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.length;
- if (stack.currentFrame->args.subjectPtr >= md.endSubject)
- RRETURN_NO_MATCH;
- int dc = *stack.currentFrame->args.subjectPtr++;
- if (stack.currentFrame->locals.fc != dc && jsc_pcre_ucp_othercase(stack.currentFrame->locals.fc) != dc)
- RRETURN_NO_MATCH;
- NEXT_OPCODE;
- }
-
- /* Match a single ASCII character. */
-
- BEGIN_OPCODE(ASCII_CHAR):
- if (md.endSubject == stack.currentFrame->args.subjectPtr)
- RRETURN_NO_MATCH;
- if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->args.instructionPtr[1])
- RRETURN_NO_MATCH;
- ++stack.currentFrame->args.subjectPtr;
- stack.currentFrame->args.instructionPtr += 2;
- NEXT_OPCODE;
-
- /* Match one of two cases of an ASCII letter. */
-
- BEGIN_OPCODE(ASCII_LETTER_IGNORING_CASE):
- if (md.endSubject == stack.currentFrame->args.subjectPtr)
- RRETURN_NO_MATCH;
- if ((*stack.currentFrame->args.subjectPtr | 0x20) != stack.currentFrame->args.instructionPtr[1])
- RRETURN_NO_MATCH;
- ++stack.currentFrame->args.subjectPtr;
- stack.currentFrame->args.instructionPtr += 2;
- NEXT_OPCODE;
-
- /* Match a single character repeatedly; different opcodes share code. */
-
- BEGIN_OPCODE(EXACT):
- min = stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
- minimize = false;
- stack.currentFrame->args.instructionPtr += 3;
- goto REPEATCHAR;
-
- BEGIN_OPCODE(UPTO):
- BEGIN_OPCODE(MINUPTO):
- min = 0;
- stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
- minimize = *stack.currentFrame->args.instructionPtr == OP_MINUPTO;
- stack.currentFrame->args.instructionPtr += 3;
- goto REPEATCHAR;
-
- BEGIN_OPCODE(STAR):
- BEGIN_OPCODE(MINSTAR):
- BEGIN_OPCODE(PLUS):
- BEGIN_OPCODE(MINPLUS):
- BEGIN_OPCODE(QUERY):
- BEGIN_OPCODE(MINQUERY):
- repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_STAR, minimize, min, stack.currentFrame->locals.max);
-
- /* Common code for all repeated single-character matches. We can give
- up quickly if there are fewer than the minimum number of characters left in
- the subject. */
-
- REPEATCHAR:
-
- stack.currentFrame->locals.length = 1;
- getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, stack.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
- if (min * (stack.currentFrame->locals.fc > 0xFFFF ? 2 : 1) > md.endSubject - stack.currentFrame->args.subjectPtr)
- RRETURN_NO_MATCH;
- stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.length;
-
- if (stack.currentFrame->locals.fc <= 0xFFFF) {
- othercase = md.ignoreCase ? jsc_pcre_ucp_othercase(stack.currentFrame->locals.fc) : -1;
-
- for (int i = 1; i <= min; i++) {
- if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != othercase)
- RRETURN_NO_MATCH;
- ++stack.currentFrame->args.subjectPtr;
- }
-
- if (min == stack.currentFrame->locals.max)
- NEXT_OPCODE;
-
- if (minimize) {
- stack.currentFrame->locals.repeatOthercase = othercase;
- for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
- RECURSIVE_MATCH(28, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
- if (isMatch)
- RRETURN;
- if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
- RRETURN;
- if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.repeatOthercase)
- RRETURN;
- ++stack.currentFrame->args.subjectPtr;
- }
- /* Control never reaches here */
- } else {
- stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
- for (int i = min; i < stack.currentFrame->locals.max; i++) {
- if (stack.currentFrame->args.subjectPtr >= md.endSubject)
- break;
- if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != othercase)
- break;
- ++stack.currentFrame->args.subjectPtr;
- }
- while (stack.currentFrame->args.subjectPtr >= stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
- RECURSIVE_MATCH(29, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
- if (isMatch)
- RRETURN;
- --stack.currentFrame->args.subjectPtr;
- }
- RRETURN_NO_MATCH;
- }
- /* Control never reaches here */
- } else {
- /* No case on surrogate pairs, so no need to bother with "othercase". */
-
- for (int i = 1; i <= min; i++) {
- if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc)
- RRETURN_NO_MATCH;
- stack.currentFrame->args.subjectPtr += 2;
- }
-
- if (min == stack.currentFrame->locals.max)
- NEXT_OPCODE;
-
- if (minimize) {
- for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
- RECURSIVE_MATCH(30, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
- if (isMatch)
- RRETURN;
- if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
- RRETURN;
- if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc)
- RRETURN;
- stack.currentFrame->args.subjectPtr += 2;
- }
- /* Control never reaches here */
- } else {
- stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
- for (int i = min; i < stack.currentFrame->locals.max; i++) {
- if (stack.currentFrame->args.subjectPtr > md.endSubject - 2)
- break;
- if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc)
- break;
- stack.currentFrame->args.subjectPtr += 2;
- }
- while (stack.currentFrame->args.subjectPtr >= stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
- RECURSIVE_MATCH(31, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
- if (isMatch)
- RRETURN;
- stack.currentFrame->args.subjectPtr -= 2;
- }
- RRETURN_NO_MATCH;
- }
- /* Control never reaches here */
- }
- /* Control never reaches here */
-
- /* Match a negated single one-byte character. */
-
- BEGIN_OPCODE(NOT): {
- if (stack.currentFrame->args.subjectPtr >= md.endSubject)
- RRETURN_NO_MATCH;
- int b = stack.currentFrame->args.instructionPtr[1];
- int c = *stack.currentFrame->args.subjectPtr++;
- stack.currentFrame->args.instructionPtr += 2;
- if (md.ignoreCase) {
- if (c < 128)
- c = toLowerCase(c);
- if (toLowerCase(b) == c)
- RRETURN_NO_MATCH;
- } else {
- if (b == c)
- RRETURN_NO_MATCH;
- }
- NEXT_OPCODE;
- }
-
- /* Match a negated single one-byte character repeatedly. This is almost a
- repeat of the code for a repeated single character, but I haven't found a
- nice way of commoning these up that doesn't require a test of the
- positive/negative option for each character match. Maybe that wouldn't add
- very much to the time taken, but character matching *is* what this is all
- about... */
-
- BEGIN_OPCODE(NOTEXACT):
- min = stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
- minimize = false;
- stack.currentFrame->args.instructionPtr += 3;
- goto REPEATNOTCHAR;
-
- BEGIN_OPCODE(NOTUPTO):
- BEGIN_OPCODE(NOTMINUPTO):
- min = 0;
- stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
- minimize = *stack.currentFrame->args.instructionPtr == OP_NOTMINUPTO;
- stack.currentFrame->args.instructionPtr += 3;
- goto REPEATNOTCHAR;
-
- BEGIN_OPCODE(NOTSTAR):
- BEGIN_OPCODE(NOTMINSTAR):
- BEGIN_OPCODE(NOTPLUS):
- BEGIN_OPCODE(NOTMINPLUS):
- BEGIN_OPCODE(NOTQUERY):
- BEGIN_OPCODE(NOTMINQUERY):
- repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_NOTSTAR, minimize, min, stack.currentFrame->locals.max);
-
- /* Common code for all repeated single-byte matches. We can give up quickly
- if there are fewer than the minimum number of bytes left in the
- subject. */
-
- REPEATNOTCHAR:
- if (min > md.endSubject - stack.currentFrame->args.subjectPtr)
- RRETURN_NO_MATCH;
- stack.currentFrame->locals.fc = *stack.currentFrame->args.instructionPtr++;
-
- /* The code is duplicated for the caseless and caseful cases, for speed,
- since matching characters is likely to be quite common. First, ensure the
- minimum number of matches are present. If min = max, continue at the same
- level without recursing. Otherwise, if minimizing, keep trying the rest of
- the expression and advancing one matching character if failing, up to the
- maximum. Alternatively, if maximizing, find the maximum number of
- characters and work backwards. */
-
- DPRINTF(("negative matching %c{%d,%d}\n", stack.currentFrame->locals.fc, min, stack.currentFrame->locals.max));
-
- if (md.ignoreCase) {
- if (stack.currentFrame->locals.fc < 128)
- stack.currentFrame->locals.fc = toLowerCase(stack.currentFrame->locals.fc);
-
- for (int i = 1; i <= min; i++) {
- int d = *stack.currentFrame->args.subjectPtr++;
- if (d < 128)
- d = toLowerCase(d);
- if (stack.currentFrame->locals.fc == d)
- RRETURN_NO_MATCH;
- }
-
- if (min == stack.currentFrame->locals.max)
- NEXT_OPCODE;
-
- if (minimize) {
- for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
- RECURSIVE_MATCH(38, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
- if (isMatch)
- RRETURN;
- int d = *stack.currentFrame->args.subjectPtr++;
- if (d < 128)
- d = toLowerCase(d);
- if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject || stack.currentFrame->locals.fc == d)
- RRETURN;
- }
- /* Control never reaches here */
- }
-
- /* Maximize case */
-
- else {
- stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
-
- for (int i = min; i < stack.currentFrame->locals.max; i++) {
- if (stack.currentFrame->args.subjectPtr >= md.endSubject)
- break;
- int d = *stack.currentFrame->args.subjectPtr;
- if (d < 128)
- d = toLowerCase(d);
- if (stack.currentFrame->locals.fc == d)
- break;
- ++stack.currentFrame->args.subjectPtr;
- }
- for (;;) {
- RECURSIVE_MATCH(40, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
- if (isMatch)
- RRETURN;
- if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
- break; /* Stop if tried at original pos */
- }
-
- RRETURN;
- }
- /* Control never reaches here */
- }
-
- /* Caseful comparisons */
-
- else {
- for (int i = 1; i <= min; i++) {
- int d = *stack.currentFrame->args.subjectPtr++;
- if (stack.currentFrame->locals.fc == d)
- RRETURN_NO_MATCH;
- }
-
- if (min == stack.currentFrame->locals.max)
- NEXT_OPCODE;
-
- if (minimize) {
- for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
- RECURSIVE_MATCH(42, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
- if (isMatch)
- RRETURN;
- int d = *stack.currentFrame->args.subjectPtr++;
- if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject || stack.currentFrame->locals.fc == d)
- RRETURN;
- }
- /* Control never reaches here */
- }
-
- /* Maximize case */
-
- else {
- stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
-
- for (int i = min; i < stack.currentFrame->locals.max; i++) {
- if (stack.currentFrame->args.subjectPtr >= md.endSubject)
- break;
- int d = *stack.currentFrame->args.subjectPtr;
- if (stack.currentFrame->locals.fc == d)
- break;
- ++stack.currentFrame->args.subjectPtr;
- }
- for (;;) {
- RECURSIVE_MATCH(44, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
- if (isMatch)
- RRETURN;
- if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
- break; /* Stop if tried at original pos */
- }
-
- RRETURN;
- }
- }
- /* Control never reaches here */
-
- /* Match a single character type repeatedly; several different opcodes
- share code. This is very similar to the code for single characters, but we
- repeat it in the interests of efficiency. */
-
- BEGIN_OPCODE(TYPEEXACT):
- min = stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
- minimize = true;
- stack.currentFrame->args.instructionPtr += 3;
- goto REPEATTYPE;
-
- BEGIN_OPCODE(TYPEUPTO):
- BEGIN_OPCODE(TYPEMINUPTO):
- min = 0;
- stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
- minimize = *stack.currentFrame->args.instructionPtr == OP_TYPEMINUPTO;
- stack.currentFrame->args.instructionPtr += 3;
- goto REPEATTYPE;
-
- BEGIN_OPCODE(TYPESTAR):
- BEGIN_OPCODE(TYPEMINSTAR):
- BEGIN_OPCODE(TYPEPLUS):
- BEGIN_OPCODE(TYPEMINPLUS):
- BEGIN_OPCODE(TYPEQUERY):
- BEGIN_OPCODE(TYPEMINQUERY):
- repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_TYPESTAR, minimize, min, stack.currentFrame->locals.max);
-
- /* Common code for all repeated single character type matches. Note that
- in UTF-8 mode, '.' matches a character of any length, but for the other
- character types, the valid characters are all one-byte long. */
-
- REPEATTYPE:
- stack.currentFrame->locals.ctype = *stack.currentFrame->args.instructionPtr++; /* Code for the character type */
-
- /* First, ensure the minimum number of matches are present. Use inline
- code for maximizing the speed, and do the type test once at the start
- (i.e. keep it out of the loop). Also we can test that there are at least
- the minimum number of characters before we start. */
-
- if (min > md.endSubject - stack.currentFrame->args.subjectPtr)
- RRETURN_NO_MATCH;
- if (min > 0) {
- switch (stack.currentFrame->locals.ctype) {
- case OP_NOT_NEWLINE:
- for (int i = 1; i <= min; i++) {
- if (isNewline(*stack.currentFrame->args.subjectPtr))
- RRETURN_NO_MATCH;
- ++stack.currentFrame->args.subjectPtr;
- }
- break;
-
- case OP_NOT_DIGIT:
- for (int i = 1; i <= min; i++) {
- if (isASCIIDigit(*stack.currentFrame->args.subjectPtr))
- RRETURN_NO_MATCH;
- ++stack.currentFrame->args.subjectPtr;
- }
- break;
-
- case OP_DIGIT:
- for (int i = 1; i <= min; i++) {
- if (!isASCIIDigit(*stack.currentFrame->args.subjectPtr))
- RRETURN_NO_MATCH;
- ++stack.currentFrame->args.subjectPtr;
- }
- break;
-
- case OP_NOT_WHITESPACE:
- for (int i = 1; i <= min; i++) {
- if (isSpaceChar(*stack.currentFrame->args.subjectPtr))
- RRETURN_NO_MATCH;
- ++stack.currentFrame->args.subjectPtr;
- }
- break;
-
- case OP_WHITESPACE:
- for (int i = 1; i <= min; i++) {
- if (!isSpaceChar(*stack.currentFrame->args.subjectPtr))
- RRETURN_NO_MATCH;
- ++stack.currentFrame->args.subjectPtr;
- }
- break;
-
- case OP_NOT_WORDCHAR:
- for (int i = 1; i <= min; i++) {
- if (isWordChar(*stack.currentFrame->args.subjectPtr))
- RRETURN_NO_MATCH;
- ++stack.currentFrame->args.subjectPtr;
- }
- break;
-
- case OP_WORDCHAR:
- for (int i = 1; i <= min; i++) {
- if (!isWordChar(*stack.currentFrame->args.subjectPtr))
- RRETURN_NO_MATCH;
- ++stack.currentFrame->args.subjectPtr;
- }
- break;
-
- default:
- ASSERT_NOT_REACHED();
- return matchError(JSRegExpErrorInternal, stack);
- } /* End switch(stack.currentFrame->locals.ctype) */
- }
-
- /* If min = max, continue at the same level without recursing */
-
- if (min == stack.currentFrame->locals.max)
- NEXT_OPCODE;
-
- /* If minimizing, we have to test the rest of the pattern before each
- subsequent match. */
-
- if (minimize) {
- for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
- RECURSIVE_MATCH(48, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
- if (isMatch)
- RRETURN;
- if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
- RRETURN;
-
- int c = *stack.currentFrame->args.subjectPtr++;
- switch (stack.currentFrame->locals.ctype) {
- case OP_NOT_NEWLINE:
- if (isNewline(c))
- RRETURN;
- break;
-
- case OP_NOT_DIGIT:
- if (isASCIIDigit(c))
- RRETURN;
- break;
-
- case OP_DIGIT:
- if (!isASCIIDigit(c))
- RRETURN;
- break;
-
- case OP_NOT_WHITESPACE:
- if (isSpaceChar(c))
- RRETURN;
- break;
-
- case OP_WHITESPACE:
- if (!isSpaceChar(c))
- RRETURN;
- break;
-
- case OP_NOT_WORDCHAR:
- if (isWordChar(c))
- RRETURN;
- break;
-
- case OP_WORDCHAR:
- if (!isWordChar(c))
- RRETURN;
- break;
-
- default:
- ASSERT_NOT_REACHED();
- return matchError(JSRegExpErrorInternal, stack);
- }
- }
- /* Control never reaches here */
- }
-
- /* If maximizing it is worth using inline code for speed, doing the type
- test once at the start (i.e. keep it out of the loop). */
-
- else {
- stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr; /* Remember where we started */
-
- switch (stack.currentFrame->locals.ctype) {
- case OP_NOT_NEWLINE:
- for (int i = min; i < stack.currentFrame->locals.max; i++) {
- if (stack.currentFrame->args.subjectPtr >= md.endSubject || isNewline(*stack.currentFrame->args.subjectPtr))
- break;
- stack.currentFrame->args.subjectPtr++;
- }
- break;
-
- case OP_NOT_DIGIT:
- for (int i = min; i < stack.currentFrame->locals.max; i++) {
- if (stack.currentFrame->args.subjectPtr >= md.endSubject)
- break;
- int c = *stack.currentFrame->args.subjectPtr;
- if (isASCIIDigit(c))
- break;
- ++stack.currentFrame->args.subjectPtr;
- }
- break;
-
- case OP_DIGIT:
- for (int i = min; i < stack.currentFrame->locals.max; i++) {
- if (stack.currentFrame->args.subjectPtr >= md.endSubject)
- break;
- int c = *stack.currentFrame->args.subjectPtr;
- if (!isASCIIDigit(c))
- break;
- ++stack.currentFrame->args.subjectPtr;
- }
- break;
-
- case OP_NOT_WHITESPACE:
- for (int i = min; i < stack.currentFrame->locals.max; i++) {
- if (stack.currentFrame->args.subjectPtr >= md.endSubject)
- break;
- int c = *stack.currentFrame->args.subjectPtr;
- if (isSpaceChar(c))
- break;
- ++stack.currentFrame->args.subjectPtr;
- }
- break;
-
- case OP_WHITESPACE:
- for (int i = min; i < stack.currentFrame->locals.max; i++) {
- if (stack.currentFrame->args.subjectPtr >= md.endSubject)
- break;
- int c = *stack.currentFrame->args.subjectPtr;
- if (!isSpaceChar(c))
- break;
- ++stack.currentFrame->args.subjectPtr;
- }
- break;
-
- case OP_NOT_WORDCHAR:
- for (int i = min; i < stack.currentFrame->locals.max; i++) {
- if (stack.currentFrame->args.subjectPtr >= md.endSubject)
- break;
- int c = *stack.currentFrame->args.subjectPtr;
- if (isWordChar(c))
- break;
- ++stack.currentFrame->args.subjectPtr;
- }
- break;
-
- case OP_WORDCHAR:
- for (int i = min; i < stack.currentFrame->locals.max; i++) {
- if (stack.currentFrame->args.subjectPtr >= md.endSubject)
- break;
- int c = *stack.currentFrame->args.subjectPtr;
- if (!isWordChar(c))
- break;
- ++stack.currentFrame->args.subjectPtr;
- }
- break;
-
- default:
- ASSERT_NOT_REACHED();
- return matchError(JSRegExpErrorInternal, stack);
- }
-
- /* stack.currentFrame->args.subjectPtr is now past the end of the maximum run */
-
- for (;;) {
- RECURSIVE_MATCH(52, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
- if (isMatch)
- RRETURN;
- if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
- break; /* Stop if tried at original pos */
- }
-
- /* Get here if we can't make it match with any permitted repetitions */
-
- RRETURN;
- }
- /* Control never reaches here */
-
- BEGIN_OPCODE(CRMINPLUS):
- BEGIN_OPCODE(CRMINQUERY):
- BEGIN_OPCODE(CRMINRANGE):
- BEGIN_OPCODE(CRMINSTAR):
- BEGIN_OPCODE(CRPLUS):
- BEGIN_OPCODE(CRQUERY):
- BEGIN_OPCODE(CRRANGE):
- BEGIN_OPCODE(CRSTAR):
- ASSERT_NOT_REACHED();
- return matchError(JSRegExpErrorInternal, stack);
-
-#ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
- CAPTURING_BRACKET:
-#else
- default:
-#endif
- /* Opening capturing bracket. If there is space in the offset vector, save
- the current subject position in the working slot at the top of the vector. We
- mustn't change the current values of the data slot, because they may be set
- from a previous iteration of this group, and be referred to by a reference
- inside the group.
-
- If the bracket fails to match, we need to restore this value and also the
- values of the final offsets, in case they were set by a previous iteration of
- the same bracket.
-
- If there isn't enough space in the offset vector, treat this as if it were a
- non-capturing bracket. Don't worry about setting the flag for the error case
- here; that is handled in the code for KET. */
-
- ASSERT(*stack.currentFrame->args.instructionPtr > OP_BRA);
-
- stack.currentFrame->locals.number = *stack.currentFrame->args.instructionPtr - OP_BRA;
-
- /* For extended extraction brackets (large number), we have to fish out the
- number from a dummy opcode at the start. */
-
- if (stack.currentFrame->locals.number > EXTRACT_BASIC_MAX)
- stack.currentFrame->locals.number = get2ByteValue(stack.currentFrame->args.instructionPtr + 2 + LINK_SIZE);
- stack.currentFrame->locals.offset = stack.currentFrame->locals.number << 1;
-
-#ifdef DEBUG
- printf("start bracket %d subject=", stack.currentFrame->locals.number);
- pchars(stack.currentFrame->args.subjectPtr, 16, true, md);
- printf("\n");
-#endif
-
- if (stack.currentFrame->locals.offset < md.offsetMax) {
- stack.currentFrame->locals.saveOffset1 = md.offsetVector[stack.currentFrame->locals.offset];
- stack.currentFrame->locals.saveOffset2 = md.offsetVector[stack.currentFrame->locals.offset + 1];
- stack.currentFrame->locals.saveOffset3 = md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number];
-
- DPRINTF(("saving %d %d %d\n", stack.currentFrame->locals.saveOffset1, stack.currentFrame->locals.saveOffset2, stack.currentFrame->locals.saveOffset3));
- md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number] = stack.currentFrame->args.subjectPtr - md.startSubject;
-
- do {
- RECURSIVE_MATCH_NEW_GROUP(1, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
- if (isMatch)
- RRETURN;
- stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
- } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
-
- DPRINTF(("bracket %d failed\n", stack.currentFrame->locals.number));
-
- md.offsetVector[stack.currentFrame->locals.offset] = stack.currentFrame->locals.saveOffset1;
- md.offsetVector[stack.currentFrame->locals.offset + 1] = stack.currentFrame->locals.saveOffset2;
- md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number] = stack.currentFrame->locals.saveOffset3;
-
- RRETURN;
- }
-
- /* Insufficient room for saving captured contents */
-
- goto NON_CAPTURING_BRACKET;
- }
-
- /* Do not stick any code in here without much thought; it is assumed
- that "continue" in the code above comes out to here to repeat the main
- loop. */
-
- } /* End of main loop */
-
- ASSERT_NOT_REACHED();
-
-#ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
-
-RRETURN_SWITCH:
- switch (stack.currentFrame->returnLocation) {
- case 0: goto RETURN;
- case 1: goto RRETURN_1;
- case 2: goto RRETURN_2;
- case 6: goto RRETURN_6;
- case 7: goto RRETURN_7;
- case 14: goto RRETURN_14;
- case 15: goto RRETURN_15;
- case 16: goto RRETURN_16;
- case 17: goto RRETURN_17;
- case 18: goto RRETURN_18;
- case 19: goto RRETURN_19;
- case 20: goto RRETURN_20;
- case 21: goto RRETURN_21;
- case 22: goto RRETURN_22;
- case 24: goto RRETURN_24;
- case 26: goto RRETURN_26;
- case 27: goto RRETURN_27;
- case 28: goto RRETURN_28;
- case 29: goto RRETURN_29;
- case 30: goto RRETURN_30;
- case 31: goto RRETURN_31;
- case 38: goto RRETURN_38;
- case 40: goto RRETURN_40;
- case 42: goto RRETURN_42;
- case 44: goto RRETURN_44;
- case 48: goto RRETURN_48;
- case 52: goto RRETURN_52;
- }
-
- ASSERT_NOT_REACHED();
- return matchError(JSRegExpErrorInternal, stack);
-
-#endif
-
-RETURN:
- return isMatch;
-}
-
-
-/*************************************************
-* Execute a Regular Expression *
-*************************************************/
-
-/* This function applies a compiled re to a subject string and picks out
-portions of the string if it matches. Two elements in the vector are set for
-each substring: the offsets to the start and end of the substring.
-
-Arguments:
- re points to the compiled expression
- extra_data points to extra data or is NULL
- subject points to the subject string
- length length of subject string (may contain binary zeros)
- start_offset where to start in the subject string
- options option bits
- offsets points to a vector of ints to be filled in with offsets
- offsetCount the number of elements in the vector
-
-Returns: > 0 => success; value is the number of elements filled in
- = 0 => success, but offsets is not big enough
- -1 => failed to match
- < -1 => some kind of unexpected problem
-*/
-
-static void tryFirstByteOptimization(const UChar*& subjectPtr, const UChar* endSubject, int firstByte, bool firstByteIsCaseless, bool useMultiLineFirstCharOptimization, const UChar* originalSubjectStart)
-{
- // If firstByte is set, try scanning to the first instance of that byte
- // no need to try and match against any earlier part of the subject string.
- if (firstByte >= 0) {
- UChar firstChar = firstByte;
- if (firstByteIsCaseless)
- while (subjectPtr < endSubject) {
- int c = *subjectPtr;
- if (c > 127)
- break;
- if (toLowerCase(c) == firstChar)
- break;
- subjectPtr++;
- }
- else {
- while (subjectPtr < endSubject && *subjectPtr != firstChar)
- subjectPtr++;
- }
- } else if (useMultiLineFirstCharOptimization) {
- /* Or to just after \n for a multiline match if possible */
- // I'm not sure why this != originalSubjectStart check is necessary -- ecs 11/18/07
- if (subjectPtr > originalSubjectStart) {
- while (subjectPtr < endSubject && !isNewline(subjectPtr[-1]))
- subjectPtr++;
- }
- }
-}
-
-static bool tryRequiredByteOptimization(const UChar*& subjectPtr, const UChar* endSubject, int reqByte, int reqByte2, bool reqByteIsCaseless, bool hasFirstByte, const UChar*& reqBytePtr)
-{
- /* If reqByte is set, we know that that character must appear in the subject
- for the match to succeed. If the first character is set, reqByte must be
- later in the subject; otherwise the test starts at the match point. This
- optimization can save a huge amount of backtracking in patterns with nested
- unlimited repeats that aren't going to match. Writing separate code for
- cased/caseless versions makes it go faster, as does using an autoincrement
- and backing off on a match.
-
- HOWEVER: when the subject string is very, very long, searching to its end can
- take a long time, and give bad performance on quite ordinary patterns. This
- showed up when somebody was matching /^C/ on a 32-megabyte string... so we
- don't do this when the string is sufficiently long.
- */
-
- if (reqByte >= 0 && endSubject - subjectPtr < REQ_BYTE_MAX) {
- const UChar* p = subjectPtr + (hasFirstByte ? 1 : 0);
-
- /* We don't need to repeat the search if we haven't yet reached the
- place we found it at last time. */
-
- if (p > reqBytePtr) {
- if (reqByteIsCaseless) {
- while (p < endSubject) {
- int pp = *p++;
- if (pp == reqByte || pp == reqByte2) {
- p--;
- break;
- }
- }
- } else {
- while (p < endSubject) {
- if (*p++ == reqByte) {
- p--;
- break;
- }
- }
- }
-
- /* If we can't find the required character, break the matching loop */
-
- if (p >= endSubject)
- return true;
-
- /* If we have found the required character, save the point where we
- found it, so that we don't search again next time round the loop if
- the start hasn't passed this character yet. */
-
- reqBytePtr = p;
- }
- }
- return false;
-}
-
-int jsRegExpExecute(const JSRegExp* re,
- const UChar* subject, int length, int start_offset, int* offsets,
- int offsetCount)
-{
- ASSERT(re);
- ASSERT(subject || !length);
- ASSERT(offsetCount >= 0);
- ASSERT(offsets || offsetCount == 0);
-
- HistogramTimeLogger logger(re);
-
- MatchData matchBlock;
- matchBlock.startSubject = subject;
- matchBlock.endSubject = matchBlock.startSubject + length;
- const UChar* endSubject = matchBlock.endSubject;
-
- matchBlock.multiline = (re->options & MatchAcrossMultipleLinesOption);
- matchBlock.ignoreCase = (re->options & IgnoreCaseOption);
-
- /* If the expression has got more back references than the offsets supplied can
- hold, we get a temporary chunk of working store to use during the matching.
- Otherwise, we can use the vector supplied, rounding down its size to a multiple
- of 3. */
-
- int ocount = offsetCount - (offsetCount % 3);
-
- // FIXME: This is lame that we have to second-guess our caller here.
- // The API should change to either fail-hard when we don't have enough offset space
- // or that we shouldn't ask our callers to pre-allocate in the first place.
- bool usingTemporaryOffsets = false;
- if (re->topBackref > 0 && re->topBackref >= ocount/3) {
- ocount = re->topBackref * 3 + 3;
- matchBlock.offsetVector = new int[ocount];
- if (!matchBlock.offsetVector)
- return JSRegExpErrorNoMemory;
- usingTemporaryOffsets = true;
- } else
- matchBlock.offsetVector = offsets;
-
- matchBlock.offsetEnd = ocount;
- matchBlock.offsetMax = (2*ocount)/3;
- matchBlock.offsetOverflow = false;
-
- /* Compute the minimum number of offsets that we need to reset each time. Doing
- this makes a huge difference to execution time when there aren't many brackets
- in the pattern. */
-
- int resetCount = 2 + re->topBracket * 2;
- if (resetCount > offsetCount)
- resetCount = ocount;
-
- /* Reset the working variable associated with each extraction. These should
- never be used unless previously set, but they get saved and restored, and so we
- initialize them to avoid reading uninitialized locations. */
-
- if (matchBlock.offsetVector) {
- int* iptr = matchBlock.offsetVector + ocount;
- int* iend = iptr - resetCount/2 + 1;
- while (--iptr >= iend)
- *iptr = -1;
- }
-
- /* Set up the first character to match, if available. The firstByte value is
- never set for an anchored regular expression, but the anchoring may be forced
- at run time, so we have to test for anchoring. The first char may be unset for
- an unanchored pattern, of course. If there's no first char and the pattern was
- studied, there may be a bitmap of possible first characters. */
-
- bool firstByteIsCaseless = false;
- int firstByte = -1;
- if (re->options & UseFirstByteOptimizationOption) {
- firstByte = re->firstByte & 255;
- if ((firstByteIsCaseless = (re->firstByte & REQ_IGNORE_CASE)))
- firstByte = toLowerCase(firstByte);
- }
-
- /* For anchored or unanchored matches, there may be a "last known required
- character" set. */
-
- bool reqByteIsCaseless = false;
- int reqByte = -1;
- int reqByte2 = -1;
- if (re->options & UseRequiredByteOptimizationOption) {
- reqByte = re->reqByte & 255; // FIXME: This optimization could be made to work for UTF16 chars as well...
- reqByteIsCaseless = (re->reqByte & REQ_IGNORE_CASE);
- reqByte2 = flipCase(reqByte);
- }
-
- /* Loop for handling unanchored repeated matching attempts; for anchored regexs
- the loop runs just once. */
-
- const UChar* startMatch = subject + start_offset;
- const UChar* reqBytePtr = startMatch - 1;
- bool useMultiLineFirstCharOptimization = re->options & UseMultiLineFirstByteOptimizationOption;
-
- do {
- /* Reset the maximum number of extractions we might see. */
- if (matchBlock.offsetVector) {
- int* iptr = matchBlock.offsetVector;
- int* iend = iptr + resetCount;
- while (iptr < iend)
- *iptr++ = -1;
- }
-
- tryFirstByteOptimization(startMatch, endSubject, firstByte, firstByteIsCaseless, useMultiLineFirstCharOptimization, matchBlock.startSubject + start_offset);
- if (tryRequiredByteOptimization(startMatch, endSubject, reqByte, reqByte2, reqByteIsCaseless, firstByte >= 0, reqBytePtr))
- break;
-
- /* When a match occurs, substrings will be set for all internal extractions;
- we just need to set up the whole thing as substring 0 before returning. If
- there were too many extractions, set the return code to zero. In the case
- where we had to get some local store to hold offsets for backreferences, copy
- those back references that we can. In this case there need not be overflow
- if certain parts of the pattern were not used. */
-
- /* The code starts after the JSRegExp block and the capture name table. */
- const unsigned char* start_code = (const unsigned char*)(re + 1);
-
- int returnCode = match(startMatch, start_code, 2, matchBlock);
-
- /* When the result is no match, advance the pointer to the next character
- and continue. */
- if (returnCode == 0) {
- startMatch++;
- continue;
- }
-
- if (returnCode != 1) {
- ASSERT(returnCode == JSRegExpErrorHitLimit || returnCode == JSRegExpErrorNoMemory);
- DPRINTF((">>>> error: returning %d\n", returnCode));
- return returnCode;
- }
-
- /* We have a match! Copy the offset information from temporary store if
- necessary */
-
- if (usingTemporaryOffsets) {
- if (offsetCount >= 4) {
- memcpy(offsets + 2, matchBlock.offsetVector + 2, (offsetCount - 2) * sizeof(int));
- DPRINTF(("Copied offsets from temporary memory\n"));
- }
- if (matchBlock.endOffsetTop > offsetCount)
- matchBlock.offsetOverflow = true;
-
- DPRINTF(("Freeing temporary memory\n"));
- delete [] matchBlock.offsetVector;
- }
-
- returnCode = matchBlock.offsetOverflow ? 0 : matchBlock.endOffsetTop / 2;
-
- if (offsetCount < 2)
- returnCode = 0;
- else {
- offsets[0] = startMatch - matchBlock.startSubject;
- offsets[1] = matchBlock.endMatchPtr - matchBlock.startSubject;
- }
-
- DPRINTF((">>>> returning %d\n", returnCode));
- return returnCode;
- } while (!(re->options & IsAnchoredOption) && startMatch <= endSubject);
-
- if (usingTemporaryOffsets) {
- DPRINTF(("Freeing temporary memory\n"));
- delete [] matchBlock.offsetVector;
- }
-
- DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
- return JSRegExpErrorNoMatch;
-}
-
-#if REGEXP_HISTOGRAM
-
-class CompareHistogramEntries {
-public:
- bool operator()(const pair<UString, double>& a, const pair<UString, double>& b)
- {
- if (a.second == b.second)
- return a.first < b.first;
- return a.second < b.second;
- }
-};
-
-Histogram::~Histogram()
-{
- Vector<pair<UString, double> > values;
- Map::iterator end = times.end();
- for (Map::iterator it = times.begin(); it != end; ++it)
- values.append(*it);
- sort(values.begin(), values.end(), CompareHistogramEntries());
- size_t size = values.size();
- printf("Regular Expressions, sorted by time spent evaluating them:\n");
- for (size_t i = 0; i < size; ++i)
- printf(" %f - %s\n", values[size - i - 1].second, values[size - i - 1].first.utf8().c_str());
-}
-
-void Histogram::add(const JSRegExp* re, double elapsedTime)
-{
- UString string(reinterpret_cast<const UChar*>(reinterpret_cast<const char*>(re) + re->stringOffset), re->stringLength);
- if (re->options & IgnoreCaseOption && re->options & MatchAcrossMultipleLinesOption)
- string += " (multi-line, ignore case)";
- else {
- if (re->options & IgnoreCaseOption)
- string += " (ignore case)";
- if (re->options & MatchAcrossMultipleLinesOption)
- string += " (multi-line)";
- }
- pair<Map::iterator, bool> result = times.add(string.impl(), elapsedTime);
- if (!result.second)
- result.first->second += elapsedTime;
-}
-
-HistogramTimeLogger::HistogramTimeLogger(const JSRegExp* re)
- : m_re(re)
- , m_startTime(currentTimeMS())
-{
-}
-
-HistogramTimeLogger::~HistogramTimeLogger()
-{
- static Histogram histogram;
- histogram.add(m_re, currentTimeMS() - m_startTime);
-}
-
-#endif