diff options
Diffstat (limited to 'JavaScriptCore/yarr/RegexInterpreter.h')
| -rw-r--r-- | JavaScriptCore/yarr/RegexInterpreter.h | 62 |
1 files changed, 49 insertions, 13 deletions
diff --git a/JavaScriptCore/yarr/RegexInterpreter.h b/JavaScriptCore/yarr/RegexInterpreter.h index 48c9a5e..2e23472 100644 --- a/JavaScriptCore/yarr/RegexInterpreter.h +++ b/JavaScriptCore/yarr/RegexInterpreter.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2009 Apple Inc. All rights reserved. + * Copyright (C) 2009, 2010 Apple Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -26,16 +26,33 @@ #ifndef RegexInterpreter_h #define RegexInterpreter_h -#include <wtf/Platform.h> - -#if ENABLE(YARR) - -#include <wtf/unicode/Unicode.h> #include "RegexParser.h" #include "RegexPattern.h" +#include <wtf/PassOwnPtr.h> +#include <wtf/unicode/Unicode.h> + +namespace WTF { +class BumpPointerAllocator; +} +using WTF::BumpPointerAllocator; namespace JSC { namespace Yarr { +// TODO move the matchLimit constant and the JSRegExpResult enum to the JSRegExp.h when pcre is removed. + +// The below limit restricts the number of "recursive" match calls in order to +// avoid spending exponential time on complex regular expressions. 
+static const unsigned matchLimit = 1000000; + +enum JSRegExpResult { + JSRegExpMatch = 1, + JSRegExpNoMatch = 0, + JSRegExpErrorNoMatch = -1, + JSRegExpErrorHitLimit = -2, + JSRegExpErrorNoMemory = -3, + JSRegExpErrorInternal = -4 +}; + class ByteDisjunction; struct ByteTerm { @@ -64,6 +81,8 @@ struct ByteTerm { TypeParenthesesSubpattern, TypeParenthesesSubpatternOnceBegin, TypeParenthesesSubpatternOnceEnd, + TypeParenthesesSubpatternTerminalBegin, + TypeParenthesesSubpatternTerminalEnd, TypeParentheticalAssertionBegin, TypeParentheticalAssertionEnd, TypeCheckInput, @@ -90,6 +109,7 @@ struct ByteTerm { struct { int next; int end; + bool onceThrough; } alternative; unsigned checkInputCount; }; @@ -211,19 +231,21 @@ struct ByteTerm { return ByteTerm(TypeBackReference, subpatternId, false, inputPos); } - static ByteTerm BodyAlternativeBegin() + static ByteTerm BodyAlternativeBegin(bool onceThrough) { ByteTerm term(TypeBodyAlternativeBegin); term.alternative.next = 0; term.alternative.end = 0; + term.alternative.onceThrough = onceThrough; return term; } - static ByteTerm BodyAlternativeDisjunction() + static ByteTerm BodyAlternativeDisjunction(bool onceThrough) { ByteTerm term(TypeBodyAlternativeDisjunction); term.alternative.next = 0; term.alternative.end = 0; + term.alternative.onceThrough = onceThrough; return term; } @@ -232,6 +254,7 @@ struct ByteTerm { ByteTerm term(TypeBodyAlternativeEnd); term.alternative.next = 0; term.alternative.end = 0; + term.alternative.onceThrough = false; return term; } @@ -240,6 +263,7 @@ struct ByteTerm { ByteTerm term(TypeAlternativeBegin); term.alternative.next = 0; term.alternative.end = 0; + term.alternative.onceThrough = false; return term; } @@ -248,6 +272,7 @@ struct ByteTerm { ByteTerm term(TypeAlternativeDisjunction); term.alternative.next = 0; term.alternative.end = 0; + term.alternative.onceThrough = false; return term; } @@ -256,6 +281,7 @@ struct ByteTerm { ByteTerm term(TypeAlternativeEnd); term.alternative.next = 0; 
term.alternative.end = 0; + term.alternative.onceThrough = false; return term; } @@ -294,10 +320,12 @@ public: }; struct BytecodePattern : FastAllocBase { - BytecodePattern(ByteDisjunction* body, Vector<ByteDisjunction*> allParenthesesInfo, RegexPattern& pattern) + BytecodePattern(PassOwnPtr<ByteDisjunction> body, Vector<ByteDisjunction*> allParenthesesInfo, RegexPattern& pattern, BumpPointerAllocator* allocator) : m_body(body) , m_ignoreCase(pattern.m_ignoreCase) , m_multiline(pattern.m_multiline) + , m_containsBeginChars(pattern.m_containsBeginChars) + , m_allocator(allocator) { newlineCharacterClass = pattern.newlineCharacterClass(); wordcharCharacterClass = pattern.wordcharCharacterClass(); @@ -308,6 +336,8 @@ struct BytecodePattern : FastAllocBase { // array, so that it won't delete them on destruction. We'll // take responsibility for that. pattern.m_userCharacterClasses.clear(); + + m_beginChars.append(pattern.m_beginChars); } ~BytecodePattern() @@ -319,19 +349,25 @@ struct BytecodePattern : FastAllocBase { OwnPtr<ByteDisjunction> m_body; bool m_ignoreCase; bool m_multiline; - + bool m_containsBeginChars; + // Each BytecodePattern is associated with a RegExp, each RegExp is associated + // with a JSGlobalData. Cache a pointer to our JSGlobalData's m_regexAllocator. 
+ BumpPointerAllocator* m_allocator; + CharacterClass* newlineCharacterClass; CharacterClass* wordcharCharacterClass; + + Vector<BeginChar> m_beginChars; + private: Vector<ByteDisjunction*> m_allParenthesesInfo; Vector<CharacterClass*> m_userCharacterClasses; }; -BytecodePattern* byteCompileRegex(const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase = false, bool multiline = false); +PassOwnPtr<BytecodePattern> byteCompileRegex(const UString& pattern, unsigned& numSubpatterns, const char*& error, BumpPointerAllocator*, bool ignoreCase = false, bool multiline = false); +PassOwnPtr<BytecodePattern> byteCompileRegex(RegexPattern& pattern, BumpPointerAllocator*); int interpretRegex(BytecodePattern* v_regex, const UChar* input, unsigned start, unsigned length, int* output); } } // namespace JSC::Yarr -#endif - #endif // RegexInterpreter_h |
