summaryrefslogtreecommitdiffstats
path: root/JavaScriptCore/yarr/RegexInterpreter.h
diff options
context:
space:
mode:
Diffstat (limited to 'JavaScriptCore/yarr/RegexInterpreter.h')
-rw-r--r--JavaScriptCore/yarr/RegexInterpreter.h62
1 files changed, 49 insertions, 13 deletions
diff --git a/JavaScriptCore/yarr/RegexInterpreter.h b/JavaScriptCore/yarr/RegexInterpreter.h
index 48c9a5e..2e23472 100644
--- a/JavaScriptCore/yarr/RegexInterpreter.h
+++ b/JavaScriptCore/yarr/RegexInterpreter.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2009 Apple Inc. All rights reserved.
+ * Copyright (C) 2009, 2010 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -26,16 +26,33 @@
#ifndef RegexInterpreter_h
#define RegexInterpreter_h
-#include <wtf/Platform.h>
-
-#if ENABLE(YARR)
-
-#include <wtf/unicode/Unicode.h>
#include "RegexParser.h"
#include "RegexPattern.h"
+#include <wtf/PassOwnPtr.h>
+#include <wtf/unicode/Unicode.h>
+
+namespace WTF {
+class BumpPointerAllocator;
+}
+using WTF::BumpPointerAllocator;
namespace JSC { namespace Yarr {
+// TODO: Move the matchLimit constant and the JSRegExpResult enum to JSRegExp.h when PCRE is removed.
+
+// The below limit restricts the number of "recursive" match calls in order to
+// avoid spending exponential time on complex regular expressions.
+static const unsigned matchLimit = 1000000;
+
+enum JSRegExpResult {
+ JSRegExpMatch = 1,
+ JSRegExpNoMatch = 0,
+ JSRegExpErrorNoMatch = -1,
+ JSRegExpErrorHitLimit = -2,
+ JSRegExpErrorNoMemory = -3,
+ JSRegExpErrorInternal = -4
+};
+
class ByteDisjunction;
struct ByteTerm {
@@ -64,6 +81,8 @@ struct ByteTerm {
TypeParenthesesSubpattern,
TypeParenthesesSubpatternOnceBegin,
TypeParenthesesSubpatternOnceEnd,
+ TypeParenthesesSubpatternTerminalBegin,
+ TypeParenthesesSubpatternTerminalEnd,
TypeParentheticalAssertionBegin,
TypeParentheticalAssertionEnd,
TypeCheckInput,
@@ -90,6 +109,7 @@ struct ByteTerm {
struct {
int next;
int end;
+ bool onceThrough;
} alternative;
unsigned checkInputCount;
};
@@ -211,19 +231,21 @@ struct ByteTerm {
return ByteTerm(TypeBackReference, subpatternId, false, inputPos);
}
- static ByteTerm BodyAlternativeBegin()
+ static ByteTerm BodyAlternativeBegin(bool onceThrough)
{
ByteTerm term(TypeBodyAlternativeBegin);
term.alternative.next = 0;
term.alternative.end = 0;
+ term.alternative.onceThrough = onceThrough;
return term;
}
- static ByteTerm BodyAlternativeDisjunction()
+ static ByteTerm BodyAlternativeDisjunction(bool onceThrough)
{
ByteTerm term(TypeBodyAlternativeDisjunction);
term.alternative.next = 0;
term.alternative.end = 0;
+ term.alternative.onceThrough = onceThrough;
return term;
}
@@ -232,6 +254,7 @@ struct ByteTerm {
ByteTerm term(TypeBodyAlternativeEnd);
term.alternative.next = 0;
term.alternative.end = 0;
+ term.alternative.onceThrough = false;
return term;
}
@@ -240,6 +263,7 @@ struct ByteTerm {
ByteTerm term(TypeAlternativeBegin);
term.alternative.next = 0;
term.alternative.end = 0;
+ term.alternative.onceThrough = false;
return term;
}
@@ -248,6 +272,7 @@ struct ByteTerm {
ByteTerm term(TypeAlternativeDisjunction);
term.alternative.next = 0;
term.alternative.end = 0;
+ term.alternative.onceThrough = false;
return term;
}
@@ -256,6 +281,7 @@ struct ByteTerm {
ByteTerm term(TypeAlternativeEnd);
term.alternative.next = 0;
term.alternative.end = 0;
+ term.alternative.onceThrough = false;
return term;
}
@@ -294,10 +320,12 @@ public:
};
struct BytecodePattern : FastAllocBase {
- BytecodePattern(ByteDisjunction* body, Vector<ByteDisjunction*> allParenthesesInfo, RegexPattern& pattern)
+ BytecodePattern(PassOwnPtr<ByteDisjunction> body, Vector<ByteDisjunction*> allParenthesesInfo, RegexPattern& pattern, BumpPointerAllocator* allocator)
: m_body(body)
, m_ignoreCase(pattern.m_ignoreCase)
, m_multiline(pattern.m_multiline)
+ , m_containsBeginChars(pattern.m_containsBeginChars)
+ , m_allocator(allocator)
{
newlineCharacterClass = pattern.newlineCharacterClass();
wordcharCharacterClass = pattern.wordcharCharacterClass();
@@ -308,6 +336,8 @@ struct BytecodePattern : FastAllocBase {
// array, so that it won't delete them on destruction. We'll
// take responsibility for that.
pattern.m_userCharacterClasses.clear();
+
+ m_beginChars.append(pattern.m_beginChars);
}
~BytecodePattern()
@@ -319,19 +349,25 @@ struct BytecodePattern : FastAllocBase {
OwnPtr<ByteDisjunction> m_body;
bool m_ignoreCase;
bool m_multiline;
-
+ bool m_containsBeginChars;
+ // Each BytecodePattern is associated with a RegExp, each RegExp is associated
+ // with a JSGlobalData. Cache a pointer to our JSGlobalData's m_regexAllocator.
+ BumpPointerAllocator* m_allocator;
+
CharacterClass* newlineCharacterClass;
CharacterClass* wordcharCharacterClass;
+
+ Vector<BeginChar> m_beginChars;
+
private:
Vector<ByteDisjunction*> m_allParenthesesInfo;
Vector<CharacterClass*> m_userCharacterClasses;
};
-BytecodePattern* byteCompileRegex(const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase = false, bool multiline = false);
+PassOwnPtr<BytecodePattern> byteCompileRegex(const UString& pattern, unsigned& numSubpatterns, const char*& error, BumpPointerAllocator*, bool ignoreCase = false, bool multiline = false);
+PassOwnPtr<BytecodePattern> byteCompileRegex(RegexPattern& pattern, BumpPointerAllocator*);
int interpretRegex(BytecodePattern* v_regex, const UChar* input, unsigned start, unsigned length, int* output);
} } // namespace JSC::Yarr
-#endif
-
#endif // RegexInterpreter_h