1 files changed, 49 insertions, 13 deletions
diff --git a/JavaScriptCore/yarr/RegexInterpreter.h b/JavaScriptCore/yarr/RegexInterpreter.h
index 48c9a5e..2e23472 100644
--- a/JavaScriptCore/yarr/RegexInterpreter.h
+++ b/JavaScriptCore/yarr/RegexInterpreter.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2009 Apple Inc. All rights reserved.
+ * Copyright (C) 2009, 2010 Apple Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -26,16 +26,33 @@
 #ifndef RegexInterpreter_h
 #define RegexInterpreter_h
 
-#include <wtf/Platform.h>
-
-#if ENABLE(YARR)
-
-#include <wtf/unicode/Unicode.h>
 #include "RegexParser.h"
 #include "RegexPattern.h"
+#include <wtf/PassOwnPtr.h>
+#include <wtf/unicode/Unicode.h>
+
+namespace WTF {
+class BumpPointerAllocator;
+}
+using WTF::BumpPointerAllocator;
 
 namespace JSC { namespace Yarr {
 
+// TODO: move the matchLimit constant and the JSRegExpResult enum to JSRegExp.h when PCRE is removed.
+
+// The below limit restricts the number of "recursive" match calls in order to
+// avoid spending exponential time on complex regular expressions.
+static const unsigned matchLimit = 1000000;
+
+enum JSRegExpResult { // Result codes: 1 and 0 are match / no match; every JSRegExpError* code is negative.
+    JSRegExpMatch = 1,
+    JSRegExpNoMatch = 0,
+    JSRegExpErrorNoMatch = -1,
+    JSRegExpErrorHitLimit = -2, // NOTE(review): presumably reported when matchLimit is exceeded - confirm in the interpreter.
+    JSRegExpErrorNoMemory = -3,
+    JSRegExpErrorInternal = -4
+};
+
 class ByteDisjunction;
 
 struct ByteTerm {
@@ -64,6 +81,8 @@ struct ByteTerm {
         TypeParenthesesSubpattern,
         TypeParenthesesSubpatternOnceBegin,
         TypeParenthesesSubpatternOnceEnd,
+        TypeParenthesesSubpatternTerminalBegin,
+        TypeParenthesesSubpatternTerminalEnd,
         TypeParentheticalAssertionBegin,
         TypeParentheticalAssertionEnd,
         TypeCheckInput,
@@ -90,6 +109,7 @@ struct ByteTerm {
         struct {
             int next;
             int end;
+            bool onceThrough;
         } alternative;
         unsigned checkInputCount;
     };
@@ -211,19 +231,21 @@ struct ByteTerm {
         return ByteTerm(TypeBackReference, subpatternId, false, inputPos);
     }
 
-    static ByteTerm BodyAlternativeBegin()
+    static ByteTerm BodyAlternativeBegin(bool onceThrough) // Factory for a TypeBodyAlternativeBegin term.
     {
         ByteTerm term(TypeBodyAlternativeBegin);
         term.alternative.next = 0; // next/end are zeroed here; NOTE(review): presumably filled in later by the compiler - confirm.
         term.alternative.end = 0;
+        term.alternative.onceThrough = onceThrough; // Caller-supplied flag, stored as given.
         return term;
     }
 
-    static ByteTerm BodyAlternativeDisjunction()
+    static ByteTerm BodyAlternativeDisjunction(bool onceThrough) // Factory for a TypeBodyAlternativeDisjunction term.
     {
         ByteTerm term(TypeBodyAlternativeDisjunction);
         term.alternative.next = 0; // next/end are zeroed here; NOTE(review): presumably filled in later by the compiler - confirm.
         term.alternative.end = 0;
+        term.alternative.onceThrough = onceThrough; // Caller-supplied flag, stored as given.
         return term;
     }
 
@@ -232,6 +254,7 @@ struct ByteTerm {
         ByteTerm term(TypeBodyAlternativeEnd);
         term.alternative.next = 0;
         term.alternative.end = 0;
+        term.alternative.onceThrough = false;
         return term;
     }
 
@@ -240,6 +263,7 @@ struct ByteTerm {
         ByteTerm term(TypeAlternativeBegin);
         term.alternative.next = 0;
         term.alternative.end = 0;
+        term.alternative.onceThrough = false;
         return term;
     }
 
@@ -248,6 +272,7 @@ struct ByteTerm {
         ByteTerm term(TypeAlternativeDisjunction);
         term.alternative.next = 0;
         term.alternative.end = 0;
+        term.alternative.onceThrough = false;
         return term;
     }
 
@@ -256,6 +281,7 @@ struct ByteTerm {
         ByteTerm term(TypeAlternativeEnd);
         term.alternative.next = 0;
         term.alternative.end = 0;
+        term.alternative.onceThrough = false;
         return term;
     }
 
@@ -294,10 +320,12 @@ public:
 };
 
 struct BytecodePattern : FastAllocBase {
-    BytecodePattern(ByteDisjunction* body, Vector<ByteDisjunction*> allParenthesesInfo, RegexPattern& pattern)
+    BytecodePattern(PassOwnPtr<ByteDisjunction> body, Vector<ByteDisjunction*> allParenthesesInfo, RegexPattern& pattern, BumpPointerAllocator* allocator)
         : m_body(body)
         , m_ignoreCase(pattern.m_ignoreCase)
         , m_multiline(pattern.m_multiline)
+        , m_containsBeginChars(pattern.m_containsBeginChars)
+        , m_allocator(allocator)
     {
         newlineCharacterClass = pattern.newlineCharacterClass();
         wordcharCharacterClass = pattern.wordcharCharacterClass();
@@ -308,6 +336,8 @@ struct BytecodePattern : FastAllocBase {
         // array, so that it won't delete them on destruction.  We'll
         // take responsibility for that.
         pattern.m_userCharacterClasses.clear();
+
+        m_beginChars.append(pattern.m_beginChars);
     }
 
     ~BytecodePattern()
@@ -319,19 +349,25 @@ struct BytecodePattern : FastAllocBase {
     OwnPtr<ByteDisjunction> m_body;
     bool m_ignoreCase;
     bool m_multiline;
-    
+    bool m_containsBeginChars;
+    // Each BytecodePattern is associated with a RegExp, each RegExp is associated
+    // with a JSGlobalData.  Cache a pointer to our JSGlobalData's m_regexAllocator.
+    BumpPointerAllocator* m_allocator;
+
     CharacterClass* newlineCharacterClass;
     CharacterClass* wordcharCharacterClass;
+
+    Vector<BeginChar> m_beginChars;
+
 private:
     Vector<ByteDisjunction*> m_allParenthesesInfo;
     Vector<CharacterClass*> m_userCharacterClasses;
 };
 
-BytecodePattern* byteCompileRegex(const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase = false, bool multiline = false);
+PassOwnPtr<BytecodePattern> byteCompileRegex(const UString& pattern, unsigned& numSubpatterns, const char*& error, BumpPointerAllocator*, bool ignoreCase = false, bool multiline = false); // Overload taking the pattern as a UString; the result is handed back as a PassOwnPtr.
+PassOwnPtr<BytecodePattern> byteCompileRegex(RegexPattern& pattern, BumpPointerAllocator*); // Overload taking a RegexPattern directly.
 int interpretRegex(BytecodePattern* v_regex, const UChar* input, unsigned start, unsigned length, int* output); // NOTE(review): return-value meaning and the layout of output are defined in the implementation, not visible here.
 
 } } // namespace JSC::Yarr
 
-#endif
-
 #endif // RegexInterpreter_h