summary refs log tree commit diff stats
path: root/JavaScriptCore/yarr/RegexPattern.h
diff options
context:
space:
mode:
Diffstat (limited to 'JavaScriptCore/yarr/RegexPattern.h')
-rw-r--r-- JavaScriptCore/yarr/RegexPattern.h 95
1 files changed, 85 insertions, 10 deletions
diff --git a/JavaScriptCore/yarr/RegexPattern.h b/JavaScriptCore/yarr/RegexPattern.h
index dd7512d..c76c641 100644
--- a/JavaScriptCore/yarr/RegexPattern.h
+++ b/JavaScriptCore/yarr/RegexPattern.h
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2009 Apple Inc. All rights reserved.
+ * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -26,14 +27,9 @@
#ifndef RegexPattern_h
#define RegexPattern_h
-#include <wtf/Platform.h>
-
-#if ENABLE(YARR)
-
#include <wtf/Vector.h>
#include <wtf/unicode/Unicode.h>
-
namespace JSC { namespace Yarr {
#define RegexStackSpaceForBackTrackInfoPatternCharacter 1 // Only for !fixed quantifiers.
@@ -42,6 +38,7 @@ namespace JSC { namespace Yarr {
#define RegexStackSpaceForBackTrackInfoAlternative 1 // One per alternative.
#define RegexStackSpaceForBackTrackInfoParentheticalAssertion 1
#define RegexStackSpaceForBackTrackInfoParenthesesOnce 1 // Only for !fixed quantifiers.
+#define RegexStackSpaceForBackTrackInfoParenthesesTerminal 1
#define RegexStackSpaceForBackTrackInfoParentheses 4
struct PatternDisjunction;
@@ -57,11 +54,35 @@ struct CharacterRange {
}
};
+struct CharacterClassTable : RefCounted<CharacterClassTable> { // Ref-counted holder for a character-class table pointer and an inversion flag.
+ const char* m_table; // Stored exactly as passed to create(); this struct declares no destructor, so it never frees the pointer.
+ bool m_inverted; // Stored as passed to create(); presumably marks the table as describing the complement set -- TODO confirm against the matcher.
+ static PassRefPtr<CharacterClassTable> create(const char* table, bool inverted) // Factory: the only construction path, because the constructor is private.
+ {
+ return adoptRef(new CharacterClassTable(table, inverted)); // adoptRef() hands the freshly allocated object to the returned PassRefPtr.
+ }
+
+private:
+ CharacterClassTable(const char* table, bool inverted) // Private so that instances are only created through create().
+ : m_table(table)
+ , m_inverted(inverted)
+ {
+ }
+};
+
struct CharacterClass : FastAllocBase {
+ // Every CharacterClass instance must carry the full set of matches and ranges;
+ // it may additionally have an optional table for faster lookups (which must
+ // match the specified matches and ranges).
+ CharacterClass(PassRefPtr<CharacterClassTable> table) // Takes the optional table; the four vectors below are left default-constructed.
+ : m_table(table)
+ {
+ }
Vector<UChar> m_matches;
Vector<CharacterRange> m_ranges;
Vector<UChar> m_matchesUnicode;
Vector<CharacterRange> m_rangesUnicode;
+ RefPtr<CharacterClassTable> m_table; // The optional lookup table received by the constructor.
};
enum QuantifierType {
@@ -92,6 +113,7 @@ struct PatternTerm {
unsigned subpatternId;
unsigned lastSubpatternId;
bool isCopy;
+ bool isTerminal;
} parentheses;
};
QuantifierType quantityType;
@@ -123,6 +145,7 @@ struct PatternTerm {
parentheses.disjunction = disjunction;
parentheses.subpatternId = subpatternId;
parentheses.isCopy = false;
+ parentheses.isTerminal = false;
quantityType = QuantifierFixedCount;
quantityCount = 1;
}
@@ -184,6 +207,10 @@ struct PatternTerm {
struct PatternAlternative : FastAllocBase {
PatternAlternative(PatternDisjunction* disjunction) // Records the owning disjunction and clears all four one-bit flags.
: m_parent(disjunction)
+ , m_onceThrough(false)
+ , m_hasFixedSize(false)
+ , m_startsWithBOL(false)
+ , m_containsBOL(false)
{ // NOTE(review): m_minimumSize is not in the initializer list above -- confirm it is assigned before it is read.
}
@@ -198,16 +225,30 @@ struct PatternAlternative : FastAllocBase {
ASSERT(m_terms.size());
m_terms.shrink(m_terms.size() - 1);
}
+
+ void setOnceThrough() // Raises the m_onceThrough flag; this struct's visible code offers no way to lower it again.
+ {
+ m_onceThrough = true;
+ }
+
+ bool onceThrough() // Reports whether setOnceThrough() has been called (the flag starts false in the constructor).
+ {
+ return m_onceThrough;
+ }
Vector<PatternTerm> m_terms;
PatternDisjunction* m_parent;
unsigned m_minimumSize;
- bool m_hasFixedSize;
+ bool m_onceThrough : 1;
+ bool m_hasFixedSize : 1;
+ bool m_startsWithBOL : 1;
+ bool m_containsBOL : 1;
};
struct PatternDisjunction : FastAllocBase {
PatternDisjunction(PatternAlternative* parent = 0) // parent defaults to a null pointer when omitted.
: m_parent(parent)
+ , m_hasFixedSize(false) // Flag now starts out false instead of being left uninitialized by this constructor.
{
}
@@ -242,10 +283,37 @@ CharacterClass* nondigitsCreate();
CharacterClass* nonspacesCreate();
CharacterClass* nonwordcharCreate();
+struct TermChain { // A PatternTerm together with a vector of further TermChain nodes, i.e. a recursive structure.
+ TermChain(PatternTerm term) // Copies the given term; hotTerms is left default-constructed.
+ : term(term)
+ {}
+
+ PatternTerm term; // This node's own copy of the term.
+ Vector<TermChain> hotTerms; // Nested chains; what qualifies a term as "hot" is not defined in this excerpt.
+};
+
+struct BeginChar { // A (value, mask) pair of unsigned integers; RegexPattern keeps a vector of these in m_beginChars.
+ BeginChar() // Default construction zeroes both fields.
+ : value(0)
+ , mask(0)
+ {}
+
+ BeginChar(unsigned value, unsigned mask) // Stores both arguments unchanged.
+ : value(value)
+ , mask(mask)
+ {}
+
+ unsigned value; // presumably the character value(s) to compare against -- TODO confirm with the code that fills m_beginChars.
+ unsigned mask; // presumably the bits applied before comparing with value -- TODO confirm with the code that fills m_beginChars.
+};
+
struct RegexPattern {
RegexPattern(bool ignoreCase, bool multiline)
: m_ignoreCase(ignoreCase)
, m_multiline(multiline)
+ , m_containsBackreferences(false)
+ , m_containsBeginChars(false)
+ , m_containsBOL(false)
, m_numSubpatterns(0)
, m_maxBackReference(0)
, newlineCached(0)
@@ -269,6 +337,10 @@ struct RegexPattern {
m_numSubpatterns = 0;
m_maxBackReference = 0;
+ m_containsBackreferences = false;
+ m_containsBeginChars = false;
+ m_containsBOL = false;
+
newlineCached = 0;
digitsCached = 0;
spacesCached = 0;
@@ -281,6 +353,7 @@ struct RegexPattern {
m_disjunctions.clear();
deleteAllValues(m_userCharacterClasses);
m_userCharacterClasses.clear();
+ m_beginChars.clear();
}
bool containsIllegalBackReference()
@@ -331,13 +404,17 @@ struct RegexPattern {
return nonwordcharCached;
}
- bool m_ignoreCase;
- bool m_multiline;
+ bool m_ignoreCase : 1;
+ bool m_multiline : 1;
+ bool m_containsBackreferences : 1;
+ bool m_containsBeginChars : 1;
+ bool m_containsBOL : 1;
unsigned m_numSubpatterns;
unsigned m_maxBackReference;
PatternDisjunction* m_body;
Vector<PatternDisjunction*, 4> m_disjunctions;
Vector<CharacterClass*> m_userCharacterClasses;
+ Vector<BeginChar> m_beginChars;
private:
CharacterClass* newlineCached;
@@ -351,6 +428,4 @@ private:
} } // namespace JSC::Yarr
-#endif
-
#endif // RegexPattern_h