diff options
Diffstat (limited to 'JavaScriptCore/yarr/RegexPattern.h')
| -rw-r--r-- | JavaScriptCore/yarr/RegexPattern.h | 95 |
1 files changed, 85 insertions, 10 deletions
diff --git a/JavaScriptCore/yarr/RegexPattern.h b/JavaScriptCore/yarr/RegexPattern.h index dd7512d..c76c641 100644 --- a/JavaScriptCore/yarr/RegexPattern.h +++ b/JavaScriptCore/yarr/RegexPattern.h @@ -1,5 +1,6 @@ /* * Copyright (C) 2009 Apple Inc. All rights reserved. + * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -26,14 +27,9 @@ #ifndef RegexPattern_h #define RegexPattern_h -#include <wtf/Platform.h> - -#if ENABLE(YARR) - #include <wtf/Vector.h> #include <wtf/unicode/Unicode.h> - namespace JSC { namespace Yarr { #define RegexStackSpaceForBackTrackInfoPatternCharacter 1 // Only for !fixed quantifiers. @@ -42,6 +38,7 @@ namespace JSC { namespace Yarr { #define RegexStackSpaceForBackTrackInfoAlternative 1 // One per alternative. #define RegexStackSpaceForBackTrackInfoParentheticalAssertion 1 #define RegexStackSpaceForBackTrackInfoParenthesesOnce 1 // Only for !fixed quantifiers. +#define RegexStackSpaceForBackTrackInfoParenthesesTerminal 1 #define RegexStackSpaceForBackTrackInfoParentheses 4 struct PatternDisjunction; @@ -57,11 +54,35 @@ struct CharacterRange { } }; +struct CharacterClassTable : RefCounted<CharacterClassTable> { + const char* m_table; + bool m_inverted; + static PassRefPtr<CharacterClassTable> create(const char* table, bool inverted) + { + return adoptRef(new CharacterClassTable(table, inverted)); + } + +private: + CharacterClassTable(const char* table, bool inverted) + : m_table(table) + , m_inverted(inverted) + { + } +}; + struct CharacterClass : FastAllocBase { + // All CharacterClass instances have to have the full set of matches and ranges, + // they may have an optional table for faster lookups (which must match the + // specified matches and ranges) + CharacterClass(PassRefPtr<CharacterClassTable> table) + : m_table(table) + { + } Vector<UChar> m_matches; Vector<CharacterRange> m_ranges; Vector<UChar> m_matchesUnicode; Vector<CharacterRange> m_rangesUnicode; + RefPtr<CharacterClassTable> m_table; }; enum QuantifierType { @@ -92,6 +113,7 @@ struct PatternTerm { unsigned subpatternId; unsigned lastSubpatternId; bool isCopy; + bool isTerminal; } parentheses; }; QuantifierType quantityType; @@ -123,6 +145,7 @@ struct PatternTerm { parentheses.disjunction = disjunction; parentheses.subpatternId = subpatternId; parentheses.isCopy = false; + parentheses.isTerminal = false; quantityType = QuantifierFixedCount; quantityCount = 1; } @@ -184,6 +207,10 @@ struct PatternTerm { struct PatternAlternative : FastAllocBase { PatternAlternative(PatternDisjunction* disjunction) : m_parent(disjunction) + , m_onceThrough(false) + , m_hasFixedSize(false) + , m_startsWithBOL(false) + , m_containsBOL(false) { } @@ -198,16 +225,30 @@ struct PatternAlternative : FastAllocBase { ASSERT(m_terms.size()); m_terms.shrink(m_terms.size() - 1); } + + void setOnceThrough() + { + m_onceThrough = true; + } + + bool onceThrough() + { + return m_onceThrough; + } Vector<PatternTerm> m_terms; PatternDisjunction* m_parent; unsigned m_minimumSize; - bool m_hasFixedSize; + bool m_onceThrough : 1; + bool m_hasFixedSize : 1; + bool m_startsWithBOL : 1; + bool m_containsBOL : 1; }; struct PatternDisjunction : FastAllocBase { PatternDisjunction(PatternAlternative* parent = 0) : m_parent(parent) + , m_hasFixedSize(false) { } @@ -242,10 +283,37 @@ CharacterClass* nondigitsCreate(); CharacterClass* nonspacesCreate(); CharacterClass* nonwordcharCreate(); +struct TermChain { + TermChain(PatternTerm term) + : term(term) + {} + + PatternTerm term; + Vector<TermChain> hotTerms; +}; + +struct BeginChar { + BeginChar() + : value(0) + , mask(0) + {} + + BeginChar(unsigned value, unsigned mask) + : value(value) + , mask(mask) + {} + + unsigned value; + unsigned mask; +}; + struct RegexPattern { RegexPattern(bool ignoreCase, bool multiline) : m_ignoreCase(ignoreCase) , m_multiline(multiline) + , m_containsBackreferences(false) + , m_containsBeginChars(false) + , m_containsBOL(false) , m_numSubpatterns(0) , m_maxBackReference(0) , newlineCached(0) @@ -269,6 +337,10 @@ struct RegexPattern { m_numSubpatterns = 0; m_maxBackReference = 0; + m_containsBackreferences = false; + m_containsBeginChars = false; + m_containsBOL = false; + newlineCached = 0; digitsCached = 0; spacesCached = 0; @@ -281,6 +353,7 @@ struct RegexPattern { m_disjunctions.clear(); deleteAllValues(m_userCharacterClasses); m_userCharacterClasses.clear(); + m_beginChars.clear(); } bool containsIllegalBackReference() @@ -331,13 +404,17 @@ struct RegexPattern { return nonwordcharCached; } - bool m_ignoreCase; - bool m_multiline; + bool m_ignoreCase : 1; + bool m_multiline : 1; + bool m_containsBackreferences : 1; + bool m_containsBeginChars : 1; + bool m_containsBOL : 1; unsigned m_numSubpatterns; unsigned m_maxBackReference; PatternDisjunction* m_body; Vector<PatternDisjunction*, 4> m_disjunctions; Vector<CharacterClass*> m_userCharacterClasses; + Vector<BeginChar> m_beginChars; private: CharacterClass* newlineCached; @@ -351,6 +428,4 @@ private: } } // namespace JSC::Yarr -#endif - #endif // RegexPattern_h |
