diff options
author | Feng Qian <fqian@google.com> | 2009-06-17 12:12:20 -0700 |
---|---|---|
committer | Feng Qian <fqian@google.com> | 2009-06-17 12:12:20 -0700 |
commit | 5f1ab04193ad0130ca8204aadaceae083aca9881 (patch) | |
tree | 5a92cd389e2cfe7fb67197ce14b38469462379f8 /JavaScriptCore/yarr/RegexPattern.h | |
parent | 194315e5a908cc8ed67d597010544803eef1ac59 (diff) | |
download | external_webkit-5f1ab04193ad0130ca8204aadaceae083aca9881.zip external_webkit-5f1ab04193ad0130ca8204aadaceae083aca9881.tar.gz external_webkit-5f1ab04193ad0130ca8204aadaceae083aca9881.tar.bz2 |
Get WebKit r44544.
Diffstat (limited to 'JavaScriptCore/yarr/RegexPattern.h')
-rw-r--r-- | JavaScriptCore/yarr/RegexPattern.h | 356 |
1 files changed, 356 insertions, 0 deletions
diff --git a/JavaScriptCore/yarr/RegexPattern.h b/JavaScriptCore/yarr/RegexPattern.h new file mode 100644 index 0000000..fb1b0ab --- /dev/null +++ b/JavaScriptCore/yarr/RegexPattern.h @@ -0,0 +1,356 @@ +/* + * Copyright (C) 2009 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef RegexPattern_h +#define RegexPattern_h + +#include <wtf/Platform.h> + +#if ENABLE(YARR) + +#include <wtf/Vector.h> +#include <wtf/unicode/Unicode.h> + + +namespace JSC { namespace Yarr { + +#define RegexStackSpaceForBackTrackInfoPatternCharacter 1 // Only for !fixed quantifiers. +#define RegexStackSpaceForBackTrackInfoCharacterClass 1 // Only for !fixed quantifiers. +#define RegexStackSpaceForBackTrackInfoBackReference 2 +#define RegexStackSpaceForBackTrackInfoAlternative 1 // One per alternative. +#define RegexStackSpaceForBackTrackInfoParentheticalAssertion 1 +#define RegexStackSpaceForBackTrackInfoParenthesesOnce 1 // Only for !fixed quantifiers. +#define RegexStackSpaceForBackTrackInfoParentheses 4 + +struct PatternDisjunction; + +struct CharacterRange { + UChar begin; + UChar end; + + CharacterRange(UChar begin, UChar end) + : begin(begin) + , end(end) + { + } +}; + +struct CharacterClass { + Vector<UChar> m_matches; + Vector<CharacterRange> m_ranges; + Vector<UChar> m_matchesUnicode; + Vector<CharacterRange> m_rangesUnicode; +}; + +enum QuantifierType { + QuantifierFixedCount, + QuantifierGreedy, + QuantifierNonGreedy, +}; + +struct PatternTerm { + enum Type { + TypeAssertionBOL, + TypeAssertionEOL, + TypeAssertionWordBoundary, + TypePatternCharacter, + TypeCharacterClass, + TypeBackReference, + TypeForwardReference, + TypeParenthesesSubpattern, + TypeParentheticalAssertion, + } type; + bool invertOrCapture; + union { + UChar patternCharacter; + CharacterClass* characterClass; + unsigned subpatternId; + struct { + PatternDisjunction* disjunction; + unsigned subpatternId; + unsigned lastSubpatternId; + bool isCopy; + } parentheses; + }; + QuantifierType quantityType; + unsigned quantityCount; + int inputPosition; + unsigned frameLocation; + + PatternTerm(UChar ch) + : type(PatternTerm::TypePatternCharacter) + { + patternCharacter = ch; + quantityType = QuantifierFixedCount; + quantityCount = 1; + } + + PatternTerm(CharacterClass* charClass, bool invert) + : type(PatternTerm::TypeCharacterClass) + , invertOrCapture(invert) + { + characterClass = charClass; + quantityType = QuantifierFixedCount; + quantityCount = 1; + } + + PatternTerm(Type type, unsigned subpatternId, PatternDisjunction* disjunction, bool invertOrCapture) + : type(type) + , invertOrCapture(invertOrCapture) + { + parentheses.disjunction = disjunction; + parentheses.subpatternId = subpatternId; + parentheses.isCopy = false; + quantityType = QuantifierFixedCount; + quantityCount = 1; + } + + PatternTerm(Type type, bool invert = false) + : type(type) + , invertOrCapture(invert) + { + quantityType = QuantifierFixedCount; + quantityCount = 1; + } + + PatternTerm(unsigned spatternId) + : type(TypeBackReference) + , invertOrCapture(invertOrCapture) + { + subpatternId = spatternId; + quantityType = QuantifierFixedCount; + quantityCount = 1; + } + + static PatternTerm ForwardReference() + { + return PatternTerm(TypeForwardReference); + } + + static PatternTerm BOL() + { + return PatternTerm(TypeAssertionBOL); + } + + static PatternTerm EOL() + { + return PatternTerm(TypeAssertionEOL); + } + + static PatternTerm WordBoundary(bool invert) + { + return PatternTerm(TypeAssertionWordBoundary, invert); + } + + bool invert() + { + return invertOrCapture; + } + + bool capture() + { + return invertOrCapture; + } + + void quantify(unsigned count, QuantifierType type) + { + quantityCount = count; + quantityType = type; + } +}; + +struct PatternAlternative { + PatternAlternative(PatternDisjunction* disjunction) + : m_parent(disjunction) + { + } + + PatternTerm& lastTerm() + { + ASSERT(m_terms.size()); + return m_terms[m_terms.size() - 1]; + } + + void removeLastTerm() + { + ASSERT(m_terms.size()); + m_terms.shrink(m_terms.size() - 1); + } + + Vector<PatternTerm> m_terms; + PatternDisjunction* m_parent; + unsigned m_minimumSize; + bool m_hasFixedSize; +}; + +struct PatternDisjunction { + PatternDisjunction(PatternAlternative* parent = 0) + : m_parent(parent) + { + } + + ~PatternDisjunction() + { + deleteAllValues(m_alternatives); + } + + PatternAlternative* addNewAlternative() + { + PatternAlternative* alternative = new PatternAlternative(this); + m_alternatives.append(alternative); + return alternative; + } + + Vector<PatternAlternative*> m_alternatives; + PatternAlternative* m_parent; + unsigned m_minimumSize; + unsigned m_callFrameSize; + bool m_hasFixedSize; +}; + +// You probably don't want to be calling these functions directly +// (please to be calling newlineCharacterClass() et al on your +// friendly neighborhood RegexPattern instance to get nicely +// cached copies). +CharacterClass* newlineCreate(); +CharacterClass* digitsCreate(); +CharacterClass* spacesCreate(); +CharacterClass* wordcharCreate(); +CharacterClass* nondigitsCreate(); +CharacterClass* nonspacesCreate(); +CharacterClass* nonwordcharCreate(); + +struct RegexPattern { + RegexPattern(bool ignoreCase, bool multiline) + : m_ignoreCase(ignoreCase) + , m_multiline(multiline) + , m_numSubpatterns(0) + , m_maxBackReference(0) + , newlineCached(0) + , digitsCached(0) + , spacesCached(0) + , wordcharCached(0) + , nondigitsCached(0) + , nonspacesCached(0) + , nonwordcharCached(0) + { + } + + ~RegexPattern() + { + deleteAllValues(m_disjunctions); + deleteAllValues(m_userCharacterClasses); + } + + void reset() + { + m_numSubpatterns = 0; + m_maxBackReference = 0; + + newlineCached = 0; + digitsCached = 0; + spacesCached = 0; + wordcharCached = 0; + nondigitsCached = 0; + nonspacesCached = 0; + nonwordcharCached = 0; + + deleteAllValues(m_disjunctions); + m_disjunctions.clear(); + deleteAllValues(m_userCharacterClasses); + m_userCharacterClasses.clear(); + } + + bool containsIllegalBackReference() + { + return m_maxBackReference > m_numSubpatterns; + } + + CharacterClass* newlineCharacterClass() + { + if (!newlineCached) + m_userCharacterClasses.append(newlineCached = newlineCreate()); + return newlineCached; + } + CharacterClass* digitsCharacterClass() + { + if (!digitsCached) + m_userCharacterClasses.append(digitsCached = digitsCreate()); + return digitsCached; + } + CharacterClass* spacesCharacterClass() + { + if (!spacesCached) + m_userCharacterClasses.append(spacesCached = spacesCreate()); + return spacesCached; + } + CharacterClass* wordcharCharacterClass() + { + if (!wordcharCached) + m_userCharacterClasses.append(wordcharCached = wordcharCreate()); + return wordcharCached; + } + CharacterClass* nondigitsCharacterClass() + { + if (!nondigitsCached) + m_userCharacterClasses.append(nondigitsCached = nondigitsCreate()); + return nondigitsCached; + } + CharacterClass* nonspacesCharacterClass() + { + if (!nonspacesCached) + m_userCharacterClasses.append(nonspacesCached = nonspacesCreate()); + return nonspacesCached; + } + CharacterClass* nonwordcharCharacterClass() + { + if (!nonwordcharCached) + m_userCharacterClasses.append(nonwordcharCached = nonwordcharCreate()); + return nonwordcharCached; + } + + bool m_ignoreCase; + bool m_multiline; + unsigned m_numSubpatterns; + unsigned m_maxBackReference; + PatternDisjunction* m_body; + Vector<PatternDisjunction*, 4> m_disjunctions; + Vector<CharacterClass*> m_userCharacterClasses; + +private: + CharacterClass* newlineCached; + CharacterClass* digitsCached; + CharacterClass* spacesCached; + CharacterClass* wordcharCached; + CharacterClass* nondigitsCached; + CharacterClass* nonspacesCached; + CharacterClass* nonwordcharCached; +}; + +} } // namespace JSC::Yarr + +#endif + +#endif // RegexPattern_h |