diff options
author | Feng Qian <fqian@google.com> | 2009-06-17 12:12:20 -0700 |
---|---|---|
committer | Feng Qian <fqian@google.com> | 2009-06-17 12:12:20 -0700 |
commit | 5f1ab04193ad0130ca8204aadaceae083aca9881 (patch) | |
tree | 5a92cd389e2cfe7fb67197ce14b38469462379f8 /JavaScriptCore/yarr/RegexInterpreter.h | |
parent | 194315e5a908cc8ed67d597010544803eef1ac59 (diff) | |
download | external_webkit-5f1ab04193ad0130ca8204aadaceae083aca9881.zip external_webkit-5f1ab04193ad0130ca8204aadaceae083aca9881.tar.gz external_webkit-5f1ab04193ad0130ca8204aadaceae083aca9881.tar.bz2 |
Get WebKit r44544.
Diffstat (limited to 'JavaScriptCore/yarr/RegexInterpreter.h')
-rw-r--r-- | JavaScriptCore/yarr/RegexInterpreter.h | 337 |
1 files changed, 337 insertions, 0 deletions
diff --git a/JavaScriptCore/yarr/RegexInterpreter.h b/JavaScriptCore/yarr/RegexInterpreter.h new file mode 100644 index 0000000..a8c122a --- /dev/null +++ b/JavaScriptCore/yarr/RegexInterpreter.h @@ -0,0 +1,337 @@ +/* + * Copyright (C) 2009 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef RegexInterpreter_h +#define RegexInterpreter_h + +#include <wtf/Platform.h> + +#if ENABLE(YARR) + +#include <wtf/unicode/Unicode.h> +#include "RegexParser.h" +#include "RegexPattern.h" + +namespace JSC { namespace Yarr { + +class ByteDisjunction; + +struct ByteTerm { + enum Type { + TypeBodyAlternativeBegin, + TypeBodyAlternativeDisjunction, + TypeBodyAlternativeEnd, + TypeAlternativeBegin, + TypeAlternativeDisjunction, + TypeAlternativeEnd, + TypeSubpatternBegin, + TypeSubpatternEnd, + TypeAssertionBOL, + TypeAssertionEOL, + TypeAssertionWordBoundary, + TypePatternCharacterOnce, + TypePatternCharacterFixed, + TypePatternCharacterGreedy, + TypePatternCharacterNonGreedy, + TypePatternCasedCharacterOnce, + TypePatternCasedCharacterFixed, + TypePatternCasedCharacterGreedy, + TypePatternCasedCharacterNonGreedy, + TypeCharacterClass, + TypeBackReference, + TypeParenthesesSubpattern, + TypeParenthesesSubpatternOnceBegin, + TypeParenthesesSubpatternOnceEnd, + TypeParentheticalAssertionBegin, + TypeParentheticalAssertionEnd, + TypeCheckInput, + } type; + bool invertOrCapture; + union { + struct { + union { + UChar patternCharacter; + struct { + UChar lo; + UChar hi; + } casedCharacter; + CharacterClass* characterClass; + unsigned subpatternId; + }; + union { + ByteDisjunction* parenthesesDisjunction; + unsigned parenthesesWidth; + }; + QuantifierType quantityType; + unsigned quantityCount; + } atom; + struct { + int next; + int end; + } alternative; + unsigned checkInputCount; + }; + unsigned frameLocation; + int inputPosition; + + ByteTerm(UChar ch, int inputPos, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType) + : frameLocation(frameLocation) + { + switch (quantityType) { + case QuantifierFixedCount: + type = (quantityCount == 1) ? ByteTerm::TypePatternCharacterOnce : ByteTerm::TypePatternCharacterFixed; + break; + case QuantifierGreedy: + type = ByteTerm::TypePatternCharacterGreedy; + break; + case QuantifierNonGreedy: + type = ByteTerm::TypePatternCharacterNonGreedy; + break; + } + + atom.patternCharacter = ch; + atom.quantityType = quantityType; + atom.quantityCount = quantityCount; + inputPosition = inputPos; + } + + ByteTerm(UChar lo, UChar hi, int inputPos, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType) + : frameLocation(frameLocation) + { + switch (quantityType) { + case QuantifierFixedCount: + type = (quantityCount == 1) ? ByteTerm::TypePatternCasedCharacterOnce : ByteTerm::TypePatternCasedCharacterFixed; + break; + case QuantifierGreedy: + type = ByteTerm::TypePatternCasedCharacterGreedy; + break; + case QuantifierNonGreedy: + type = ByteTerm::TypePatternCasedCharacterNonGreedy; + break; + } + + atom.casedCharacter.lo = lo; + atom.casedCharacter.hi = hi; + atom.quantityType = quantityType; + atom.quantityCount = quantityCount; + inputPosition = inputPos; + } + + ByteTerm(CharacterClass* characterClass, bool invert, int inputPos) + : type(ByteTerm::TypeCharacterClass) + , invertOrCapture(invert) + { + atom.characterClass = characterClass; + atom.quantityType = QuantifierFixedCount; + atom.quantityCount = 1; + inputPosition = inputPos; + } + + ByteTerm(Type type, unsigned subpatternId, ByteDisjunction* parenthesesInfo, bool invertOrCapture, int inputPos) + : type(type) + , invertOrCapture(invertOrCapture) + { + atom.subpatternId = subpatternId; + atom.parenthesesDisjunction = parenthesesInfo; + atom.quantityType = QuantifierFixedCount; + atom.quantityCount = 1; + inputPosition = inputPos; + } + + ByteTerm(Type type, bool invert = false) + : type(type) + , invertOrCapture(invert) + { + atom.quantityType = QuantifierFixedCount; + atom.quantityCount = 1; + } + + ByteTerm(Type type, unsigned subpatternId, bool invertOrCapture, int inputPos) + : type(type) + , invertOrCapture(invertOrCapture) + { + atom.subpatternId = subpatternId; + atom.quantityType = QuantifierFixedCount; + atom.quantityCount = 1; + inputPosition = inputPos; + } + + static ByteTerm BOL(int inputPos) + { + ByteTerm term(TypeAssertionBOL); + term.inputPosition = inputPos; + return term; + } + + static ByteTerm CheckInput(unsigned count) + { + ByteTerm term(TypeCheckInput); + term.checkInputCount = count; + return term; + } + + static ByteTerm EOL(int inputPos) + { + ByteTerm term(TypeAssertionEOL); + term.inputPosition = inputPos; + return term; + } + + static ByteTerm WordBoundary(bool invert, int inputPos) + { + ByteTerm term(TypeAssertionWordBoundary, invert); + term.inputPosition = inputPos; + return term; + } + + static ByteTerm BackReference(unsigned subpatternId, int inputPos) + { + return ByteTerm(TypeBackReference, subpatternId, false, inputPos); + } + + static ByteTerm BodyAlternativeBegin() + { + ByteTerm term(TypeBodyAlternativeBegin); + term.alternative.next = 0; + term.alternative.end = 0; + return term; + } + + static ByteTerm BodyAlternativeDisjunction() + { + ByteTerm term(TypeBodyAlternativeDisjunction); + term.alternative.next = 0; + term.alternative.end = 0; + return term; + } + + static ByteTerm BodyAlternativeEnd() + { + ByteTerm term(TypeBodyAlternativeEnd); + term.alternative.next = 0; + term.alternative.end = 0; + return term; + } + + static ByteTerm AlternativeBegin() + { + ByteTerm term(TypeAlternativeBegin); + term.alternative.next = 0; + term.alternative.end = 0; + return term; + } + + static ByteTerm AlternativeDisjunction() + { + ByteTerm term(TypeAlternativeDisjunction); + term.alternative.next = 0; + term.alternative.end = 0; + return term; + } + + static ByteTerm AlternativeEnd() + { + ByteTerm term(TypeAlternativeEnd); + term.alternative.next = 0; + term.alternative.end = 0; + return term; + } + + static ByteTerm SubpatternBegin() + { + return ByteTerm(TypeSubpatternBegin); + } + + static ByteTerm SubpatternEnd() + { + return ByteTerm(TypeSubpatternEnd); + } + + bool invert() + { + return invertOrCapture; + } + + bool capture() + { + return invertOrCapture; + } +}; + +class ByteDisjunction { +public: + ByteDisjunction(unsigned numSubpatterns, unsigned frameSize) + : m_numSubpatterns(numSubpatterns) + , m_frameSize(frameSize) + { + } + + Vector<ByteTerm> terms; + unsigned m_numSubpatterns; + unsigned m_frameSize; +}; + +struct BytecodePattern { + BytecodePattern(ByteDisjunction* body, Vector<ByteDisjunction*> allParenthesesInfo, RegexPattern& pattern) + : m_body(body) + , m_ignoreCase(pattern.m_ignoreCase) + , m_multiline(pattern.m_multiline) + { + newlineCharacterClass = pattern.newlineCharacterClass(); + wordcharCharacterClass = pattern.wordcharCharacterClass(); + + m_allParenthesesInfo.append(allParenthesesInfo); + m_userCharacterClasses.append(pattern.m_userCharacterClasses); + // 'Steal' the RegexPattern's CharacterClasses! We clear its + // array, so that it won't delete them on destruction. We'll + // take responsibility for that. + pattern.m_userCharacterClasses.clear(); + } + + ~BytecodePattern() + { + deleteAllValues(m_allParenthesesInfo); + deleteAllValues(m_userCharacterClasses); + } + + OwnPtr<ByteDisjunction> m_body; + bool m_ignoreCase; + bool m_multiline; + + CharacterClass* newlineCharacterClass; + CharacterClass* wordcharCharacterClass; +private: + Vector<ByteDisjunction*> m_allParenthesesInfo; + Vector<CharacterClass*> m_userCharacterClasses; +}; + +BytecodePattern* byteCompileRegex(const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase = false, bool multiline = false); +int interpretRegex(BytecodePattern* v_regex, const UChar* input, unsigned start, unsigned length, int* output); + +} } // namespace JSC::Yarr + +#endif + +#endif // RegexInterpreter_h |