summaryrefslogtreecommitdiffstats
path: root/JavaScriptCore/yarr/RegexInterpreter.h
diff options
context:
space:
mode:
authorFeng Qian <fqian@google.com>2009-06-17 12:12:20 -0700
committerFeng Qian <fqian@google.com>2009-06-17 12:12:20 -0700
commit5f1ab04193ad0130ca8204aadaceae083aca9881 (patch)
tree5a92cd389e2cfe7fb67197ce14b38469462379f8 /JavaScriptCore/yarr/RegexInterpreter.h
parent194315e5a908cc8ed67d597010544803eef1ac59 (diff)
downloadexternal_webkit-5f1ab04193ad0130ca8204aadaceae083aca9881.zip
external_webkit-5f1ab04193ad0130ca8204aadaceae083aca9881.tar.gz
external_webkit-5f1ab04193ad0130ca8204aadaceae083aca9881.tar.bz2
Get WebKit r44544.
Diffstat (limited to 'JavaScriptCore/yarr/RegexInterpreter.h')
-rw-r--r--JavaScriptCore/yarr/RegexInterpreter.h337
1 files changed, 337 insertions, 0 deletions
diff --git a/JavaScriptCore/yarr/RegexInterpreter.h b/JavaScriptCore/yarr/RegexInterpreter.h
new file mode 100644
index 0000000..a8c122a
--- /dev/null
+++ b/JavaScriptCore/yarr/RegexInterpreter.h
@@ -0,0 +1,337 @@
+/*
+ * Copyright (C) 2009 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RegexInterpreter_h
+#define RegexInterpreter_h
+
+#include <wtf/Platform.h>
+
+#if ENABLE(YARR)
+
+#include <wtf/unicode/Unicode.h>
+#include "RegexParser.h"
+#include "RegexPattern.h"
+
+namespace JSC { namespace Yarr {
+
+class ByteDisjunction;
+
+struct ByteTerm {
+ enum Type {
+ TypeBodyAlternativeBegin,
+ TypeBodyAlternativeDisjunction,
+ TypeBodyAlternativeEnd,
+ TypeAlternativeBegin,
+ TypeAlternativeDisjunction,
+ TypeAlternativeEnd,
+ TypeSubpatternBegin,
+ TypeSubpatternEnd,
+ TypeAssertionBOL,
+ TypeAssertionEOL,
+ TypeAssertionWordBoundary,
+ TypePatternCharacterOnce,
+ TypePatternCharacterFixed,
+ TypePatternCharacterGreedy,
+ TypePatternCharacterNonGreedy,
+ TypePatternCasedCharacterOnce,
+ TypePatternCasedCharacterFixed,
+ TypePatternCasedCharacterGreedy,
+ TypePatternCasedCharacterNonGreedy,
+ TypeCharacterClass,
+ TypeBackReference,
+ TypeParenthesesSubpattern,
+ TypeParenthesesSubpatternOnceBegin,
+ TypeParenthesesSubpatternOnceEnd,
+ TypeParentheticalAssertionBegin,
+ TypeParentheticalAssertionEnd,
+ TypeCheckInput,
+ } type;
+ bool invertOrCapture;
+ union {
+ struct {
+ union {
+ UChar patternCharacter;
+ struct {
+ UChar lo;
+ UChar hi;
+ } casedCharacter;
+ CharacterClass* characterClass;
+ unsigned subpatternId;
+ };
+ union {
+ ByteDisjunction* parenthesesDisjunction;
+ unsigned parenthesesWidth;
+ };
+ QuantifierType quantityType;
+ unsigned quantityCount;
+ } atom;
+ struct {
+ int next;
+ int end;
+ } alternative;
+ unsigned checkInputCount;
+ };
+ unsigned frameLocation;
+ int inputPosition;
+
+ ByteTerm(UChar ch, int inputPos, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
+ : frameLocation(frameLocation)
+ {
+ switch (quantityType) {
+ case QuantifierFixedCount:
+ type = (quantityCount == 1) ? ByteTerm::TypePatternCharacterOnce : ByteTerm::TypePatternCharacterFixed;
+ break;
+ case QuantifierGreedy:
+ type = ByteTerm::TypePatternCharacterGreedy;
+ break;
+ case QuantifierNonGreedy:
+ type = ByteTerm::TypePatternCharacterNonGreedy;
+ break;
+ }
+
+ atom.patternCharacter = ch;
+ atom.quantityType = quantityType;
+ atom.quantityCount = quantityCount;
+ inputPosition = inputPos;
+ }
+
+ ByteTerm(UChar lo, UChar hi, int inputPos, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
+ : frameLocation(frameLocation)
+ {
+ switch (quantityType) {
+ case QuantifierFixedCount:
+ type = (quantityCount == 1) ? ByteTerm::TypePatternCasedCharacterOnce : ByteTerm::TypePatternCasedCharacterFixed;
+ break;
+ case QuantifierGreedy:
+ type = ByteTerm::TypePatternCasedCharacterGreedy;
+ break;
+ case QuantifierNonGreedy:
+ type = ByteTerm::TypePatternCasedCharacterNonGreedy;
+ break;
+ }
+
+ atom.casedCharacter.lo = lo;
+ atom.casedCharacter.hi = hi;
+ atom.quantityType = quantityType;
+ atom.quantityCount = quantityCount;
+ inputPosition = inputPos;
+ }
+
+ ByteTerm(CharacterClass* characterClass, bool invert, int inputPos)
+ : type(ByteTerm::TypeCharacterClass)
+ , invertOrCapture(invert)
+ {
+ atom.characterClass = characterClass;
+ atom.quantityType = QuantifierFixedCount;
+ atom.quantityCount = 1;
+ inputPosition = inputPos;
+ }
+
+ ByteTerm(Type type, unsigned subpatternId, ByteDisjunction* parenthesesInfo, bool invertOrCapture, int inputPos)
+ : type(type)
+ , invertOrCapture(invertOrCapture)
+ {
+ atom.subpatternId = subpatternId;
+ atom.parenthesesDisjunction = parenthesesInfo;
+ atom.quantityType = QuantifierFixedCount;
+ atom.quantityCount = 1;
+ inputPosition = inputPos;
+ }
+
+ ByteTerm(Type type, bool invert = false)
+ : type(type)
+ , invertOrCapture(invert)
+ {
+ atom.quantityType = QuantifierFixedCount;
+ atom.quantityCount = 1;
+ }
+
+ ByteTerm(Type type, unsigned subpatternId, bool invertOrCapture, int inputPos)
+ : type(type)
+ , invertOrCapture(invertOrCapture)
+ {
+ atom.subpatternId = subpatternId;
+ atom.quantityType = QuantifierFixedCount;
+ atom.quantityCount = 1;
+ inputPosition = inputPos;
+ }
+
+ static ByteTerm BOL(int inputPos)
+ {
+ ByteTerm term(TypeAssertionBOL);
+ term.inputPosition = inputPos;
+ return term;
+ }
+
+ static ByteTerm CheckInput(unsigned count)
+ {
+ ByteTerm term(TypeCheckInput);
+ term.checkInputCount = count;
+ return term;
+ }
+
+ static ByteTerm EOL(int inputPos)
+ {
+ ByteTerm term(TypeAssertionEOL);
+ term.inputPosition = inputPos;
+ return term;
+ }
+
+ static ByteTerm WordBoundary(bool invert, int inputPos)
+ {
+ ByteTerm term(TypeAssertionWordBoundary, invert);
+ term.inputPosition = inputPos;
+ return term;
+ }
+
+ static ByteTerm BackReference(unsigned subpatternId, int inputPos)
+ {
+ return ByteTerm(TypeBackReference, subpatternId, false, inputPos);
+ }
+
+ static ByteTerm BodyAlternativeBegin()
+ {
+ ByteTerm term(TypeBodyAlternativeBegin);
+ term.alternative.next = 0;
+ term.alternative.end = 0;
+ return term;
+ }
+
+ static ByteTerm BodyAlternativeDisjunction()
+ {
+ ByteTerm term(TypeBodyAlternativeDisjunction);
+ term.alternative.next = 0;
+ term.alternative.end = 0;
+ return term;
+ }
+
+ static ByteTerm BodyAlternativeEnd()
+ {
+ ByteTerm term(TypeBodyAlternativeEnd);
+ term.alternative.next = 0;
+ term.alternative.end = 0;
+ return term;
+ }
+
+ static ByteTerm AlternativeBegin()
+ {
+ ByteTerm term(TypeAlternativeBegin);
+ term.alternative.next = 0;
+ term.alternative.end = 0;
+ return term;
+ }
+
+ static ByteTerm AlternativeDisjunction()
+ {
+ ByteTerm term(TypeAlternativeDisjunction);
+ term.alternative.next = 0;
+ term.alternative.end = 0;
+ return term;
+ }
+
+ static ByteTerm AlternativeEnd()
+ {
+ ByteTerm term(TypeAlternativeEnd);
+ term.alternative.next = 0;
+ term.alternative.end = 0;
+ return term;
+ }
+
+ static ByteTerm SubpatternBegin()
+ {
+ return ByteTerm(TypeSubpatternBegin);
+ }
+
+ static ByteTerm SubpatternEnd()
+ {
+ return ByteTerm(TypeSubpatternEnd);
+ }
+
+ bool invert()
+ {
+ return invertOrCapture;
+ }
+
+ bool capture()
+ {
+ return invertOrCapture;
+ }
+};
+
+class ByteDisjunction {
+public:
+ ByteDisjunction(unsigned numSubpatterns, unsigned frameSize)
+ : m_numSubpatterns(numSubpatterns)
+ , m_frameSize(frameSize)
+ {
+ }
+
+ Vector<ByteTerm> terms;
+ unsigned m_numSubpatterns;
+ unsigned m_frameSize;
+};
+
+struct BytecodePattern {
+ BytecodePattern(ByteDisjunction* body, Vector<ByteDisjunction*> allParenthesesInfo, RegexPattern& pattern)
+ : m_body(body)
+ , m_ignoreCase(pattern.m_ignoreCase)
+ , m_multiline(pattern.m_multiline)
+ {
+ newlineCharacterClass = pattern.newlineCharacterClass();
+ wordcharCharacterClass = pattern.wordcharCharacterClass();
+
+ m_allParenthesesInfo.append(allParenthesesInfo);
+ m_userCharacterClasses.append(pattern.m_userCharacterClasses);
+ // 'Steal' the RegexPattern's CharacterClasses! We clear its
+ // array, so that it won't delete them on destruction. We'll
+ // take responsibility for that.
+ pattern.m_userCharacterClasses.clear();
+ }
+
+ ~BytecodePattern()
+ {
+ deleteAllValues(m_allParenthesesInfo);
+ deleteAllValues(m_userCharacterClasses);
+ }
+
+ OwnPtr<ByteDisjunction> m_body;
+ bool m_ignoreCase;
+ bool m_multiline;
+
+ CharacterClass* newlineCharacterClass;
+ CharacterClass* wordcharCharacterClass;
+private:
+ Vector<ByteDisjunction*> m_allParenthesesInfo;
+ Vector<CharacterClass*> m_userCharacterClasses;
+};
+
+BytecodePattern* byteCompileRegex(const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase = false, bool multiline = false);
+int interpretRegex(BytecodePattern* v_regex, const UChar* input, unsigned start, unsigned length, int* output);
+
+} } // namespace JSC::Yarr
+
+#endif
+
+#endif // RegexInterpreter_h