diff options
author | Steve Block <steveblock@google.com> | 2010-04-27 16:31:00 +0100 |
---|---|---|
committer | Steve Block <steveblock@google.com> | 2010-05-11 14:42:12 +0100 |
commit | dcc8cf2e65d1aa555cce12431a16547e66b469ee (patch) | |
tree | 92a8d65cd5383bca9749f5327fb5e440563926e6 /JavaScriptCore/yarr | |
parent | ccac38a6b48843126402088a309597e682f40fe6 (diff) | |
download | external_webkit-dcc8cf2e65d1aa555cce12431a16547e66b469ee.zip external_webkit-dcc8cf2e65d1aa555cce12431a16547e66b469ee.tar.gz external_webkit-dcc8cf2e65d1aa555cce12431a16547e66b469ee.tar.bz2 |
Merge webkit.org at r58033 : Initial merge by git
Change-Id: If006c38561af287c50cd578d251629b51e4d8cd1
Diffstat (limited to 'JavaScriptCore/yarr')
-rw-r--r-- | JavaScriptCore/yarr/RegexCompiler.cpp | 107 | ||||
-rw-r--r-- | JavaScriptCore/yarr/RegexCompiler.h | 4 | ||||
-rw-r--r-- | JavaScriptCore/yarr/RegexInterpreter.cpp | 31 | ||||
-rw-r--r-- | JavaScriptCore/yarr/RegexInterpreter.h | 4 | ||||
-rw-r--r-- | JavaScriptCore/yarr/RegexJIT.cpp | 59 | ||||
-rw-r--r-- | JavaScriptCore/yarr/RegexJIT.h | 4 | ||||
-rw-r--r-- | JavaScriptCore/yarr/RegexParser.h | 4 | ||||
-rw-r--r-- | JavaScriptCore/yarr/RegexPattern.h | 29 |
8 files changed, 78 insertions, 164 deletions
diff --git a/JavaScriptCore/yarr/RegexCompiler.cpp b/JavaScriptCore/yarr/RegexCompiler.cpp index 9cd3d12..9fbe213 100644 --- a/JavaScriptCore/yarr/RegexCompiler.cpp +++ b/JavaScriptCore/yarr/RegexCompiler.cpp @@ -36,6 +36,8 @@ using namespace WTF; namespace JSC { namespace Yarr { +#include "RegExpJitTables.h" + class CharacterClassConstructor { public: CharacterClassConstructor(bool isCaseInsensitive = false) @@ -141,7 +143,7 @@ public: CharacterClass* charClass() { - CharacterClass* characterClass = new CharacterClass(); + CharacterClass* characterClass = new CharacterClass(0); characterClass->m_matches.append(m_matches); characterClass->m_ranges.append(m_ranges); @@ -233,105 +235,6 @@ private: Vector<CharacterRange> m_rangesUnicode; }; - -CharacterClass* newlineCreate() -{ - CharacterClass* characterClass = new CharacterClass(); - - characterClass->m_matches.append('\n'); - characterClass->m_matches.append('\r'); - characterClass->m_matchesUnicode.append(0x2028); - characterClass->m_matchesUnicode.append(0x2029); - - return characterClass; -} - -CharacterClass* digitsCreate() -{ - CharacterClass* characterClass = new CharacterClass(); - - characterClass->m_ranges.append(CharacterRange('0', '9')); - - return characterClass; -} - -CharacterClass* spacesCreate() -{ - CharacterClass* characterClass = new CharacterClass(); - - characterClass->m_matches.append(' '); - characterClass->m_ranges.append(CharacterRange('\t', '\r')); - characterClass->m_matchesUnicode.append(0x00a0); - characterClass->m_matchesUnicode.append(0x1680); - characterClass->m_matchesUnicode.append(0x180e); - characterClass->m_matchesUnicode.append(0x2028); - characterClass->m_matchesUnicode.append(0x2029); - characterClass->m_matchesUnicode.append(0x202f); - characterClass->m_matchesUnicode.append(0x205f); - characterClass->m_matchesUnicode.append(0x3000); - characterClass->m_rangesUnicode.append(CharacterRange(0x2000, 0x200a)); - - return characterClass; -} - -CharacterClass* wordcharCreate() -{ - CharacterClass* characterClass = new CharacterClass(); - - characterClass->m_matches.append('_'); - characterClass->m_ranges.append(CharacterRange('0', '9')); - characterClass->m_ranges.append(CharacterRange('A', 'Z')); - characterClass->m_ranges.append(CharacterRange('a', 'z')); - - return characterClass; -} - -CharacterClass* nondigitsCreate() -{ - CharacterClass* characterClass = new CharacterClass(); - - characterClass->m_ranges.append(CharacterRange(0, '0' - 1)); - characterClass->m_ranges.append(CharacterRange('9' + 1, 0x7f)); - characterClass->m_rangesUnicode.append(CharacterRange(0x80, 0xffff)); - - return characterClass; -} - -CharacterClass* nonspacesCreate() -{ - CharacterClass* characterClass = new CharacterClass(); - - characterClass->m_ranges.append(CharacterRange(0, '\t' - 1)); - characterClass->m_ranges.append(CharacterRange('\r' + 1, ' ' - 1)); - characterClass->m_ranges.append(CharacterRange(' ' + 1, 0x7f)); - characterClass->m_rangesUnicode.append(CharacterRange(0x0080, 0x009f)); - characterClass->m_rangesUnicode.append(CharacterRange(0x00a1, 0x167f)); - characterClass->m_rangesUnicode.append(CharacterRange(0x1681, 0x180d)); - characterClass->m_rangesUnicode.append(CharacterRange(0x180f, 0x1fff)); - characterClass->m_rangesUnicode.append(CharacterRange(0x200b, 0x2027)); - characterClass->m_rangesUnicode.append(CharacterRange(0x202a, 0x202e)); - characterClass->m_rangesUnicode.append(CharacterRange(0x2030, 0x205e)); - characterClass->m_rangesUnicode.append(CharacterRange(0x2060, 0x2fff)); - characterClass->m_rangesUnicode.append(CharacterRange(0x3001, 0xffff)); - - return characterClass; -} - -CharacterClass* nonwordcharCreate() -{ - CharacterClass* characterClass = new CharacterClass(); - - characterClass->m_matches.append('`'); - characterClass->m_ranges.append(CharacterRange(0, '0' - 1)); - characterClass->m_ranges.append(CharacterRange('9' + 1, 'A' - 1)); - characterClass->m_ranges.append(CharacterRange('Z' + 1, '_' - 1)); - characterClass->m_ranges.append(CharacterRange('z' + 1, 0x7f)); - characterClass->m_rangesUnicode.append(CharacterRange(0x80, 0xffff)); - - return characterClass; -} - - class RegexPatternConstructor { public: RegexPatternConstructor(RegexPattern& pattern) @@ -469,6 +372,7 @@ public: void atomBackReference(unsigned subpatternId) { ASSERT(subpatternId); + m_pattern.m_shouldFallBack = true; m_pattern.m_maxBackReference = std::max(m_pattern.m_maxBackReference, subpatternId); if (subpatternId > m_pattern.m_numSubpatterns) { @@ -544,6 +448,9 @@ public: return; } + if (max > 1 && term.type == PatternTerm::TypeParenthesesSubpattern) + m_pattern.m_shouldFallBack = true; + if (min == 0) term.quantify(max, greedy ? QuantifierGreedy : QuantifierNonGreedy); else if (min == max) diff --git a/JavaScriptCore/yarr/RegexCompiler.h b/JavaScriptCore/yarr/RegexCompiler.h index 3ed2be9..9d2443a 100644 --- a/JavaScriptCore/yarr/RegexCompiler.h +++ b/JavaScriptCore/yarr/RegexCompiler.h @@ -26,13 +26,11 @@ #ifndef RegexCompiler_h #define RegexCompiler_h -#include <wtf/Platform.h> - #if ENABLE(YARR) -#include <wtf/unicode/Unicode.h> #include "RegexParser.h" #include "RegexPattern.h" +#include <wtf/unicode/Unicode.h> namespace JSC { namespace Yarr { diff --git a/JavaScriptCore/yarr/RegexInterpreter.cpp b/JavaScriptCore/yarr/RegexInterpreter.cpp index d088086..c2cb1c2 100644 --- a/JavaScriptCore/yarr/RegexInterpreter.cpp +++ b/JavaScriptCore/yarr/RegexInterpreter.cpp @@ -280,20 +280,6 @@ public: return false; } - bool tryConsumeCharacter(int testChar) - { - if (input.atEnd()) - return false; - - int ch = input.read(); - - if (pattern->m_ignoreCase ? ((Unicode::toLower(testChar) == ch) || (Unicode::toUpper(testChar) == ch)) : (testChar == ch)) { - input.next(); - return true; - } - return false; - } - bool checkCharacter(int testChar, int inputPosition) { return testChar == input.readChecked(inputPosition); @@ -305,23 +291,6 @@ public: return (loChar == ch) || (hiChar == ch); } - bool tryConsumeCharacterClass(CharacterClass* characterClass, bool invert) - { - if (input.atEnd()) - return false; - - bool match = testCharacterClass(characterClass, input.read()); - - if (invert) - match = !match; - - if (match) { - input.next(); - return true; - } - return false; - } - bool checkCharacterClass(CharacterClass* characterClass, bool invert, int inputPosition) { bool match = testCharacterClass(characterClass, input.readChecked(inputPosition)); diff --git a/JavaScriptCore/yarr/RegexInterpreter.h b/JavaScriptCore/yarr/RegexInterpreter.h index 48c9a5e..e3c3122 100644 --- a/JavaScriptCore/yarr/RegexInterpreter.h +++ b/JavaScriptCore/yarr/RegexInterpreter.h @@ -26,13 +26,11 @@ #ifndef RegexInterpreter_h #define RegexInterpreter_h -#include <wtf/Platform.h> - #if ENABLE(YARR) -#include <wtf/unicode/Unicode.h> #include "RegexParser.h" #include "RegexPattern.h" +#include <wtf/unicode/Unicode.h> namespace JSC { namespace Yarr { diff --git a/JavaScriptCore/yarr/RegexJIT.cpp b/JavaScriptCore/yarr/RegexJIT.cpp index fcb8d86..340b53d 100644 --- a/JavaScriptCore/yarr/RegexJIT.cpp +++ b/JavaScriptCore/yarr/RegexJIT.cpp @@ -40,7 +40,6 @@ using namespace WTF; namespace JSC { namespace Yarr { - class RegexGenerator : private MacroAssembler { friend void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline); @@ -54,6 +53,16 @@ class RegexGenerator : private MacroAssembler { static const RegisterID regT1 = ARMRegisters::r6; static const RegisterID returnRegister = ARMRegisters::r0; +#elif CPU(MIPS) + static const RegisterID input = MIPSRegisters::a0; + static const RegisterID index = MIPSRegisters::a1; + static const RegisterID length = MIPSRegisters::a2; + static const RegisterID output = MIPSRegisters::a3; + + static const RegisterID regT0 = MIPSRegisters::t4; + static const RegisterID regT1 = MIPSRegisters::t5; + + static const RegisterID returnRegister = MIPSRegisters::v0; #elif CPU(X86) static const RegisterID input = X86Registers::eax; static const RegisterID index = X86Registers::edx; @@ -145,6 +154,11 @@ class RegexGenerator : private MacroAssembler { void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass) { + if (charClass->m_table) { + ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table->m_table)); + matchDest.append(branchTest8(charClass->m_table->m_inverted ? Zero : NonZero, tableEntry)); + return; + } Jump unicodeFail; if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) { Jump isAscii = branch32(LessThanOrEqual, character, Imm32(0x7f)); @@ -599,9 +613,14 @@ class RegexGenerator : private MacroAssembler { ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch))); failures.append(jumpIfCharNotEquals(ch, state.inputOffset())); } + add32(Imm32(1), countRegister); add32(Imm32(1), index); - branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this); + if (term.quantityCount != 0xffffffff) + branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this); + else + jump(loop); + failures.append(jump()); Label backtrackBegin(this); @@ -636,7 +655,8 @@ class RegexGenerator : private MacroAssembler { loadFromFrame(term.frameLocation, countRegister); atEndOfInput().linkTo(hardFail, this); - branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail); + if (term.quantityCount != 0xffffffff) + branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail); if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { readCharacter(state.inputOffset(), character); or32(Imm32(32), character); @@ -722,7 +742,11 @@ class RegexGenerator : private MacroAssembler { add32(Imm32(1), countRegister); add32(Imm32(1), index); - branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this); + if (term.quantityCount != 0xffffffff) + branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this); + else + jump(loop); + failures.append(jump()); Label backtrackBegin(this); @@ -1078,17 +1102,15 @@ class RegexGenerator : private MacroAssembler { break; case PatternTerm::TypeBackReference: - m_generationFailed = true; + ASSERT_NOT_REACHED(); break; case PatternTerm::TypeForwardReference: break; case PatternTerm::TypeParenthesesSubpattern: - if ((term.quantityCount == 1) && !term.parentheses.isCopy) - generateParenthesesSingle(state); - else - m_generationFailed = true; + ASSERT((term.quantityCount == 1) && !term.parentheses.isCopy); // must fallback to pcre before this point + generateParenthesesSingle(state); break; case PatternTerm::TypeParentheticalAssertion: @@ -1313,6 +1335,8 @@ class RegexGenerator : private MacroAssembler { push(ARMRegisters::r5); push(ARMRegisters::r6); move(ARMRegisters::r3, output); +#elif CPU(MIPS) + // Do nothing. #endif } @@ -1330,6 +1354,8 @@ class RegexGenerator : private MacroAssembler { pop(ARMRegisters::r6); pop(ARMRegisters::r5); pop(ARMRegisters::r4); +#elif CPU(MIPS) + // Do nothing #endif ret(); } @@ -1337,7 +1363,6 @@ class RegexGenerator : private MacroAssembler { public: RegexGenerator(RegexPattern& pattern) : m_pattern(pattern) - , m_generationFailed(false) { } @@ -1367,15 +1392,9 @@ public: jitObject.set(patchBuffer.finalizeCode()); } - bool generationFailed() - { - return m_generationFailed; - } - private: RegexPattern& m_pattern; Vector<AlternativeBacktrackRecord> m_backtrackRecords; - bool m_generationFailed; }; void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& patternString, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline) @@ -1387,13 +1406,13 @@ void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const numSubpatterns = pattern.m_numSubpatterns; - RegexGenerator generator(pattern); - generator.compile(globalData, jitObject); - - if (generator.generationFailed()) { + if (pattern.m_shouldFallBack) { JSRegExpIgnoreCaseOption ignoreCaseOption = ignoreCase ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase; JSRegExpMultilineOption multilineOption = multiline ? JSRegExpMultiline : JSRegExpSingleLine; jitObject.setFallback(jsRegExpCompile(reinterpret_cast<const UChar*>(patternString.data()), patternString.size(), ignoreCaseOption, multilineOption, &numSubpatterns, &error)); + } else { + RegexGenerator generator(pattern); + generator.compile(globalData, jitObject); } } diff --git a/JavaScriptCore/yarr/RegexJIT.h b/JavaScriptCore/yarr/RegexJIT.h index 935b9a3..7f9c16e 100644 --- a/JavaScriptCore/yarr/RegexJIT.h +++ b/JavaScriptCore/yarr/RegexJIT.h @@ -26,8 +26,6 @@ #ifndef RegexJIT_h #define RegexJIT_h -#include <wtf/Platform.h> - #if ENABLE(YARR_JIT) #include "MacroAssembler.h" @@ -68,7 +66,7 @@ public: JSRegExp* getFallback() { return m_fallback; } void setFallback(JSRegExp* fallback) { m_fallback = fallback; } - bool operator!() { return !m_ref.m_code.executableAddress(); } + bool operator!() { return (!m_ref.m_code.executableAddress() && !m_fallback); } void set(MacroAssembler::CodeRef ref) { m_ref = ref; } int execute(const UChar* input, unsigned start, unsigned length, int* output) diff --git a/JavaScriptCore/yarr/RegexParser.h b/JavaScriptCore/yarr/RegexParser.h index 64e8463..c946c2e 100644 --- a/JavaScriptCore/yarr/RegexParser.h +++ b/JavaScriptCore/yarr/RegexParser.h @@ -26,14 +26,12 @@ #ifndef RegexParser_h #define RegexParser_h -#include <wtf/Platform.h> - #if ENABLE(YARR) #include <UString.h> +#include <limits.h> #include <wtf/ASCIICType.h> #include <wtf/unicode/Unicode.h> -#include <limits.h> namespace JSC { namespace Yarr { diff --git a/JavaScriptCore/yarr/RegexPattern.h b/JavaScriptCore/yarr/RegexPattern.h index dd7512d..3271cc1 100644 --- a/JavaScriptCore/yarr/RegexPattern.h +++ b/JavaScriptCore/yarr/RegexPattern.h @@ -26,7 +26,6 @@ #ifndef RegexPattern_h #define RegexPattern_h -#include <wtf/Platform.h> #if ENABLE(YARR) @@ -57,11 +56,35 @@ struct CharacterRange { } }; +struct CharacterClassTable : RefCounted<CharacterClassTable> { + const char* m_table; + bool m_inverted; + static PassRefPtr<CharacterClassTable> create(const char* table, bool inverted) + { + return adoptRef(new CharacterClassTable(table, inverted)); + } + +private: + CharacterClassTable(const char* table, bool inverted) + : m_table(table) + , m_inverted(inverted) + { + } +}; + struct CharacterClass : FastAllocBase { + // All CharacterClass instances have to have the full set of matches and ranges, + // they may have an optional table for faster lookups (which must match the + // specified matches and ranges) + CharacterClass(PassRefPtr<CharacterClassTable> table) + : m_table(table) + { + } Vector<UChar> m_matches; Vector<CharacterRange> m_ranges; Vector<UChar> m_matchesUnicode; Vector<CharacterRange> m_rangesUnicode; + RefPtr<CharacterClassTable> m_table; }; enum QuantifierType { @@ -248,6 +271,7 @@ struct RegexPattern { , m_multiline(multiline) , m_numSubpatterns(0) , m_maxBackReference(0) + , m_shouldFallBack(false) , newlineCached(0) , digitsCached(0) , spacesCached(0) @@ -269,6 +293,8 @@ struct RegexPattern { m_numSubpatterns = 0; m_maxBackReference = 0; + m_shouldFallBack = false; + newlineCached = 0; digitsCached = 0; spacesCached = 0; @@ -335,6 +361,7 @@ struct RegexPattern { bool m_multiline; unsigned m_numSubpatterns; unsigned m_maxBackReference; + bool m_shouldFallBack; PatternDisjunction* m_body; Vector<PatternDisjunction*, 4> m_disjunctions; Vector<CharacterClass*> m_userCharacterClasses; |