summary | refs | log | tree | commit | diff | stats
path: root/JavaScriptCore/yarr
diff options
context:
space:
mode:
author Steve Block <steveblock@google.com> 2010-04-27 16:31:00 +0100
committer Steve Block <steveblock@google.com> 2010-05-11 14:42:12 +0100
commit dcc8cf2e65d1aa555cce12431a16547e66b469ee (patch)
tree 92a8d65cd5383bca9749f5327fb5e440563926e6 /JavaScriptCore/yarr
parent ccac38a6b48843126402088a309597e682f40fe6 (diff)
download external_webkit-dcc8cf2e65d1aa555cce12431a16547e66b469ee.zip
external_webkit-dcc8cf2e65d1aa555cce12431a16547e66b469ee.tar.gz
external_webkit-dcc8cf2e65d1aa555cce12431a16547e66b469ee.tar.bz2
Merge webkit.org at r58033 : Initial merge by git
Change-Id: If006c38561af287c50cd578d251629b51e4d8cd1
Diffstat (limited to 'JavaScriptCore/yarr')
-rw-r--r-- JavaScriptCore/yarr/RegexCompiler.cpp 107
-rw-r--r-- JavaScriptCore/yarr/RegexCompiler.h 4
-rw-r--r-- JavaScriptCore/yarr/RegexInterpreter.cpp 31
-rw-r--r-- JavaScriptCore/yarr/RegexInterpreter.h 4
-rw-r--r-- JavaScriptCore/yarr/RegexJIT.cpp 59
-rw-r--r-- JavaScriptCore/yarr/RegexJIT.h 4
-rw-r--r-- JavaScriptCore/yarr/RegexParser.h 4
-rw-r--r-- JavaScriptCore/yarr/RegexPattern.h 29
8 files changed, 78 insertions, 164 deletions
diff --git a/JavaScriptCore/yarr/RegexCompiler.cpp b/JavaScriptCore/yarr/RegexCompiler.cpp
index 9cd3d12..9fbe213 100644
--- a/JavaScriptCore/yarr/RegexCompiler.cpp
+++ b/JavaScriptCore/yarr/RegexCompiler.cpp
@@ -36,6 +36,8 @@ using namespace WTF;
namespace JSC { namespace Yarr {
+#include "RegExpJitTables.h"
+
class CharacterClassConstructor {
public:
CharacterClassConstructor(bool isCaseInsensitive = false)
@@ -141,7 +143,7 @@ public:
CharacterClass* charClass()
{
- CharacterClass* characterClass = new CharacterClass();
+ CharacterClass* characterClass = new CharacterClass(0);
characterClass->m_matches.append(m_matches);
characterClass->m_ranges.append(m_ranges);
@@ -233,105 +235,6 @@ private:
Vector<CharacterRange> m_rangesUnicode;
};
-
-CharacterClass* newlineCreate()
-{
- CharacterClass* characterClass = new CharacterClass();
-
- characterClass->m_matches.append('\n');
- characterClass->m_matches.append('\r');
- characterClass->m_matchesUnicode.append(0x2028);
- characterClass->m_matchesUnicode.append(0x2029);
-
- return characterClass;
-}
-
-CharacterClass* digitsCreate()
-{
- CharacterClass* characterClass = new CharacterClass();
-
- characterClass->m_ranges.append(CharacterRange('0', '9'));
-
- return characterClass;
-}
-
-CharacterClass* spacesCreate()
-{
- CharacterClass* characterClass = new CharacterClass();
-
- characterClass->m_matches.append(' ');
- characterClass->m_ranges.append(CharacterRange('\t', '\r'));
- characterClass->m_matchesUnicode.append(0x00a0);
- characterClass->m_matchesUnicode.append(0x1680);
- characterClass->m_matchesUnicode.append(0x180e);
- characterClass->m_matchesUnicode.append(0x2028);
- characterClass->m_matchesUnicode.append(0x2029);
- characterClass->m_matchesUnicode.append(0x202f);
- characterClass->m_matchesUnicode.append(0x205f);
- characterClass->m_matchesUnicode.append(0x3000);
- characterClass->m_rangesUnicode.append(CharacterRange(0x2000, 0x200a));
-
- return characterClass;
-}
-
-CharacterClass* wordcharCreate()
-{
- CharacterClass* characterClass = new CharacterClass();
-
- characterClass->m_matches.append('_');
- characterClass->m_ranges.append(CharacterRange('0', '9'));
- characterClass->m_ranges.append(CharacterRange('A', 'Z'));
- characterClass->m_ranges.append(CharacterRange('a', 'z'));
-
- return characterClass;
-}
-
-CharacterClass* nondigitsCreate()
-{
- CharacterClass* characterClass = new CharacterClass();
-
- characterClass->m_ranges.append(CharacterRange(0, '0' - 1));
- characterClass->m_ranges.append(CharacterRange('9' + 1, 0x7f));
- characterClass->m_rangesUnicode.append(CharacterRange(0x80, 0xffff));
-
- return characterClass;
-}
-
-CharacterClass* nonspacesCreate()
-{
- CharacterClass* characterClass = new CharacterClass();
-
- characterClass->m_ranges.append(CharacterRange(0, '\t' - 1));
- characterClass->m_ranges.append(CharacterRange('\r' + 1, ' ' - 1));
- characterClass->m_ranges.append(CharacterRange(' ' + 1, 0x7f));
- characterClass->m_rangesUnicode.append(CharacterRange(0x0080, 0x009f));
- characterClass->m_rangesUnicode.append(CharacterRange(0x00a1, 0x167f));
- characterClass->m_rangesUnicode.append(CharacterRange(0x1681, 0x180d));
- characterClass->m_rangesUnicode.append(CharacterRange(0x180f, 0x1fff));
- characterClass->m_rangesUnicode.append(CharacterRange(0x200b, 0x2027));
- characterClass->m_rangesUnicode.append(CharacterRange(0x202a, 0x202e));
- characterClass->m_rangesUnicode.append(CharacterRange(0x2030, 0x205e));
- characterClass->m_rangesUnicode.append(CharacterRange(0x2060, 0x2fff));
- characterClass->m_rangesUnicode.append(CharacterRange(0x3001, 0xffff));
-
- return characterClass;
-}
-
-CharacterClass* nonwordcharCreate()
-{
- CharacterClass* characterClass = new CharacterClass();
-
- characterClass->m_matches.append('`');
- characterClass->m_ranges.append(CharacterRange(0, '0' - 1));
- characterClass->m_ranges.append(CharacterRange('9' + 1, 'A' - 1));
- characterClass->m_ranges.append(CharacterRange('Z' + 1, '_' - 1));
- characterClass->m_ranges.append(CharacterRange('z' + 1, 0x7f));
- characterClass->m_rangesUnicode.append(CharacterRange(0x80, 0xffff));
-
- return characterClass;
-}
-
-
class RegexPatternConstructor {
public:
RegexPatternConstructor(RegexPattern& pattern)
@@ -469,6 +372,7 @@ public:
void atomBackReference(unsigned subpatternId)
{
ASSERT(subpatternId);
+ m_pattern.m_shouldFallBack = true;
m_pattern.m_maxBackReference = std::max(m_pattern.m_maxBackReference, subpatternId);
if (subpatternId > m_pattern.m_numSubpatterns) {
@@ -544,6 +448,9 @@ public:
return;
}
+ if (max > 1 && term.type == PatternTerm::TypeParenthesesSubpattern)
+ m_pattern.m_shouldFallBack = true;
+
if (min == 0)
term.quantify(max, greedy ? QuantifierGreedy : QuantifierNonGreedy);
else if (min == max)
diff --git a/JavaScriptCore/yarr/RegexCompiler.h b/JavaScriptCore/yarr/RegexCompiler.h
index 3ed2be9..9d2443a 100644
--- a/JavaScriptCore/yarr/RegexCompiler.h
+++ b/JavaScriptCore/yarr/RegexCompiler.h
@@ -26,13 +26,11 @@
#ifndef RegexCompiler_h
#define RegexCompiler_h
-#include <wtf/Platform.h>
-
#if ENABLE(YARR)
-#include <wtf/unicode/Unicode.h>
#include "RegexParser.h"
#include "RegexPattern.h"
+#include <wtf/unicode/Unicode.h>
namespace JSC { namespace Yarr {
diff --git a/JavaScriptCore/yarr/RegexInterpreter.cpp b/JavaScriptCore/yarr/RegexInterpreter.cpp
index d088086..c2cb1c2 100644
--- a/JavaScriptCore/yarr/RegexInterpreter.cpp
+++ b/JavaScriptCore/yarr/RegexInterpreter.cpp
@@ -280,20 +280,6 @@ public:
return false;
}
- bool tryConsumeCharacter(int testChar)
- {
- if (input.atEnd())
- return false;
-
- int ch = input.read();
-
- if (pattern->m_ignoreCase ? ((Unicode::toLower(testChar) == ch) || (Unicode::toUpper(testChar) == ch)) : (testChar == ch)) {
- input.next();
- return true;
- }
- return false;
- }
-
bool checkCharacter(int testChar, int inputPosition)
{
return testChar == input.readChecked(inputPosition);
@@ -305,23 +291,6 @@ public:
return (loChar == ch) || (hiChar == ch);
}
- bool tryConsumeCharacterClass(CharacterClass* characterClass, bool invert)
- {
- if (input.atEnd())
- return false;
-
- bool match = testCharacterClass(characterClass, input.read());
-
- if (invert)
- match = !match;
-
- if (match) {
- input.next();
- return true;
- }
- return false;
- }
-
bool checkCharacterClass(CharacterClass* characterClass, bool invert, int inputPosition)
{
bool match = testCharacterClass(characterClass, input.readChecked(inputPosition));
diff --git a/JavaScriptCore/yarr/RegexInterpreter.h b/JavaScriptCore/yarr/RegexInterpreter.h
index 48c9a5e..e3c3122 100644
--- a/JavaScriptCore/yarr/RegexInterpreter.h
+++ b/JavaScriptCore/yarr/RegexInterpreter.h
@@ -26,13 +26,11 @@
#ifndef RegexInterpreter_h
#define RegexInterpreter_h
-#include <wtf/Platform.h>
-
#if ENABLE(YARR)
-#include <wtf/unicode/Unicode.h>
#include "RegexParser.h"
#include "RegexPattern.h"
+#include <wtf/unicode/Unicode.h>
namespace JSC { namespace Yarr {
diff --git a/JavaScriptCore/yarr/RegexJIT.cpp b/JavaScriptCore/yarr/RegexJIT.cpp
index fcb8d86..340b53d 100644
--- a/JavaScriptCore/yarr/RegexJIT.cpp
+++ b/JavaScriptCore/yarr/RegexJIT.cpp
@@ -40,7 +40,6 @@ using namespace WTF;
namespace JSC { namespace Yarr {
-
class RegexGenerator : private MacroAssembler {
friend void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline);
@@ -54,6 +53,16 @@ class RegexGenerator : private MacroAssembler {
static const RegisterID regT1 = ARMRegisters::r6;
static const RegisterID returnRegister = ARMRegisters::r0;
+#elif CPU(MIPS)
+ static const RegisterID input = MIPSRegisters::a0;
+ static const RegisterID index = MIPSRegisters::a1;
+ static const RegisterID length = MIPSRegisters::a2;
+ static const RegisterID output = MIPSRegisters::a3;
+
+ static const RegisterID regT0 = MIPSRegisters::t4;
+ static const RegisterID regT1 = MIPSRegisters::t5;
+
+ static const RegisterID returnRegister = MIPSRegisters::v0;
#elif CPU(X86)
static const RegisterID input = X86Registers::eax;
static const RegisterID index = X86Registers::edx;
@@ -145,6 +154,11 @@ class RegexGenerator : private MacroAssembler {
void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
{
+ if (charClass->m_table) {
+ ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table->m_table));
+ matchDest.append(branchTest8(charClass->m_table->m_inverted ? Zero : NonZero, tableEntry));
+ return;
+ }
Jump unicodeFail;
if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
Jump isAscii = branch32(LessThanOrEqual, character, Imm32(0x7f));
@@ -599,9 +613,14 @@ class RegexGenerator : private MacroAssembler {
ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
failures.append(jumpIfCharNotEquals(ch, state.inputOffset()));
}
+
add32(Imm32(1), countRegister);
add32(Imm32(1), index);
- branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
+ if (term.quantityCount != 0xffffffff)
+ branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
+ else
+ jump(loop);
+
failures.append(jump());
Label backtrackBegin(this);
@@ -636,7 +655,8 @@ class RegexGenerator : private MacroAssembler {
loadFromFrame(term.frameLocation, countRegister);
atEndOfInput().linkTo(hardFail, this);
- branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
+ if (term.quantityCount != 0xffffffff)
+ branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
readCharacter(state.inputOffset(), character);
or32(Imm32(32), character);
@@ -722,7 +742,11 @@ class RegexGenerator : private MacroAssembler {
add32(Imm32(1), countRegister);
add32(Imm32(1), index);
- branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
+ if (term.quantityCount != 0xffffffff)
+ branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
+ else
+ jump(loop);
+
failures.append(jump());
Label backtrackBegin(this);
@@ -1078,17 +1102,15 @@ class RegexGenerator : private MacroAssembler {
break;
case PatternTerm::TypeBackReference:
- m_generationFailed = true;
+ ASSERT_NOT_REACHED();
break;
case PatternTerm::TypeForwardReference:
break;
case PatternTerm::TypeParenthesesSubpattern:
- if ((term.quantityCount == 1) && !term.parentheses.isCopy)
- generateParenthesesSingle(state);
- else
- m_generationFailed = true;
+ ASSERT((term.quantityCount == 1) && !term.parentheses.isCopy); // must fallback to pcre before this point
+ generateParenthesesSingle(state);
break;
case PatternTerm::TypeParentheticalAssertion:
@@ -1313,6 +1335,8 @@ class RegexGenerator : private MacroAssembler {
push(ARMRegisters::r5);
push(ARMRegisters::r6);
move(ARMRegisters::r3, output);
+#elif CPU(MIPS)
+ // Do nothing.
#endif
}
@@ -1330,6 +1354,8 @@ class RegexGenerator : private MacroAssembler {
pop(ARMRegisters::r6);
pop(ARMRegisters::r5);
pop(ARMRegisters::r4);
+#elif CPU(MIPS)
+ // Do nothing
#endif
ret();
}
@@ -1337,7 +1363,6 @@ class RegexGenerator : private MacroAssembler {
public:
RegexGenerator(RegexPattern& pattern)
: m_pattern(pattern)
- , m_generationFailed(false)
{
}
@@ -1367,15 +1392,9 @@ public:
jitObject.set(patchBuffer.finalizeCode());
}
- bool generationFailed()
- {
- return m_generationFailed;
- }
-
private:
RegexPattern& m_pattern;
Vector<AlternativeBacktrackRecord> m_backtrackRecords;
- bool m_generationFailed;
};
void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& patternString, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline)
@@ -1387,13 +1406,13 @@ void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const
numSubpatterns = pattern.m_numSubpatterns;
- RegexGenerator generator(pattern);
- generator.compile(globalData, jitObject);
-
- if (generator.generationFailed()) {
+ if (pattern.m_shouldFallBack) {
JSRegExpIgnoreCaseOption ignoreCaseOption = ignoreCase ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase;
JSRegExpMultilineOption multilineOption = multiline ? JSRegExpMultiline : JSRegExpSingleLine;
jitObject.setFallback(jsRegExpCompile(reinterpret_cast<const UChar*>(patternString.data()), patternString.size(), ignoreCaseOption, multilineOption, &numSubpatterns, &error));
+ } else {
+ RegexGenerator generator(pattern);
+ generator.compile(globalData, jitObject);
}
}
diff --git a/JavaScriptCore/yarr/RegexJIT.h b/JavaScriptCore/yarr/RegexJIT.h
index 935b9a3..7f9c16e 100644
--- a/JavaScriptCore/yarr/RegexJIT.h
+++ b/JavaScriptCore/yarr/RegexJIT.h
@@ -26,8 +26,6 @@
#ifndef RegexJIT_h
#define RegexJIT_h
-#include <wtf/Platform.h>
-
#if ENABLE(YARR_JIT)
#include "MacroAssembler.h"
@@ -68,7 +66,7 @@ public:
JSRegExp* getFallback() { return m_fallback; }
void setFallback(JSRegExp* fallback) { m_fallback = fallback; }
- bool operator!() { return !m_ref.m_code.executableAddress(); }
+ bool operator!() { return (!m_ref.m_code.executableAddress() && !m_fallback); }
void set(MacroAssembler::CodeRef ref) { m_ref = ref; }
int execute(const UChar* input, unsigned start, unsigned length, int* output)
diff --git a/JavaScriptCore/yarr/RegexParser.h b/JavaScriptCore/yarr/RegexParser.h
index 64e8463..c946c2e 100644
--- a/JavaScriptCore/yarr/RegexParser.h
+++ b/JavaScriptCore/yarr/RegexParser.h
@@ -26,14 +26,12 @@
#ifndef RegexParser_h
#define RegexParser_h
-#include <wtf/Platform.h>
-
#if ENABLE(YARR)
#include <UString.h>
+#include <limits.h>
#include <wtf/ASCIICType.h>
#include <wtf/unicode/Unicode.h>
-#include <limits.h>
namespace JSC { namespace Yarr {
diff --git a/JavaScriptCore/yarr/RegexPattern.h b/JavaScriptCore/yarr/RegexPattern.h
index dd7512d..3271cc1 100644
--- a/JavaScriptCore/yarr/RegexPattern.h
+++ b/JavaScriptCore/yarr/RegexPattern.h
@@ -26,7 +26,6 @@
#ifndef RegexPattern_h
#define RegexPattern_h
-#include <wtf/Platform.h>
#if ENABLE(YARR)
@@ -57,11 +56,35 @@ struct CharacterRange {
}
};
+struct CharacterClassTable : RefCounted<CharacterClassTable> {
+ const char* m_table;
+ bool m_inverted;
+ static PassRefPtr<CharacterClassTable> create(const char* table, bool inverted)
+ {
+ return adoptRef(new CharacterClassTable(table, inverted));
+ }
+
+private:
+ CharacterClassTable(const char* table, bool inverted)
+ : m_table(table)
+ , m_inverted(inverted)
+ {
+ }
+};
+
struct CharacterClass : FastAllocBase {
+ // All CharacterClass instances have to have the full set of matches and ranges,
+ // they may have an optional table for faster lookups (which must match the
+ // specified matches and ranges)
+ CharacterClass(PassRefPtr<CharacterClassTable> table)
+ : m_table(table)
+ {
+ }
Vector<UChar> m_matches;
Vector<CharacterRange> m_ranges;
Vector<UChar> m_matchesUnicode;
Vector<CharacterRange> m_rangesUnicode;
+ RefPtr<CharacterClassTable> m_table;
};
enum QuantifierType {
@@ -248,6 +271,7 @@ struct RegexPattern {
, m_multiline(multiline)
, m_numSubpatterns(0)
, m_maxBackReference(0)
+ , m_shouldFallBack(false)
, newlineCached(0)
, digitsCached(0)
, spacesCached(0)
@@ -269,6 +293,8 @@ struct RegexPattern {
m_numSubpatterns = 0;
m_maxBackReference = 0;
+ m_shouldFallBack = false;
+
newlineCached = 0;
digitsCached = 0;
spacesCached = 0;
@@ -335,6 +361,7 @@ struct RegexPattern {
bool m_multiline;
unsigned m_numSubpatterns;
unsigned m_maxBackReference;
+ bool m_shouldFallBack;
PatternDisjunction* m_body;
Vector<PatternDisjunction*, 4> m_disjunctions;
Vector<CharacterClass*> m_userCharacterClasses;