diff options
Diffstat (limited to 'Source/JavaScriptCore/yarr')
-rw-r--r-- | Source/JavaScriptCore/yarr/Yarr.h | 72 | ||||
-rw-r--r-- | Source/JavaScriptCore/yarr/YarrInterpreter.cpp (renamed from Source/JavaScriptCore/yarr/RegexInterpreter.cpp) | 35 | ||||
-rw-r--r-- | Source/JavaScriptCore/yarr/YarrInterpreter.h (renamed from Source/JavaScriptCore/yarr/RegexInterpreter.h) | 33 | ||||
-rw-r--r-- | Source/JavaScriptCore/yarr/YarrJIT.cpp (renamed from Source/JavaScriptCore/yarr/RegexJIT.cpp) | 137 | ||||
-rw-r--r-- | Source/JavaScriptCore/yarr/YarrJIT.h (renamed from Source/JavaScriptCore/yarr/RegexJIT.h) | 24 | ||||
-rw-r--r-- | Source/JavaScriptCore/yarr/YarrParser.h (renamed from Source/JavaScriptCore/yarr/RegexParser.h) | 46 | ||||
-rw-r--r-- | Source/JavaScriptCore/yarr/YarrPattern.cpp (renamed from Source/JavaScriptCore/yarr/RegexPattern.cpp) | 66 | ||||
-rw-r--r-- | Source/JavaScriptCore/yarr/YarrPattern.h (renamed from Source/JavaScriptCore/yarr/RegexPattern.h) | 25 | ||||
-rw-r--r-- | Source/JavaScriptCore/yarr/YarrSyntaxChecker.cpp | 59 | ||||
-rw-r--r-- | Source/JavaScriptCore/yarr/YarrSyntaxChecker.h | 38 |
10 files changed, 338 insertions, 197 deletions
diff --git a/Source/JavaScriptCore/yarr/Yarr.h b/Source/JavaScriptCore/yarr/Yarr.h new file mode 100644 index 0000000..57176bc --- /dev/null +++ b/Source/JavaScriptCore/yarr/Yarr.h @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2009 Apple Inc. All rights reserved. + * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF SZEGED ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL UNIVERSITY OF SZEGED OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef Yarr_h +#define Yarr_h + +#include "YarrInterpreter.h" +#include "YarrJIT.h" +#include "YarrPattern.h" + +namespace JSC { namespace Yarr { + +#define YarrStackSpaceForBackTrackInfoPatternCharacter 1 // Only for !fixed quantifiers. +#define YarrStackSpaceForBackTrackInfoCharacterClass 1 // Only for !fixed quantifiers. 
+#define YarrStackSpaceForBackTrackInfoBackReference 2 +#define YarrStackSpaceForBackTrackInfoAlternative 1 // One per alternative. +#define YarrStackSpaceForBackTrackInfoParentheticalAssertion 1 +#define YarrStackSpaceForBackTrackInfoParenthesesOnce 1 // Only for !fixed quantifiers. +#define YarrStackSpaceForBackTrackInfoParenthesesTerminal 1 +#define YarrStackSpaceForBackTrackInfoParentheses 2 + +static const unsigned quantifyInfinite = UINT_MAX; + +// The below limit restricts the number of "recursive" match calls in order to +// avoid spending exponential time on complex regular expressions. +static const unsigned matchLimit = 1000000; + +enum JSRegExpResult { + JSRegExpMatch = 1, + JSRegExpNoMatch = 0, + JSRegExpErrorNoMatch = -1, + JSRegExpErrorHitLimit = -2, + JSRegExpErrorNoMemory = -3, + JSRegExpErrorInternal = -4 +}; + +PassOwnPtr<BytecodePattern> byteCompile(YarrPattern&, BumpPointerAllocator*); +int interpret(BytecodePattern*, const UChar* input, unsigned start, unsigned length, int* output); + +#if ENABLE(YARR_JIT) +void jitCompile(YarrPattern&, JSGlobalData*, YarrCodeBlock& jitObject); +int execute(YarrCodeBlock& jitObject, const UChar* input, unsigned start, unsigned length, int* output); +#endif + +} } // namespace JSC::Yarr + +#endif // Yarr_h + diff --git a/Source/JavaScriptCore/yarr/RegexInterpreter.cpp b/Source/JavaScriptCore/yarr/YarrInterpreter.cpp index 7769922..d2dbf23 100644 --- a/Source/JavaScriptCore/yarr/RegexInterpreter.cpp +++ b/Source/JavaScriptCore/yarr/YarrInterpreter.cpp @@ -25,9 +25,9 @@ */ #include "config.h" -#include "RegexInterpreter.h" +#include "YarrInterpreter.h" -#include "RegexPattern.h" +#include "Yarr.h" #include <wtf/BumpPointerAllocator.h> #ifndef NDEBUG @@ -1433,7 +1433,7 @@ class ByteCompiler { }; public: - ByteCompiler(RegexPattern& pattern) + ByteCompiler(YarrPattern& pattern) : m_pattern(pattern) { m_currentAlternativeIndex = 0; @@ -1825,14 +1825,14 @@ public: if (term.quantityType == QuantifierFixedCount) 
disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize; else - alternativeFrameLocation += RegexStackSpaceForBackTrackInfoParenthesesOnce; + alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce; unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked; atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, alternativeFrameLocation); emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount); atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType); } else if (term.parentheses.isTerminal) { unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked; - atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, term.frameLocation + RegexStackSpaceForBackTrackInfoParenthesesOnce); + atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesOnce); emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount); atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType); } else { @@ -1845,7 +1845,7 @@ public: } case PatternTerm::TypeParentheticalAssertion: { - unsigned alternativeFrameLocation = term.frameLocation + RegexStackSpaceForBackTrackInfoParentheticalAssertion; + unsigned alternativeFrameLocation = term.frameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion; ASSERT(currentCountAlreadyChecked >= (unsigned)term.inputPosition); int positiveInputOffset = currentCountAlreadyChecked - term.inputPosition; @@ -1861,31 +1861,30 @@ public: } 
private: - RegexPattern& m_pattern; + YarrPattern& m_pattern; OwnPtr<ByteDisjunction> m_bodyDisjunction; unsigned m_currentAlternativeIndex; Vector<ParenthesesStackEntry> m_parenthesesStack; Vector<ByteDisjunction*> m_allParenthesesInfo; }; -PassOwnPtr<BytecodePattern> byteCompileRegex(RegexPattern& pattern, BumpPointerAllocator* allocator) +PassOwnPtr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator) { return ByteCompiler(pattern).compile(allocator); } -int interpretRegex(BytecodePattern* regex, const UChar* input, unsigned start, unsigned length, int* output) +int interpret(BytecodePattern* bytecode, const UChar* input, unsigned start, unsigned length, int* output) { - return Interpreter(regex, output, input, start, length).interpret(); + return Interpreter(bytecode, output, input, start, length).interpret(); } - -COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoPatternCharacter) == (RegexStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoPatternCharacter); -COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoCharacterClass) == (RegexStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoCharacterClass); -COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoBackReference) == (RegexStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoBackReference); -COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoAlternative) == (RegexStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoAlternative); -COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheticalAssertion) == (RegexStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoParentheticalAssertion); -COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParenthesesOnce) == (RegexStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), 
CheckRegexStackSpaceForBackTrackInfoParenthesesOnce); -COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheses) == (RegexStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoParentheses); +COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoPatternCharacter) == (YarrStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter); +COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoCharacterClass) == (YarrStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass); +COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoBackReference) == (YarrStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference); +COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative); +COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion); +COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce); +COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheses) == (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses); } } diff --git a/Source/JavaScriptCore/yarr/RegexInterpreter.h b/Source/JavaScriptCore/yarr/YarrInterpreter.h index 0fd8a57..be703a9 100644 --- a/Source/JavaScriptCore/yarr/RegexInterpreter.h +++ b/Source/JavaScriptCore/yarr/YarrInterpreter.h @@ -23,11 +23,10 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -#ifndef RegexInterpreter_h -#define RegexInterpreter_h +#ifndef YarrInterpreter_h +#define YarrInterpreter_h -#include "RegexParser.h" -#include "RegexPattern.h" +#include "YarrPattern.h" #include <wtf/PassOwnPtr.h> #include <wtf/unicode/Unicode.h> @@ -38,21 +37,6 @@ using WTF::BumpPointerAllocator; namespace JSC { namespace Yarr { -// TODO move the matchLimit constant and the JSRegExpResult enum to the JSRegExp.h when pcre is removed. - -// The below limit restricts the number of "recursive" match calls in order to -// avoid spending exponential time on complex regular expressions. -static const unsigned matchLimit = 1000000; - -enum JSRegExpResult { - JSRegExpMatch = 1, - JSRegExpNoMatch = 0, - JSRegExpErrorNoMatch = -1, - JSRegExpErrorHitLimit = -2, - JSRegExpErrorNoMemory = -3, - JSRegExpErrorInternal = -4 -}; - class ByteDisjunction; struct ByteTerm { @@ -329,7 +313,7 @@ public: }; struct BytecodePattern : FastAllocBase { - BytecodePattern(PassOwnPtr<ByteDisjunction> body, Vector<ByteDisjunction*> allParenthesesInfo, RegexPattern& pattern, BumpPointerAllocator* allocator) + BytecodePattern(PassOwnPtr<ByteDisjunction> body, Vector<ByteDisjunction*> allParenthesesInfo, YarrPattern& pattern, BumpPointerAllocator* allocator) : m_body(body) , m_ignoreCase(pattern.m_ignoreCase) , m_multiline(pattern.m_multiline) @@ -341,7 +325,7 @@ struct BytecodePattern : FastAllocBase { m_allParenthesesInfo.append(allParenthesesInfo); m_userCharacterClasses.append(pattern.m_userCharacterClasses); - // 'Steal' the RegexPattern's CharacterClasses! We clear its + // 'Steal' the YarrPattern's CharacterClasses! We clear its // array, so that it won't delete them on destruction. We'll // take responsibility for that. pattern.m_userCharacterClasses.clear(); @@ -360,7 +344,7 @@ struct BytecodePattern : FastAllocBase { bool m_multiline; bool m_containsBeginChars; // Each BytecodePattern is associated with a RegExp, each RegExp is associated - // with a JSGlobalData. 
Cache a pointer to out JSGlobalData's m_regexAllocator. + // with a JSGlobalData. Cache a pointer to our JSGlobalData's m_regExpAllocator. BumpPointerAllocator* m_allocator; CharacterClass* newlineCharacterClass; @@ -373,9 +357,6 @@ private: Vector<CharacterClass*> m_userCharacterClasses; }; -PassOwnPtr<BytecodePattern> byteCompileRegex(RegexPattern& pattern, BumpPointerAllocator*); -int interpretRegex(BytecodePattern* v_regex, const UChar* input, unsigned start, unsigned length, int* output); - } } // namespace JSC::Yarr -#endif // RegexInterpreter_h +#endif // YarrInterpreter_h diff --git a/Source/JavaScriptCore/yarr/RegexJIT.cpp b/Source/JavaScriptCore/yarr/YarrJIT.cpp index 50fe6db..ae59cba 100644 --- a/Source/JavaScriptCore/yarr/RegexJIT.cpp +++ b/Source/JavaScriptCore/yarr/YarrJIT.cpp @@ -24,13 +24,13 @@ */ #include "config.h" -#include "RegexJIT.h" +#include "YarrJIT.h" #include "ASCIICType.h" #include "JSGlobalData.h" #include "LinkBuffer.h" #include "MacroAssembler.h" -#include "RegexParser.h" +#include "Yarr.h" #if ENABLE(YARR_JIT) @@ -38,8 +38,8 @@ using namespace WTF; namespace JSC { namespace Yarr { -class RegexGenerator : private MacroAssembler { - friend void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline); +class YarrGenerator : private MacroAssembler { + friend void jitCompile(JSGlobalData*, YarrCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline); #if CPU(ARM) static const RegisterID input = ARMRegisters::r0; @@ -299,13 +299,25 @@ class RegexGenerator : private MacroAssembler { addJump(jump); } + IndirectJumpEntry(int32_t stackOffset, DataLabelPtr dataLabel) + : m_stackOffset(stackOffset) + { + addDataLabel(dataLabel); + } + void addJump(Jump jump) { m_relJumps.append(jump); } + + void addDataLabel(DataLabelPtr dataLabel) + { + 
m_dataLabelPtrVector.append(dataLabel); + } int32_t m_stackOffset; JumpList m_relJumps; + Vector<DataLabelPtr, 16> m_dataLabelPtrVector; }; struct AlternativeBacktrackRecord { @@ -354,10 +366,31 @@ class RegexGenerator : private MacroAssembler { jumps.empty(); } + void addIndirectJumpEntry(int32_t stackOffset, DataLabelPtr dataLabel) + { + IndirectJumpHashMap::iterator result = m_indirectJumpMap.find(stackOffset); + + ASSERT(stackOffset >= 0); + + uint32_t offset = static_cast<uint32_t>(stackOffset); + + if (result == m_indirectJumpMap.end()) + m_indirectJumpMap.add(offset, new IndirectJumpEntry(stackOffset, dataLabel)); + else + result->second->addDataLabel(dataLabel); + } + void emitIndirectJumpTable(MacroAssembler* masm) { for (IndirectJumpHashMap::iterator iter = m_indirectJumpMap.begin(); iter != m_indirectJumpMap.end(); ++iter) { IndirectJumpEntry* indJumpEntry = iter->second; + size_t size = indJumpEntry->m_dataLabelPtrVector.size(); + if (size) { + // Link any associated DataLabelPtr's with indirect jump via label + Label hereLabel = masm->label(); + for (size_t i = 0; i < size; ++i) + m_backtrackRecords.append(AlternativeBacktrackRecord(indJumpEntry->m_dataLabelPtrVector[i], hereLabel)); + } indJumpEntry->m_relJumps.link(masm); masm->jump(Address(stackPointerRegister, indJumpEntry->m_stackOffset)); delete indJumpEntry; @@ -383,7 +416,7 @@ class RegexGenerator : private MacroAssembler { return parenthesesTail; } - void emitParenthesesTail(RegexGenerator* generator) + void emitParenthesesTail(YarrGenerator* generator) { unsigned vectorSize = m_parenTails.size(); bool priorBacktrackFallThrough = false; @@ -430,7 +463,7 @@ class RegexGenerator : private MacroAssembler { m_parenTailsForIteration.clear(); } - void linkToNextIteration(RegexGenerator* generator) + void linkToNextIteration(YarrGenerator* generator) { m_jumpsToNextInteration.linkTo(m_nextIteration, generator); } @@ -605,6 +638,11 @@ class RegexGenerator : private MacroAssembler { m_dataLabelPtr = 
dp; } + void clearSubDataLabelPtr() + { + m_subDataLabelPtr = 0; + } + void setSubDataLabelPtr(DataLabelPtr* subDataLabelPtr) { m_subDataLabelPtr = subDataLabelPtr; @@ -652,7 +690,7 @@ class RegexGenerator : private MacroAssembler { m_backTrackJumps.append(masm->jump()); } - void jumpToBacktrack(RegexGenerator* generator, Jump jump) + void jumpToBacktrack(YarrGenerator* generator, Jump jump) { if (isJumpList()) { if (m_backtrackSourceLabel && (m_backtrackSourceLabel->isSet())) @@ -667,7 +705,7 @@ class RegexGenerator : private MacroAssembler { m_backTrackJumps.append(jump); } - void jumpToBacktrack(RegexGenerator* generator, JumpList& jumps) + void jumpToBacktrack(YarrGenerator* generator, JumpList& jumps) { if (isJumpList()) { if (m_backtrackSourceLabel && (m_backtrackSourceLabel->isSet())) @@ -682,18 +720,7 @@ class RegexGenerator : private MacroAssembler { m_backTrackJumps.append(jumps); } - bool linkDataLabelToHereIfExists(RegexGenerator* generator) - { - if (hasDataLabel()) { - generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(getDataLabel(), generator->label())); - clearDataLabel(); - return true; - } - - return false; - } - - bool plantJumpToBacktrackIfExists(RegexGenerator* generator) + bool plantJumpToBacktrackIfExists(YarrGenerator* generator) { if (isJumpList()) { if (m_backtrackSourceLabel && (m_backtrackSourceLabel->isSet())) @@ -721,7 +748,7 @@ class RegexGenerator : private MacroAssembler { return false; } - void linkAlternativeBacktracks(RegexGenerator* generator, bool nextIteration = false) + void linkAlternativeBacktracks(YarrGenerator* generator, bool nextIteration = false) { Label hereLabel = generator->label(); @@ -743,7 +770,7 @@ class RegexGenerator : private MacroAssembler { clear(); } - void linkAlternativeBacktracksTo(RegexGenerator* generator, Label label, bool nextIteration = false) + void linkAlternativeBacktracksTo(YarrGenerator* generator, Label label, bool nextIteration = false) { 
m_backTrackJumps.linkTo(label, generator); @@ -877,27 +904,36 @@ class RegexGenerator : private MacroAssembler { m_backtrack.jumpToBacktrack(masm); } - void jumpToBacktrack(RegexGenerator* generator, Jump jump) + void jumpToBacktrack(YarrGenerator* generator, Jump jump) { m_backtrack.jumpToBacktrack(generator, jump); } - void jumpToBacktrack(RegexGenerator* generator, JumpList& jumps) + void jumpToBacktrack(YarrGenerator* generator, JumpList& jumps) { m_backtrack.jumpToBacktrack(generator, jumps); } - bool plantJumpToBacktrackIfExists(RegexGenerator* generator) + bool plantJumpToBacktrackIfExists(YarrGenerator* generator) { return m_backtrack.plantJumpToBacktrackIfExists(generator); } - bool linkDataLabelToBacktrackIfExists(RegexGenerator* generator) + bool linkDataLabelToBacktrackIfExists(YarrGenerator* generator, DataLabelPtr dataLabel) { - if ((m_backtrack.isLabel()) && (m_backtrack.hasDataLabel())) { - generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(m_backtrack.getDataLabel(), m_backtrack.getLabel())); - m_backtrack.clearDataLabel(); - return true; + // If we have a stack offset backtrack destination, use it directly + if (m_backtrack.isStackOffset()) { + generator->m_expressionState.addIndirectJumpEntry(m_backtrack.getStackOffset(), dataLabel); + m_backtrack.clearSubDataLabelPtr(); + } else { + // Otherwise set the data label (which may be linked) + setBacktrackDataLabel(dataLabel); + + if ((m_backtrack.isLabel()) && (m_backtrack.hasDataLabel())) { + generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(m_backtrack.getDataLabel(), m_backtrack.getLabel())); + m_backtrack.clearDataLabel(); + return true; + } } return false; @@ -923,13 +959,13 @@ class RegexGenerator : private MacroAssembler { m_backtrack.setLabel(label); } - void linkAlternativeBacktracks(RegexGenerator* generator, bool nextIteration = false) + void linkAlternativeBacktracks(YarrGenerator* generator, bool nextIteration = false) { 
m_backtrack.linkAlternativeBacktracks(generator, nextIteration); m_linkedBacktrack = 0; } - void linkAlternativeBacktracksTo(RegexGenerator* generator, Label label, bool nextIteration = false) + void linkAlternativeBacktracksTo(YarrGenerator* generator, Label label, bool nextIteration = false) { m_backtrack.linkAlternativeBacktracksTo(generator, label, nextIteration); } @@ -945,18 +981,12 @@ class RegexGenerator : private MacroAssembler { m_linkedBacktrack->linkToNextBacktrack(followonBacktrack); } - void chainBacktrackJumps(JumpList* jumpList) - { - if (m_linkedBacktrack && !(m_linkedBacktrack->hasDestination())) - m_linkedBacktrack->setBacktrackJumpList(jumpList); - } - BacktrackDestination& getBacktrackDestination() { return m_backtrack; } - void propagateBacktrackingFrom(RegexGenerator* generator, BacktrackDestination& backtrack, bool doJump = true) + void propagateBacktrackingFrom(YarrGenerator* generator, BacktrackDestination& backtrack, bool doJump = true) { if (doJump) m_backtrack.jumpToBacktrack(generator, backtrack.getBacktrackJumps()); @@ -989,7 +1019,7 @@ class RegexGenerator : private MacroAssembler { { } - void processBacktracks(RegexGenerator* generator, TermGenerationState& state, TermGenerationState& parenthesesState, Label nonGreedyTryParentheses, Label fallThrough) + void processBacktracks(YarrGenerator* generator, TermGenerationState& state, TermGenerationState& parenthesesState, Label nonGreedyTryParentheses, Label fallThrough) { m_nonGreedyTryParentheses = nonGreedyTryParentheses; m_fallThrough = fallThrough; @@ -1014,8 +1044,6 @@ class RegexGenerator : private MacroAssembler { stateBacktrack.setBacktrackJumpList(&m_pattBacktrackJumps); stateBacktrack.setBacktrackSourceLabel(&m_backtrackFromAfterParens); } - - parenthesesState.chainBacktrackJumps(&m_pattBacktrackJumps); } void setNextIteration(Label nextIteration) @@ -1029,7 +1057,7 @@ class RegexGenerator : private MacroAssembler { m_pattBacktrackJumps.append(jump); } - bool 
generateCode(RegexGenerator* generator, JumpList& jumpsToNext, bool priorBackTrackFallThrough, bool nextBacktrackFallThrough) + bool generateCode(YarrGenerator* generator, JumpList& jumpsToNext, bool priorBackTrackFallThrough, bool nextBacktrackFallThrough) { const RegisterID indexTemporary = regT0; unsigned parenthesesFrameLocation = m_term.frameLocation; @@ -1565,11 +1593,9 @@ class RegexGenerator : private MacroAssembler { // Alternative did not match. - state.setBacktrackDataLabel(dataLabel); - // Do we have a backtrack destination? // if so, link the data label to it. - state.linkDataLabelToBacktrackIfExists(this); + state.linkDataLabelToBacktrackIfExists(this, dataLabel); if (!state.isLastAlternative() || countToCheck) state.linkAlternativeBacktracks(this); @@ -1605,7 +1631,7 @@ class RegexGenerator : private MacroAssembler { unsigned parenthesesFrameLocation = term.frameLocation; unsigned alternativeFrameLocation = parenthesesFrameLocation; if (term.quantityType != QuantifierFixedCount) - alternativeFrameLocation += RegexStackSpaceForBackTrackInfoParenthesesOnce; + alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce; // optimized case - no capture & no quantifier can be handled in a light-weight manner. 
if (!term.capture() && (term.quantityType == QuantifierFixedCount)) { @@ -1738,7 +1764,7 @@ class RegexGenerator : private MacroAssembler { ASSERT(term.quantityType == QuantifierFixedCount); unsigned parenthesesFrameLocation = term.frameLocation; - unsigned alternativeFrameLocation = parenthesesFrameLocation + RegexStackSpaceForBackTrackInfoParentheticalAssertion; + unsigned alternativeFrameLocation = parenthesesFrameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion; int countCheckedAfterAssertion = state.checkedTotal - term.inputPosition; @@ -2163,7 +2189,7 @@ class RegexGenerator : private MacroAssembler { } public: - RegexGenerator(RegexPattern& pattern) + YarrGenerator(YarrPattern& pattern) : m_pattern(pattern) , m_shouldFallBack(false) { @@ -2182,7 +2208,7 @@ public: generateDisjunction(m_pattern.m_body); } - void compile(JSGlobalData* globalData, RegexCodeBlock& jitObject) + void compile(JSGlobalData* globalData, YarrCodeBlock& jitObject) { generate(); @@ -2196,17 +2222,20 @@ public: } private: - RegexPattern& m_pattern; + YarrPattern& m_pattern; bool m_shouldFallBack; GenerationState m_expressionState; }; -void jitCompileRegex(RegexPattern& pattern, JSGlobalData* globalData, RegexCodeBlock& jitObject) +void jitCompile(YarrPattern& pattern, JSGlobalData* globalData, YarrCodeBlock& jitObject) { - RegexGenerator generator(pattern); - generator.compile(globalData, jitObject); + YarrGenerator(pattern).compile(globalData, jitObject); } +int execute(YarrCodeBlock& jitObject, const UChar* input, unsigned start, unsigned length, int* output) +{ + return jitObject.execute(input, start, length, output); +} }} diff --git a/Source/JavaScriptCore/yarr/RegexJIT.h b/Source/JavaScriptCore/yarr/YarrJIT.h index 5e3dca1..414b575 100644 --- a/Source/JavaScriptCore/yarr/RegexJIT.h +++ b/Source/JavaScriptCore/yarr/YarrJIT.h @@ -23,13 +23,12 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -#ifndef RegexJIT_h -#define RegexJIT_h +#ifndef YarrJIT_h +#define YarrJIT_h #if ENABLE(YARR_JIT) #include "MacroAssembler.h" -#include "RegexPattern.h" #include "UString.h" #if CPU(X86) && !COMPILER(MSVC) @@ -45,16 +44,16 @@ class ExecutablePool; namespace Yarr { -class RegexCodeBlock { - typedef int (*RegexJITCode)(const UChar* input, unsigned start, unsigned length, int* output) YARR_CALL; +class YarrCodeBlock { + typedef int (*YarrJITCode)(const UChar* input, unsigned start, unsigned length, int* output) YARR_CALL; public: - RegexCodeBlock() + YarrCodeBlock() : m_needFallBack(false) { } - ~RegexCodeBlock() + ~YarrCodeBlock() { } @@ -64,7 +63,7 @@ public: int execute(const UChar* input, unsigned start, unsigned length, int* output) { - return reinterpret_cast<RegexJITCode>(m_ref.m_code.executableAddress())(input, start, length, output); + return reinterpret_cast<YarrJITCode>(m_ref.m_code.executableAddress())(input, start, length, output); } #if ENABLE(REGEXP_TRACING) @@ -76,15 +75,8 @@ private: bool m_needFallBack; }; -void jitCompileRegex(RegexPattern& pattern, JSGlobalData* globalData, RegexCodeBlock& jitObject); - -inline int executeRegex(RegexCodeBlock& jitObject, const UChar* input, unsigned start, unsigned length, int* output) -{ - return jitObject.execute(input, start, length, output); -} - } } // namespace JSC::Yarr #endif -#endif // RegexJIT_h +#endif // YarrJIT_h diff --git a/Source/JavaScriptCore/yarr/RegexParser.h b/Source/JavaScriptCore/yarr/YarrParser.h index ec5f589..8c5741a 100644 --- a/Source/JavaScriptCore/yarr/RegexParser.h +++ b/Source/JavaScriptCore/yarr/YarrParser.h @@ -23,17 +23,17 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -#ifndef RegexParser_h -#define RegexParser_h +#ifndef YarrParser_h +#define YarrParser_h #include "UString.h" -#include <limits.h> +#include "Yarr.h" #include <wtf/ASCIICType.h> #include <wtf/unicode/Unicode.h> namespace JSC { namespace Yarr { -static const unsigned quantifyInfinite = UINT_MAX; +#define REGEXP_ERROR_PREFIX "Invalid regular expression: " enum BuiltInCharacterClassID { DigitClassID, @@ -667,37 +667,29 @@ private: /* * parse(): * - * This method calls regexBegin(), calls parseTokens() to parse over the input - * patterns, calls regexEnd() or regexError() as appropriate, and converts any + * This method calls parseTokens() to parse over the input and converts any * error code to a const char* for a result. */ const char* parse() { - m_delegate.regexBegin(); - if (m_size > MAX_PATTERN_SIZE) m_err = PatternTooLarge; else parseTokens(); ASSERT(atEndOfPattern() || m_err); - if (m_err) - m_delegate.regexError(); - else - m_delegate.regexEnd(); - // The order of this array must match the ErrorCode enum. 
static const char* errorMessages[NumberOfErrorCodes] = { 0, // NoError - "regular expression too large", - "numbers out of order in {} quantifier", - "nothing to repeat", - "missing )", - "unmatched parentheses", - "unrecognized character after (?", - "missing terminating ] for character class", - "range out of order in character class", - "\\ at end of pattern" + REGEXP_ERROR_PREFIX "regular expression too large", + REGEXP_ERROR_PREFIX "numbers out of order in {} quantifier", + REGEXP_ERROR_PREFIX "nothing to repeat", + REGEXP_ERROR_PREFIX "missing )", + REGEXP_ERROR_PREFIX "unmatched parentheses", + REGEXP_ERROR_PREFIX "unrecognized character after (?", + REGEXP_ERROR_PREFIX "missing terminating ] for character class", + REGEXP_ERROR_PREFIX "range out of order in character class", + REGEXP_ERROR_PREFIX "\\ at end of pattern" }; return errorMessages[m_err]; @@ -839,14 +831,6 @@ private: * * void disjunction(); * - * void regexBegin(); - * void regexEnd(); - * void regexError(); - * - * Before any call recording tokens are made, regexBegin() will be called on the - * delegate once. Once parsing is complete either regexEnd() or regexError() will - * be called, as appropriate. - * * The regular expression is described by a sequence of assertion*() and atom*() * callbacks to the delegate, describing the terms in the regular expression. 
* Following an atom a quantifyAtom() call may occur to indicate that the previous @@ -884,4 +868,4 @@ const char* parse(Delegate& delegate, const UString& pattern, unsigned backRefer } } // namespace JSC::Yarr -#endif // RegexParser_h +#endif // YarrParser_h diff --git a/Source/JavaScriptCore/yarr/RegexPattern.cpp b/Source/JavaScriptCore/yarr/YarrPattern.cpp index e737d0e..112b65d 100644 --- a/Source/JavaScriptCore/yarr/RegexPattern.cpp +++ b/Source/JavaScriptCore/yarr/YarrPattern.cpp @@ -25,9 +25,10 @@ */ #include "config.h" +#include "YarrPattern.h" -#include "RegexInterpreter.h" -#include "RegexPattern.h" +#include "Yarr.h" +#include "YarrParser.h" #include <wtf/Vector.h> using namespace WTF; @@ -338,17 +339,20 @@ private: bool m_isCaseInsensitive; }; -class RegexPatternConstructor { +class YarrPatternConstructor { public: - RegexPatternConstructor(RegexPattern& pattern) + YarrPatternConstructor(YarrPattern& pattern) : m_pattern(pattern) , m_characterClassConstructor(pattern.m_ignoreCase) , m_beginCharHelper(&pattern.m_beginChars, pattern.m_ignoreCase) , m_invertParentheticalAssertion(false) { + m_pattern.m_body = new PatternDisjunction(); + m_alternative = m_pattern.m_body->addNewAlternative(); + m_pattern.m_disjunctions.append(m_pattern.m_body); } - ~RegexPatternConstructor() + ~YarrPatternConstructor() { } @@ -356,6 +360,10 @@ public: { m_pattern.reset(); m_characterClassConstructor.reset(); + + m_pattern.m_body = new PatternDisjunction(); + m_alternative = m_pattern.m_body->addNewAlternative(); + m_pattern.m_disjunctions.append(m_pattern.m_body); } void assertionBOL() @@ -605,19 +613,6 @@ public: m_alternative = m_alternative->m_parent->addNewAlternative(); } - void regexBegin() - { - m_pattern.m_body = new PatternDisjunction(); - m_alternative = m_pattern.m_body->addNewAlternative(); - m_pattern.m_disjunctions.append(m_pattern.m_body); - } - void regexEnd() - { - } - void regexError() - { - } - unsigned setupAlternativeOffsets(PatternAlternative* 
alternative, unsigned currentCallFrameSize, unsigned initialInputPosition) { alternative->m_hasFixedSize = true; @@ -636,7 +631,7 @@ public: case PatternTerm::TypeBackReference: term.inputPosition = currentInputPosition; term.frameLocation = currentCallFrameSize; - currentCallFrameSize += RegexStackSpaceForBackTrackInfoBackReference; + currentCallFrameSize += YarrStackSpaceForBackTrackInfoBackReference; alternative->m_hasFixedSize = false; break; @@ -647,7 +642,7 @@ public: term.inputPosition = currentInputPosition; if (term.quantityType != QuantifierFixedCount) { term.frameLocation = currentCallFrameSize; - currentCallFrameSize += RegexStackSpaceForBackTrackInfoPatternCharacter; + currentCallFrameSize += YarrStackSpaceForBackTrackInfoPatternCharacter; alternative->m_hasFixedSize = false; } else currentInputPosition += term.quantityCount; @@ -657,7 +652,7 @@ public: term.inputPosition = currentInputPosition; if (term.quantityType != QuantifierFixedCount) { term.frameLocation = currentCallFrameSize; - currentCallFrameSize += RegexStackSpaceForBackTrackInfoCharacterClass; + currentCallFrameSize += YarrStackSpaceForBackTrackInfoCharacterClass; alternative->m_hasFixedSize = false; } else currentInputPosition += term.quantityCount; @@ -668,20 +663,20 @@ public: term.frameLocation = currentCallFrameSize; if (term.quantityCount == 1 && !term.parentheses.isCopy) { if (term.quantityType != QuantifierFixedCount) - currentCallFrameSize += RegexStackSpaceForBackTrackInfoParenthesesOnce; + currentCallFrameSize += YarrStackSpaceForBackTrackInfoParenthesesOnce; currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition); // If quantity is fixed, then pre-check its minimum size. 
if (term.quantityType == QuantifierFixedCount) currentInputPosition += term.parentheses.disjunction->m_minimumSize; term.inputPosition = currentInputPosition; } else if (term.parentheses.isTerminal) { - currentCallFrameSize += RegexStackSpaceForBackTrackInfoParenthesesTerminal; + currentCallFrameSize += YarrStackSpaceForBackTrackInfoParenthesesTerminal; currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition); term.inputPosition = currentInputPosition; } else { term.inputPosition = currentInputPosition; setupDisjunctionOffsets(term.parentheses.disjunction, 0, currentInputPosition); - currentCallFrameSize += RegexStackSpaceForBackTrackInfoParentheses; + currentCallFrameSize += YarrStackSpaceForBackTrackInfoParentheses; } // Fixed count of 1 could be accepted, if they have a fixed size *AND* if all alternatives are of the same length. alternative->m_hasFixedSize = false; @@ -690,7 +685,7 @@ public: case PatternTerm::TypeParentheticalAssertion: term.inputPosition = currentInputPosition; term.frameLocation = currentCallFrameSize; - currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize + RegexStackSpaceForBackTrackInfoParentheticalAssertion, currentInputPosition); + currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize + YarrStackSpaceForBackTrackInfoParentheticalAssertion, currentInputPosition); break; } } @@ -702,7 +697,7 @@ public: unsigned setupDisjunctionOffsets(PatternDisjunction* disjunction, unsigned initialCallFrameSize, unsigned initialInputPosition) { if ((disjunction != m_pattern.m_body) && (disjunction->m_alternatives.size() > 1)) - initialCallFrameSize += RegexStackSpaceForBackTrackInfoAlternative; + initialCallFrameSize += YarrStackSpaceForBackTrackInfoAlternative; unsigned minimumInputSize = UINT_MAX; unsigned maximumCallFrameSize = 0; @@ -927,7 +922,7 @@ public: } private: - RegexPattern& m_pattern; + 
YarrPattern& m_pattern; PatternAlternative* m_alternative; CharacterClassConstructor m_characterClassConstructor; BeginCharHelper m_beginCharHelper; @@ -935,10 +930,9 @@ private: bool m_invertParentheticalAssertion; }; - -static const char* compileRegex(const UString& patternString, RegexPattern& pattern) +const char* YarrPattern::compile(const UString& patternString) { - RegexPatternConstructor constructor(pattern); + YarrPatternConstructor constructor(*this); if (const char* error = parse(constructor, patternString)) return error; @@ -947,8 +941,8 @@ static const char* compileRegex(const UString& patternString, RegexPattern& patt // Quoting Netscape's "What's new in JavaScript 1.2", // "Note: if the number of left parentheses is less than the number specified // in \#, the \# is taken as an octal escape as described in the next row." - if (pattern.containsIllegalBackReference()) { - unsigned numSubpatterns = pattern.m_numSubpatterns; + if (containsIllegalBackReference()) { + unsigned numSubpatterns = m_numSubpatterns; constructor.reset(); #if !ASSERT_DISABLED @@ -957,7 +951,7 @@ static const char* compileRegex(const UString& patternString, RegexPattern& patt parse(constructor, patternString, numSubpatterns); ASSERT(!error); - ASSERT(numSubpatterns == pattern.m_numSubpatterns); + ASSERT(numSubpatterns == m_numSubpatterns); } constructor.checkForTerminalParentheses(); @@ -967,9 +961,9 @@ static const char* compileRegex(const UString& patternString, RegexPattern& patt constructor.setupBeginChars(); return 0; -}; +} -RegexPattern::RegexPattern(const UString& pattern, bool ignoreCase, bool multiline, const char** error) +YarrPattern::YarrPattern(const UString& pattern, bool ignoreCase, bool multiline, const char** error) : m_ignoreCase(ignoreCase) , m_multiline(multiline) , m_containsBackreferences(false) @@ -985,7 +979,7 @@ RegexPattern::RegexPattern(const UString& pattern, bool ignoreCase, bool multili , nonspacesCached(0) , nonwordcharCached(0) { - *error = 
compileRegex(pattern, *this); + *error = compile(pattern); } } } diff --git a/Source/JavaScriptCore/yarr/RegexPattern.h b/Source/JavaScriptCore/yarr/YarrPattern.h index 6833dd6..2172dda 100644 --- a/Source/JavaScriptCore/yarr/RegexPattern.h +++ b/Source/JavaScriptCore/yarr/YarrPattern.h @@ -24,8 +24,8 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef RegexPattern_h -#define RegexPattern_h +#ifndef YarrPattern_h +#define YarrPattern_h #include <wtf/Vector.h> #include <wtf/unicode/Unicode.h> @@ -34,15 +34,6 @@ namespace JSC { namespace Yarr { -#define RegexStackSpaceForBackTrackInfoPatternCharacter 1 // Only for !fixed quantifiers. -#define RegexStackSpaceForBackTrackInfoCharacterClass 1 // Only for !fixed quantifiers. -#define RegexStackSpaceForBackTrackInfoBackReference 2 -#define RegexStackSpaceForBackTrackInfoAlternative 1 // One per alternative. -#define RegexStackSpaceForBackTrackInfoParentheticalAssertion 1 -#define RegexStackSpaceForBackTrackInfoParenthesesOnce 1 // Only for !fixed quantifiers. -#define RegexStackSpaceForBackTrackInfoParenthesesTerminal 1 -#define RegexStackSpaceForBackTrackInfoParentheses 2 - struct PatternDisjunction; struct CharacterRange { @@ -282,7 +273,7 @@ struct PatternDisjunction : FastAllocBase { // You probably don't want to be calling these functions directly // (please to be calling newlineCharacterClass() et al on your -// friendly neighborhood RegexPattern instance to get nicely +// friendly neighborhood YarrPattern instance to get nicely // cached copies). 
CharacterClass* newlineCreate(); CharacterClass* digitsCreate(); @@ -316,10 +307,10 @@ struct BeginChar { unsigned mask; }; -struct RegexPattern { - RegexPattern(const UString& pattern, bool ignoreCase, bool multiline, const char** error); +struct YarrPattern { + YarrPattern(const UString& pattern, bool ignoreCase, bool multiline, const char** error); - ~RegexPattern() + ~YarrPattern() { deleteAllValues(m_disjunctions); deleteAllValues(m_userCharacterClasses); @@ -410,6 +401,8 @@ struct RegexPattern { Vector<BeginChar> m_beginChars; private: + const char* compile(const UString& patternString); + CharacterClass* newlineCached; CharacterClass* digitsCached; CharacterClass* spacesCached; @@ -421,4 +414,4 @@ private: } } // namespace JSC::Yarr -#endif // RegexPattern_h +#endif // YarrPattern_h diff --git a/Source/JavaScriptCore/yarr/YarrSyntaxChecker.cpp b/Source/JavaScriptCore/yarr/YarrSyntaxChecker.cpp new file mode 100644 index 0000000..51fda94 --- /dev/null +++ b/Source/JavaScriptCore/yarr/YarrSyntaxChecker.cpp @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2011 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. 
OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "YarrSyntaxChecker.h" + +#include "YarrParser.h" + +namespace JSC { namespace Yarr { + +class SyntaxChecker { +public: + void assertionBOL() {} + void assertionEOL() {} + void assertionWordBoundary(bool) {} + void atomPatternCharacter(UChar) {} + void atomBuiltInCharacterClass(BuiltInCharacterClassID, bool) {} + void atomCharacterClassBegin(bool = false) {} + void atomCharacterClassAtom(UChar) {} + void atomCharacterClassRange(UChar, UChar) {} + void atomCharacterClassBuiltIn(BuiltInCharacterClassID, bool) {} + void atomCharacterClassEnd() {} + void atomParenthesesSubpatternBegin(bool = true) {} + void atomParentheticalAssertionBegin(bool = false) {} + void atomParenthesesEnd() {} + void atomBackReference(unsigned) {} + void quantifyAtom(unsigned, unsigned, bool) {} + void disjunction() {} +}; + +const char* checkSyntax(const UString& pattern) +{ + SyntaxChecker syntaxChecker; + return parse(syntaxChecker, pattern); +} + +}} // JSC::YARR diff --git a/Source/JavaScriptCore/yarr/YarrSyntaxChecker.h b/Source/JavaScriptCore/yarr/YarrSyntaxChecker.h new file mode 100644 index 0000000..e48cb9e --- /dev/null +++ b/Source/JavaScriptCore/yarr/YarrSyntaxChecker.h @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2011 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef YarrSyntaxChecker_h +#define YarrSyntaxChecker_h + +#include <UString.h> + +namespace JSC { namespace Yarr { + +const char* checkSyntax(const UString& pattern); + +}} // JSC::YARR + +#endif // YarrSyntaxChecker_h + |