diff options
Diffstat (limited to 'JavaScriptCore/create_regex_tables')
-rw-r--r-- | JavaScriptCore/create_regex_tables | 112 |
1 files changed, 112 insertions, 0 deletions
diff --git a/JavaScriptCore/create_regex_tables b/JavaScriptCore/create_regex_tables new file mode 100644 index 0000000..b436eee --- /dev/null +++ b/JavaScriptCore/create_regex_tables @@ -0,0 +1,112 @@ +# Copyright (C) 2010 Apple Inc. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# Generator script: prints C++ source for the built-in regular-expression
# character classes.  For every entry in `types` it emits a
# `CharacterClass* <name>Create()` factory function, and for every class that
# owns a lookup table it also emits a `static const char _<name>Data[65536]`
# array holding one 0/1 flag per 16-bit code point.
#
# Each entry of `types` has:
#   "UseTable" - whether the generated factory references a lookup table.
#   "Inverse"  - (optional) name of the class whose table is reused; no table
#                is emitted for such a class and the factory passes `true` as
#                the second argument of CharacterClassTable::create
#                (presumably an "inverted" flag - confirm against the C++ side).
#   "data"     - members of the class: a single character, a single integer
#                code point, or an inclusive (first, last) pair of either.

types = {
    "wordchar": {
        "UseTable": True,
        "data": ['_', ('0', '9'), ('A', 'Z'), ('a', 'z')],
    },
    "nonwordchar": {
        "UseTable": True,
        "Inverse": "wordchar",
        "data": [
            '`',
            (0, ord('0') - 1),
            (ord('9') + 1, ord('A') - 1),
            (ord('Z') + 1, ord('_') - 1),
            (ord('z') + 1, 0xffff),
        ],
    },
    "newline": {
        "UseTable": False,
        "data": ['\n', '\r', 0x2028, 0x2029],
    },
    "spaces": {
        "UseTable": True,
        "data": [
            ' ', ('\t', '\r'), 0xa0, 0x1680, 0x180e, 0x2028, 0x2029,
            0x202f, 0x205f, 0x3000, (0x2000, 0x200a),
        ],
    },
    "nonspaces": {
        "UseTable": True,
        "Inverse": "spaces",
        "data": [
            (0, ord('\t') - 1),
            (ord('\r') + 1, ord(' ') - 1),
            (ord(' ') + 1, 0x009f),
            (0x00a1, 0x167f),
            (0x1681, 0x180d),
            (0x180f, 0x1fff),
            (0x200b, 0x2027),
            (0x202a, 0x202e),
            (0x2030, 0x205e),
            (0x2060, 0x2fff),
            (0x3001, 0xffff),
        ],
    },
    "digits": {
        "UseTable": False,
        "data": [('0', '9')],
    },
    "nondigits": {
        "UseTable": False,
        "Inverse": "digits",
        "data": [(0, ord('0') - 1), (ord('9') + 1, 0xffff)],
    },
}

# Number of table entries written per line of generated C++.
entriesPerLine = 50

# One table entry per 16-bit code point.
_TABLE_SIZE = 65536

# Highest code point routed to the non-Unicode match lists in the factories.
_LAST_ASCII = 0x7f


def _code_point(value):
    """Return `value` as an integer code point (characters go through ord)."""
    return ord(value) if isinstance(value, str) else value


def _collect_ranges(data):
    """Turn a class's "data" list into a sorted list of inclusive (low, high) pairs.

    Single characters and integers become one-element ranges.  A range that
    straddles the 0x7f/0x80 boundary is split in two so that no emitted range
    mixes code points from both sides of it; the factory generator relies on
    this when choosing between the plain and the Unicode lists.
    """
    ranges = []
    for entry in data:
        if isinstance(entry, (str, int)):
            code = _code_point(entry)
            ranges.append((code, code))
            continue
        low, high = entry
        low = _code_point(low)
        high = _code_point(high)
        if low <= _LAST_ASCII < high:
            ranges.append((low, _LAST_ASCII))
            low = _LAST_ASCII + 1
        ranges.append((low, high))
    ranges.sort()
    return ranges


def _build_table(name, ranges):
    """Return the C++ definition of the 65536-entry 0/1 lookup table for `name`.

    Entries are comma separated with `entriesPerLine` entries per line; the
    final entry carries no trailing comma.

    Raises:
        ValueError: if a range falls outside 0..0xffff, which could not be
            represented in the fixed-size table.
    """
    flags = bytearray(_TABLE_SIZE)
    for low, high in ranges:
        if low < 0 or high >= _TABLE_SIZE:
            raise ValueError(
                "range (0x%x, 0x%x) of class %r does not fit in the table"
                % (low, high, name))
        for code in range(low, high + 1):
            flags[code] = 1

    last = _TABLE_SIZE - 1
    pieces = ["static const char _%sData[%d] = {\n" % (name, _TABLE_SIZE)]
    for index, flag in enumerate(flags):
        if index == last:
            pieces.append("%d" % flag)
            break
        pieces.append("%d," % flag)
        if (index + 1) % entriesPerLine == 0:
            pieces.append("\n")
    pieces.append("\n};\n\n")
    return "".join(pieces)


def _build_create_function(name, classes, ranges):
    """Return the C++ source of the `<name>Create()` factory function.

    The factory constructs the CharacterClass (with its own table, another
    class's table, or 0 when no table is used) and then appends every range:
    one-element ranges go to m_matches / m_matchesUnicode, wider ones to
    m_ranges / m_rangesUnicode, the Unicode variants being used for code
    points above 127.
    """
    lines = ["CharacterClass* %sCreate()\n" % name, "{\n"]
    if not classes["UseTable"]:
        lines.append(" CharacterClass* characterClass = new CharacterClass(0);\n")
    elif "Inverse" in classes:
        lines.append(" CharacterClass* characterClass = new CharacterClass(CharacterClassTable::create(_%sData, true));\n" % classes["Inverse"])
    else:
        lines.append(" CharacterClass* characterClass = new CharacterClass(CharacterClassTable::create(_%sData, false));\n" % name)

    for low, high in ranges:
        is_unicode = low > 127 or high > 127
        if low == high:
            if is_unicode:
                lines.append(" characterClass->m_matchesUnicode.append(0x%04x);\n" % low)
            else:
                lines.append(" characterClass->m_matches.append(0x%02x);\n" % low)
        elif is_unicode:
            lines.append(" characterClass->m_rangesUnicode.append(CharacterRange(0x%04x, 0x%04x));\n" % (low, high))
        else:
            lines.append(" characterClass->m_ranges.append(CharacterRange(0x%02x, 0x%02x));\n" % (low, high))

    lines.append(" return characterClass;\n")
    lines.append("}\n\n")
    return "".join(lines)


_table_parts = []
_function_parts = []
for name, classes in types.items():
    ranges = _collect_ranges(classes["data"])
    # Classes declared as the inverse of another class share that class's
    # table, so a table is only emitted for classes that own one.
    if classes["UseTable"] and "Inverse" not in classes:
        _table_parts.append(_build_table(name, ranges))
    _function_parts.append(_build_create_function(name, classes, ranges))

arrays = "".join(_table_parts)
functions = "".join(_function_parts)

print(arrays)
print(functions)