summaryrefslogtreecommitdiffstats
path: root/JavaScriptCore/create_regex_tables
blob: b436eee11fc572419defff4b6035a5d0c496f531 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# Copyright (C) 2010 Apple Inc. All rights reserved.
# 
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 
# THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 

# Character classes to generate code for, keyed by class name.
#   "data"     - list of members: a one-character string, an integer code
#                point, or a (first, last) inclusive range whose ends may each
#                be a character or an integer.
#   "UseTable" - when True the generated factory wraps a 65536-entry lookup
#                table; when False it passes 0 instead of a table.
#   "Inverse"  - optional; names the class whose table is reused with the
#                "inverted" flag set, so no table is emitted for this class.
types = {
    "wordchar": { "UseTable" : True, "data": ['_', ('0','9'), ('A', 'Z'), ('a','z')]},
    "nonwordchar": { "UseTable" : True, "Inverse": "wordchar", "data": ['`', (0, ord('0') - 1), (ord('9') + 1, ord('A') - 1), (ord('Z') + 1, ord('_') - 1), (ord('z') + 1, 0xffff)]},
    "newline": { "UseTable" : False, "data": ['\n', '\r', 0x2028, 0x2029]},
    "spaces": { "UseTable" : True, "data": [' ', ('\t', '\r'), 0xa0, 0x1680, 0x180e, 0x2028, 0x2029, 0x202f, 0x205f, 0x3000, (0x2000, 0x200a)]},
    "nonspaces": { "UseTable" : True, "Inverse": "spaces", "data": [(0, ord('\t') - 1), (ord('\r') + 1, ord(' ') - 1), (ord(' ') + 1, 0x009f), (0x00a1, 0x167f), (0x1681, 0x180d), (0x180f, 0x1fff), (0x200b, 0x2027), (0x202a, 0x202e), (0x2030, 0x205e), (0x2060, 0x2fff), (0x3001, 0xffff)]},
    "digits": { "UseTable" : False, "data": [('0', '9')]},
    "nondigits": { "UseTable" : False, "Inverse": "digits", "data": [(0, ord('0') - 1), (ord('9') + 1, 0xffff)] }
}

# Number of table entries written per line of generated output.
entriesPerLine = 50

# Number of entries in every generated lookup table (one per 16-bit code).
_TABLE_SIZE = 0x10000
# Largest code that is emitted into the non-"Unicode" match/range lists.
_ASCII_MAX = 0x7f


def _to_code(value):
    """Return the code point of a one-character string; pass integers through."""
    if isinstance(value, str):
        return ord(value)
    return value


def _normalized_ranges(data):
    """Convert a class's "data" list into a sorted list of (first, last) ints.

    Single members become one-element ranges.  A range that straddles the
    0x7f/0x80 boundary is split in two so that every resulting range lies
    entirely on one side of it.
    """
    ranges = []
    for item in data:
        if isinstance(item, (str, int)):
            code = _to_code(item)
            ranges.append((code, code))
            continue
        first, last = item
        first = _to_code(first)
        last = _to_code(last)
        if first <= _ASCII_MAX < last:
            ranges.append((first, _ASCII_MAX))
            first = _ASCII_MAX + 1
        ranges.append((first, last))
    ranges.sort()
    return ranges


def _table_definition(name, ranges):
    """Return the C source of the 65536-entry membership table for `name`.

    Each entry is "1" when its index falls inside one of `ranges` and "0"
    otherwise; entries are comma separated with a line break after every
    `entriesPerLine` entries.

    Raises:
        ValueError: if a range reaches outside 0..0xffff, which could not be
            represented in the fixed-size table.
    """
    flags = ["0"] * _TABLE_SIZE
    for first, last in ranges:
        if first < 0 or last >= _TABLE_SIZE:
            raise ValueError("range (0x%x, 0x%x) of %s does not fit in the table"
                             % (first, last, name))
        for code in range(first, last + 1):
            flags[code] = "1"
    rows = [",".join(flags[start:start + entriesPerLine])
            for start in range(0, _TABLE_SIZE, entriesPerLine)]
    # Joining rows with ",\n" leaves the trailing comma at the end of each
    # line and none after the very last entry.
    return "static const char _%sData[65536] = {\n%s\n};\n\n" % (name, ",\n".join(rows))


def _create_function(name, classes, ranges):
    """Return the C++ source of the `<name>Create()` factory function."""
    lines = ["CharacterClass* %sCreate()\n" % name, "{\n"]
    if classes["UseTable"]:
        if "Inverse" in classes:
            # Reuse the table of the inverse class with the inverted flag set.
            lines.append("    CharacterClass* characterClass = new CharacterClass(CharacterClassTable::create(_%sData, true));\n" % (classes["Inverse"]))
        else:
            lines.append("    CharacterClass* characterClass = new CharacterClass(CharacterClassTable::create(_%sData, false));\n" % (name))
    else:
        lines.append("    CharacterClass* characterClass = new CharacterClass(0);\n")
    for first, last in ranges:
        if first == last:
            if first > _ASCII_MAX:
                lines.append("    characterClass->m_matchesUnicode.append(0x%04x);\n" % first)
            else:
                lines.append("    characterClass->m_matches.append(0x%02x);\n" % first)
        elif first > _ASCII_MAX or last > _ASCII_MAX:
            lines.append("    characterClass->m_rangesUnicode.append(CharacterRange(0x%04x, 0x%04x));\n" % (first, last))
        else:
            lines.append("    characterClass->m_ranges.append(CharacterRange(0x%02x, 0x%02x));\n" % (first, last))
    lines.append("    return characterClass;\n")
    lines.append("}\n\n")
    return "".join(lines)


_array_chunks = []
_function_chunks = []
for name, classes in types.items():
    ranges = _normalized_ranges(classes["data"])
    # Only classes that own a table get one emitted; inverse classes refer to
    # the table of the class they invert.
    if classes["UseTable"] and "Inverse" not in classes:
        _array_chunks.append(_table_definition(name, ranges))
    _function_chunks.append(_create_function(name, classes, ranges))

arrays = "".join(_array_chunks)
functions = "".join(_function_chunks)

# All tables are printed before the functions that reference them.
print(arrays)
print(functions)