1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
|
/*
* Copyright (C) 2010 Adam Barth. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef SuffixTree_h
#define SuffixTree_h
#include "PlatformString.h"
#include <wtf/Vector.h>
namespace WebCore {
class UnicodeCodebook {
public:
static int codeWord(UChar c) { return c; }
enum { codeSize = 1 << 8 * sizeof(UChar) };
};
class ASCIICodebook {
public:
static int codeWord(UChar c) { return c & (codeSize - 1); }
enum { codeSize = 1 << (8 * sizeof(char) - 1) };
};
template<typename Codebook>
class SuffixTree {
public:
SuffixTree(const String& text, unsigned depth)
: m_depth(depth)
, m_leaf(true)
{
build(text);
}
bool mightContain(const String& query)
{
Node* current = &m_root;
int limit = std::min(m_depth, query.length());
for (int i = 0; i < limit; ++i) {
current = current->at(Codebook::codeWord(query[i]));
if (!current)
return false;
}
return true;
}
private:
class Node {
public:
Node(bool isLeaf = false)
{
m_children.resize(Codebook::codeSize);
m_children.fill(0);
m_isLeaf = isLeaf;
}
~Node()
{
for (unsigned i = 0; i < m_children.size(); ++i) {
Node* child = m_children.at(i);
if (child && !child->m_isLeaf)
delete child;
}
}
Node*& at(int codeWord) { return m_children.at(codeWord); }
private:
typedef Vector<Node*, Codebook::codeSize> ChildrenVector;
ChildrenVector m_children;
bool m_isLeaf;
};
void build(const String& text)
{
for (unsigned base = 0; base < text.length(); ++base) {
Node* current = &m_root;
unsigned limit = std::min(base + m_depth, text.length());
for (unsigned offset = 0; base + offset < limit; ++offset) {
ASSERT(current != &m_leaf);
Node*& child = current->at(Codebook::codeWord(text[base + offset]));
if (!child)
child = base + offset + 1 == limit ? &m_leaf : new Node();
current = child;
}
}
}
Node m_root;
unsigned m_depth;
// Instead of allocating a fresh empty leaf node for ever leaf in the tree
// (there can be a lot of these), we alias all the leaves to this "static"
// leaf node.
Node m_leaf;
};
} // namespace WebCore
#endif // SuffixTree_h
|