diff options
author | Steve Block <steveblock@google.com> | 2011-05-06 11:45:16 +0100 |
---|---|---|
committer | Steve Block <steveblock@google.com> | 2011-05-12 13:44:10 +0100 |
commit | cad810f21b803229eb11403f9209855525a25d57 (patch) | |
tree | 29a6fd0279be608e0fe9ffe9841f722f0f4e4269 /Source/WebCore/platform/text/SuffixTree.h | |
parent | 121b0cf4517156d0ac5111caf9830c51b69bae8f (diff) | |
download | external_webkit-cad810f21b803229eb11403f9209855525a25d57.zip external_webkit-cad810f21b803229eb11403f9209855525a25d57.tar.gz external_webkit-cad810f21b803229eb11403f9209855525a25d57.tar.bz2 |
Merge WebKit at r75315: Initial merge by git.
Change-Id: I570314b346ce101c935ed22a626b48c2af266b84
Diffstat (limited to 'Source/WebCore/platform/text/SuffixTree.h')
-rw-r--r-- | Source/WebCore/platform/text/SuffixTree.h | 122 |
1 files changed, 122 insertions, 0 deletions
diff --git a/Source/WebCore/platform/text/SuffixTree.h b/Source/WebCore/platform/text/SuffixTree.h new file mode 100644 index 0000000..f11fd23 --- /dev/null +++ b/Source/WebCore/platform/text/SuffixTree.h @@ -0,0 +1,122 @@ +/* + * Copyright (C) 2010 Adam Barth. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SuffixTree_h +#define SuffixTree_h + +#include "PlatformString.h" +#include <wtf/Vector.h> + +namespace WebCore { + +class UnicodeCodebook { +public: + static int codeWord(UChar c) { return c; } + enum { codeSize = 1 << 8 * sizeof(UChar) }; +}; + +class ASCIICodebook { +public: + static int codeWord(UChar c) { return c & (codeSize - 1); } + enum { codeSize = 1 << (8 * sizeof(char) - 1) }; +}; + +template<typename Codebook> +class SuffixTree { +public: + SuffixTree(const String& text, unsigned depth) + : m_depth(depth) + , m_leaf(true) + { + build(text); + } + + bool mightContain(const String& query) + { + Node* current = &m_root; + int limit = std::min(m_depth, query.length()); + for (int i = 0; i < limit; ++i) { + current = current->at(Codebook::codeWord(query[i])); + if (!current) + return false; + } + return true; + } + +private: + class Node { + public: + Node(bool isLeaf = false) + { + m_children.resize(Codebook::codeSize); + m_children.fill(0); + m_isLeaf = isLeaf; + } + + ~Node() + { + for (unsigned i = 0; i < m_children.size(); ++i) { + Node* child = m_children.at(i); + if (child && !child->m_isLeaf) + delete child; + } + } + + Node*& at(int codeWord) { return m_children.at(codeWord); } + + private: + typedef Vector<Node*, Codebook::codeSize> ChildrenVector; + + ChildrenVector m_children; + bool m_isLeaf; + }; + + void build(const String& text) + { + for (unsigned base = 0; base < text.length(); ++base) { + Node* current = &m_root; + unsigned limit = std::min(base + m_depth, text.length()); + for (unsigned offset = 0; base + offset < limit; ++offset) { + ASSERT(current != &m_leaf); + Node*& child = current->at(Codebook::codeWord(text[base + offset])); + if (!child) + child = base + offset + 1 == limit ? &m_leaf : new Node(); + current = child; + } + } + } + + Node m_root; + unsigned m_depth; + + // Instead of allocating a fresh empty leaf node for ever leaf in the tree + // (there can be a lot of these), we alias all the leaves to this "static" + // leaf node. + Node m_leaf; +}; + +} // namespace WebCore + +#endif // SuffixTree_h |