diff options
Diffstat (limited to 'Source/WebCore/page/UserContentURLPattern.cpp')
-rw-r--r-- | Source/WebCore/page/UserContentURLPattern.cpp | 231 |
1 files changed, 231 insertions, 0 deletions
diff --git a/Source/WebCore/page/UserContentURLPattern.cpp b/Source/WebCore/page/UserContentURLPattern.cpp new file mode 100644 index 0000000..272284a --- /dev/null +++ b/Source/WebCore/page/UserContentURLPattern.cpp @@ -0,0 +1,231 @@ +/* + * Copyright (C) 2009, 2010 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "UserContentURLPattern.h" +#include "KURL.h" +#include <wtf/StdLibExtras.h> + +namespace WebCore { + +bool UserContentURLPattern::matchesPatterns(const KURL& url, const Vector<String>* whitelist, const Vector<String>* blacklist) +{ + // In order for a URL to be a match it has to be present in the whitelist and not present in the blacklist. + // If there is no whitelist at all, then all URLs are assumed to be in the whitelist. + bool matchesWhitelist = !whitelist || whitelist->isEmpty(); + if (!matchesWhitelist) { + for (unsigned i = 0; i < whitelist->size(); ++i) { + UserContentURLPattern contentPattern(whitelist->at(i)); + if (contentPattern.matches(url)) { + matchesWhitelist = true; + break; + } + } + } + + bool matchesBlacklist = false; + if (blacklist) { + for (unsigned i = 0; i < blacklist->size(); ++i) { + UserContentURLPattern contentPattern(blacklist->at(i)); + if (contentPattern.matches(url)) { + matchesBlacklist = true; + break; + } + } + } + + return matchesWhitelist && !matchesBlacklist; +} + +bool UserContentURLPattern::parse(const String& pattern) +{ + DEFINE_STATIC_LOCAL(const String, schemeSeparator, ("://")); + + size_t schemeEndPos = pattern.find(schemeSeparator); + if (schemeEndPos == notFound) + return false; + + m_scheme = pattern.left(schemeEndPos); + + unsigned hostStartPos = schemeEndPos + schemeSeparator.length(); + if (hostStartPos >= pattern.length()) + return false; + + int pathStartPos = 0; + + if (equalIgnoringCase(m_scheme, "file")) + pathStartPos = hostStartPos; + else { + size_t hostEndPos = pattern.find("/", hostStartPos); + if (hostEndPos == notFound) + return false; + + m_host = pattern.substring(hostStartPos, hostEndPos - hostStartPos); + m_matchSubdomains = false; + + if (m_host == "*") { + // The pattern can be just '*', which means match all domains. + m_host = ""; + m_matchSubdomains = true; + } else if (m_host.startsWith("*.")) { + // The first component can be '*', which means to match all subdomains. + m_host = m_host.substring(2); // Length of "*." + m_matchSubdomains = true; + } + + // No other '*' can occur in the host. + if (m_host.find("*") != notFound) + return false; + + pathStartPos = hostEndPos; + } + + m_path = pattern.right(pattern.length() - pathStartPos); + + return true; +} + +bool UserContentURLPattern::matches(const KURL& test) const +{ + if (m_invalid) + return false; + + if (!equalIgnoringCase(test.protocol(), m_scheme)) + return false; + + if (!equalIgnoringCase(m_scheme, "file") && !matchesHost(test)) + return false; + + return matchesPath(test); +} + +bool UserContentURLPattern::matchesHost(const KURL& test) const +{ + const String& host = test.host(); + if (equalIgnoringCase(host, m_host)) + return true; + + if (!m_matchSubdomains) + return false; + + // If we're matching subdomains, and we have no host, that means the pattern + // was <scheme>://*/<whatever>, so we match anything. + if (!m_host.length()) + return true; + + // Check if the domain is a subdomain of our host. + if (!host.endsWith(m_host, false)) + return false; + + ASSERT(host.length() > m_host.length()); + + // Check that the character before the suffix is a period. + return host[host.length() - m_host.length() - 1] == '.'; +} + +struct MatchTester +{ + const String m_pattern; + unsigned m_patternIndex; + + const String m_test; + unsigned m_testIndex; + + MatchTester(const String& pattern, const String& test) + : m_pattern(pattern) + , m_patternIndex(0) + , m_test(test) + , m_testIndex(0) + { + } + + bool testStringFinished() const { return m_testIndex >= m_test.length(); } + bool patternStringFinished() const { return m_patternIndex >= m_pattern.length(); } + + void eatWildcard() + { + while (!patternStringFinished()) { + if (m_pattern[m_patternIndex] != '*') + return; + m_patternIndex++; + } + } + + void eatSameChars() + { + while (!patternStringFinished() && !testStringFinished()) { + if (m_pattern[m_patternIndex] == '*') + return; + if (m_pattern[m_patternIndex] != m_test[m_testIndex]) + return; + m_patternIndex++; + m_testIndex++; + } + } + + bool test() + { + // Eat all the matching chars. + eatSameChars(); + + // If the string is finished, then the pattern must be empty too, or contains + // only wildcards. + if (testStringFinished()) { + eatWildcard(); + if (patternStringFinished()) + return true; + return false; + } + + // Pattern is empty but not string, this is not a match. + if (patternStringFinished()) + return false; + + // If we don't encounter a *, then we're hosed. + if (m_pattern[m_patternIndex] != '*') + return false; + + while (!testStringFinished()) { + MatchTester nextMatch(*this); + nextMatch.m_patternIndex++; + if (nextMatch.test()) + return true; + m_testIndex++; + } + + // We reached the end of the string. Let's see if the pattern contains only + // wildcards. + eatWildcard(); + return patternStringFinished(); + } +}; + +bool UserContentURLPattern::matchesPath(const KURL& test) const +{ + MatchTester match(m_path, test.path()); + return match.test(); +} + +} // namespace WebCore |