diff options
author | Upstream <upstream-import@none> | 1970-01-12 13:46:40 +0000 |
---|---|---|
committer | Upstream <upstream-import@none> | 1970-01-12 13:46:40 +0000 |
commit | d8543bb6618c17b12da906afa77d216f58cf4058 (patch) | |
tree | c58dc05ed86825bd0ef8d305d58c8205106b540f /WebCore/platform/text/TextEncodingRegistry.cpp | |
download | external_webkit-d8543bb6618c17b12da906afa77d216f58cf4058.zip external_webkit-d8543bb6618c17b12da906afa77d216f58cf4058.tar.gz external_webkit-d8543bb6618c17b12da906afa77d216f58cf4058.tar.bz2 |
external/webkit r30707
Diffstat (limited to 'WebCore/platform/text/TextEncodingRegistry.cpp')
-rw-r--r-- | WebCore/platform/text/TextEncodingRegistry.cpp | 243 |
1 files changed, 243 insertions, 0 deletions
diff --git a/WebCore/platform/text/TextEncodingRegistry.cpp b/WebCore/platform/text/TextEncodingRegistry.cpp new file mode 100644 index 0000000..a7ad879 --- /dev/null +++ b/WebCore/platform/text/TextEncodingRegistry.cpp @@ -0,0 +1,243 @@ +/* + * Copyright (C) 2006, 2007 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "TextEncodingRegistry.h" + +#include "PlatformString.h" +#include "TextCodecLatin1.h" +#include "TextCodecUserDefined.h" +#include "TextCodecUTF16.h" +#include <wtf/ASCIICType.h> +#include <wtf/Assertions.h> +#include <wtf/HashMap.h> + +#if USE(ICU_UNICODE) +#include "TextCodecICU.h" +#endif +#if PLATFORM(MAC) +#include "TextCodecMac.h" +#endif +#if PLATFORM(QT) +#include "qt/TextCodecQt.h" +#endif + +using namespace WTF; + +namespace WebCore { + +const size_t maxEncodingNameLength = 63; + +// Hash for all-ASCII strings that does case folding and skips any characters +// that are not alphanumeric. If passed any non-ASCII characters, depends on +// the behavior of isalnum -- if that returns false as it does on OS X, then +// it will properly skip those characters too. +struct TextEncodingNameHash { + + // Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's + // or anything like that. + static const unsigned PHI = 0x9e3779b9U; + + static bool equal(const char* s1, const char* s2) + { + char c1; + char c2; + do { + do + c1 = *s1++; + while (c1 && !isASCIIAlphanumeric(c1)); + do + c2 = *s2++; + while (c2 && !isASCIIAlphanumeric(c2)); + if (toASCIILower(c1) != toASCIILower(c2)) + return false; + } while (c1 && c2); + return !c1 && !c2; + } + + // This algorithm is the one-at-a-time hash from: + // http://burtleburtle.net/bob/hash/hashfaq.html + // http://burtleburtle.net/bob/hash/doobs.html + static unsigned hash(const char* s) + { + unsigned h = PHI; + for (;;) { + char c; + do { + c = *s++; + if (!c) { + h += (h << 3); + h ^= (h >> 11); + h += (h << 15); + return h; + } + } while (!isASCIIAlphanumeric(c)); + h += toASCIILower(c); + h += (h << 10); + h ^= (h >> 6); + } + } + + static const bool safeToCompareToEmptyOrDeleted = false; +}; + +struct TextCodecFactory { + NewTextCodecFunction function; + const void* additionalData; + TextCodecFactory(NewTextCodecFunction f = 0, const void* d = 0) : function(f), additionalData(d) { } +}; + +typedef HashMap<const char*, const char*, TextEncodingNameHash> TextEncodingNameMap; +typedef HashMap<const char*, TextCodecFactory> TextCodecMap; + +static TextEncodingNameMap* textEncodingNameMap; +static TextCodecMap* textCodecMap; +static bool didExtendTextCodecMaps; + +#if ERROR_DISABLED + +static inline void checkExistingName(const char*, const char*) { } + +#else + +static void checkExistingName(const char* alias, const char* atomicName) +{ + const char* oldAtomicName = textEncodingNameMap->get(alias); + if (!oldAtomicName) + return; + if (oldAtomicName == atomicName) + return; + // Keep the warning silent about one case where we know this will happen. + if (strcmp(alias, "ISO-8859-8-I") == 0 + && strcmp(oldAtomicName, "ISO-8859-8-I") == 0 + && strcmp(atomicName, "ISO_8859-8:1988") == 0) + return; + LOG_ERROR("alias %s maps to %s already, but someone is trying to make it map to %s", + alias, oldAtomicName, atomicName); +} + +#endif + +static void addToTextEncodingNameMap(const char* alias, const char* name) +{ + ASSERT(strlen(alias) <= maxEncodingNameLength); + const char* atomicName = textEncodingNameMap->get(name); + ASSERT(strcmp(alias, name) == 0 || atomicName); + if (!atomicName) + atomicName = name; + checkExistingName(alias, atomicName); + textEncodingNameMap->add(alias, atomicName); +} + +static void addToTextCodecMap(const char* name, NewTextCodecFunction function, const void* additionalData) +{ + TextEncoding encoding(name); + ASSERT(encoding.isValid()); + textCodecMap->add(encoding.name(), TextCodecFactory(function, additionalData)); +} + +static void buildBaseTextCodecMaps() +{ + textCodecMap = new TextCodecMap; + textEncodingNameMap = new TextEncodingNameMap; + + TextCodecLatin1::registerEncodingNames(addToTextEncodingNameMap); + TextCodecLatin1::registerCodecs(addToTextCodecMap); + + TextCodecUTF16::registerEncodingNames(addToTextEncodingNameMap); + TextCodecUTF16::registerCodecs(addToTextCodecMap); + + TextCodecUserDefined::registerEncodingNames(addToTextEncodingNameMap); + TextCodecUserDefined::registerCodecs(addToTextCodecMap); + +#if USE(ICU_UNICODE) + TextCodecICU::registerBaseEncodingNames(addToTextEncodingNameMap); + TextCodecICU::registerBaseCodecs(addToTextCodecMap); +#endif +} + +static void extendTextCodecMaps() +{ +#if USE(ICU_UNICODE) + TextCodecICU::registerExtendedEncodingNames(addToTextEncodingNameMap); + TextCodecICU::registerExtendedCodecs(addToTextCodecMap); +#endif + +#if USE(QT4_UNICODE) + TextCodecQt::registerEncodingNames(addToTextEncodingNameMap); + TextCodecQt::registerCodecs(addToTextCodecMap); +#endif + +#if PLATFORM(MAC) + TextCodecMac::registerEncodingNames(addToTextEncodingNameMap); + TextCodecMac::registerCodecs(addToTextCodecMap); +#endif +} + +std::auto_ptr<TextCodec> newTextCodec(const TextEncoding& encoding) +{ + ASSERT(textCodecMap); + TextCodecFactory factory = textCodecMap->get(encoding.name()); + ASSERT(factory.function); + return factory.function(encoding, factory.additionalData); +} + +const char* atomicCanonicalTextEncodingName(const char* name) +{ + if (!name || !name[0]) + return 0; + if (!textEncodingNameMap) + buildBaseTextCodecMaps(); + if (const char* atomicName = textEncodingNameMap->get(name)) + return atomicName; + if (didExtendTextCodecMaps) + return 0; + extendTextCodecMaps(); + didExtendTextCodecMaps = true; + return textEncodingNameMap->get(name); +} + +const char* atomicCanonicalTextEncodingName(const UChar* characters, size_t length) +{ + char buffer[maxEncodingNameLength + 1]; + size_t j = 0; + for (size_t i = 0; i < length; ++i) { + UChar c = characters[i]; + if (isASCIIAlphanumeric(c)) { + if (j == maxEncodingNameLength) + return 0; + buffer[j++] = c; + } + } + buffer[j] = 0; + return atomicCanonicalTextEncodingName(buffer); +} + +bool noExtendedTextEncodingNameUsed() +{ + return !didExtendTextCodecMaps; +} + +} // namespace WebCore |