diff options
Diffstat (limited to 'Source/WebCore/platform/text/brew')
4 files changed, 661 insertions, 0 deletions
diff --git a/Source/WebCore/platform/text/brew/TextBoundariesBrew.cpp b/Source/WebCore/platform/text/brew/TextBoundariesBrew.cpp new file mode 100644 index 0000000..506bdcf --- /dev/null +++ b/Source/WebCore/platform/text/brew/TextBoundariesBrew.cpp @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2006 Zack Rusin <zack@kde.org> + * Copyright (C) 2007-2009 Torch Mobile, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "TextBoundaries.h" + +#include "NotImplemented.h" +#include "PlatformString.h" + +using namespace WTF::Unicode; + +namespace WebCore { + +int findNextWordFromIndex(const UChar* buffer, int len, int position, bool forward) +{ + notImplemented(); + return 0; +} + +void findWordBoundary(const UChar* buffer, int len, int position, int* start, int* end) +{ + if (position > len) { + *start = 0; + *end = 0; + return; + } + + String str(buffer, len); + + int currentPosition = position - 1; + String foundWord; + while (currentPosition >= 0 && isLetter(str[currentPosition])) { + UChar c = str[currentPosition]; + foundWord.insert(&c, 1, 0); + --currentPosition; + } + + // currentPosition == 0 means the first char is not letter + // currentPosition == -1 means we reached the beginning + int startPos = (currentPosition < 0) ? 0 : ++currentPosition; + currentPosition = position; + while (isLetter(str[currentPosition])) { + foundWord.append(str[currentPosition]); + ++currentPosition; + } + + *start = startPos; + *end = currentPosition; +} + +} diff --git a/Source/WebCore/platform/text/brew/TextBreakIteratorBrew.cpp b/Source/WebCore/platform/text/brew/TextBreakIteratorBrew.cpp new file mode 100644 index 0000000..7f46e4f --- /dev/null +++ b/Source/WebCore/platform/text/brew/TextBreakIteratorBrew.cpp @@ -0,0 +1,312 @@ +/* + * Copyright (C) 2006 Lars Knoll <lars@trolltech.com> + * Copyright (C) 2007-2009 Torch Mobile, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. 
+ * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + */ + +#include "config.h" +#include "TextBreakIterator.h" + +#include "PlatformString.h" +#include <wtf/StdLibExtras.h> +#include <wtf/unicode/Unicode.h> + +using namespace WTF::Unicode; + +namespace WebCore { + +// Hack, not entirely correct +static inline bool isCharStop(UChar c) +{ + CharCategory charCategory = category(c); + return charCategory != Mark_NonSpacing && (charCategory != Other_Surrogate || (c < 0xd800 || c >= 0xdc00)); +} + +static inline bool isLineStop(UChar c) +{ + return category(c) != Separator_Line; +} + +static inline bool isSentenceStop(UChar c) +{ + return isPunct(c); +} + +class TextBreakIterator { +public: + void reset(const UChar* str, int len) + { + string = str; + length = len; + currentPos = 0; + } + virtual int first() = 0; + virtual int next() = 0; + virtual int previous() = 0; + int following(int position) + { + currentPos = position; + return next(); + } + int preceding(int position) + { + currentPos = position; + return previous(); + } + + int currentPos; + const UChar* string; + int length; +}; + +struct WordBreakIterator: TextBreakIterator { + virtual int first(); + virtual int next(); + virtual int previous(); +}; + +struct CharBreakIterator: TextBreakIterator { + virtual int first(); + virtual int next(); + virtual int previous(); +}; + +struct LineBreakIterator: TextBreakIterator { + virtual int first(); + virtual int next(); + virtual int previous(); +}; + +struct SentenceBreakIterator : TextBreakIterator { + virtual int first(); + virtual int next(); + virtual int previous(); +}; + +int WordBreakIterator::first() +{ + currentPos = 0; + return currentPos; +} + +int WordBreakIterator::next() +{ + if (currentPos == length) { + currentPos = -1; + return currentPos; + } + 
bool haveSpace = false; + while (currentPos < length) { + if (haveSpace && !isSpace(string[currentPos])) + break; + if (isSpace(string[currentPos])) + haveSpace = true; + ++currentPos; + } + return currentPos; +} + +int WordBreakIterator::previous() +{ + if (!currentPos) { + currentPos = -1; + return currentPos; + } + bool haveSpace = false; + while (currentPos > 0) { + if (haveSpace && !isSpace(string[currentPos])) + break; + if (isSpace(string[currentPos])) + haveSpace = true; + --currentPos; + } + return currentPos; +} + +int CharBreakIterator::first() +{ + currentPos = 0; + return currentPos; +} + +int CharBreakIterator::next() +{ + if (currentPos >= length) + return -1; + ++currentPos; + while (currentPos < length && !isCharStop(string[currentPos])) + ++currentPos; + return currentPos; +} + +int CharBreakIterator::previous() +{ + if (currentPos <= 0) + return -1; + if (currentPos > length) + currentPos = length; + --currentPos; + while (currentPos > 0 && !isCharStop(string[currentPos])) + --currentPos; + return currentPos; +} + +int LineBreakIterator::first() +{ + currentPos = 0; + return currentPos; +} + +int LineBreakIterator::next() +{ + if (currentPos == length) { + currentPos = -1; + return currentPos; + } + bool haveSpace = false; + while (currentPos < length) { + if (haveSpace && !isLineStop(string[currentPos])) + break; + if (isLineStop(string[currentPos])) + haveSpace = true; + ++currentPos; + } + return currentPos; +} + +int LineBreakIterator::previous() +{ + if (!currentPos) { + currentPos = -1; + return currentPos; + } + bool haveSpace = false; + while (currentPos > 0) { + if (haveSpace && !isLineStop(string[currentPos])) + break; + if (isLineStop(string[currentPos])) + haveSpace = true; + --currentPos; + } + return currentPos; +} + +int SentenceBreakIterator::first() +{ + currentPos = 0; + return currentPos; +} + +int SentenceBreakIterator::next() +{ + if (currentPos == length) { + currentPos = -1; + return currentPos; + } + bool haveSpace = 
false; + while (currentPos < length) { + if (haveSpace && !isSentenceStop(string[currentPos])) + break; + if (isSentenceStop(string[currentPos])) + haveSpace = true; + ++currentPos; + } + return currentPos; +} + +int SentenceBreakIterator::previous() +{ + if (!currentPos) { + currentPos = -1; + return currentPos; + } + bool haveSpace = false; + while (currentPos > 0) { + if (haveSpace && !isSentenceStop(string[currentPos])) + break; + if (isSentenceStop(string[currentPos])) + haveSpace = true; + --currentPos; + } + return currentPos; +} + +TextBreakIterator* wordBreakIterator(const UChar* string, int length) +{ + DEFINE_STATIC_LOCAL(WordBreakIterator, iterator, ()); + iterator.reset(string, length); + return &iterator; +} + +TextBreakIterator* characterBreakIterator(const UChar* string, int length) +{ + DEFINE_STATIC_LOCAL(CharBreakIterator, iterator, ()); + iterator.reset(string, length); + return &iterator; +} + +TextBreakIterator* lineBreakIterator(const UChar* string, int length) +{ + DEFINE_STATIC_LOCAL(LineBreakIterator , iterator, ()); + iterator.reset(string, length); + return &iterator; +} + +TextBreakIterator* sentenceBreakIterator(const UChar* string, int length) +{ + DEFINE_STATIC_LOCAL(SentenceBreakIterator, iterator, ()); + iterator.reset(string, length); + return &iterator; +} + +int textBreakFirst(TextBreakIterator* breakIterator) +{ + return breakIterator->first(); +} + +int textBreakNext(TextBreakIterator* breakIterator) +{ + return breakIterator->next(); +} + +int textBreakPreceding(TextBreakIterator* breakIterator, int position) +{ + return breakIterator->preceding(position); +} + +int textBreakFollowing(TextBreakIterator* breakIterator, int position) +{ + return breakIterator->following(position); +} + +int textBreakCurrent(TextBreakIterator* breakIterator) +{ + return breakIterator->currentPos; +} + +bool isTextBreak(TextBreakIterator*, int) +{ + return true; +} + +TextBreakIterator* cursorMovementIterator(const UChar* string, int length) +{ + 
return characterBreakIterator(string, length); +} + +} // namespace WebCore diff --git a/Source/WebCore/platform/text/brew/TextCodecBrew.cpp b/Source/WebCore/platform/text/brew/TextCodecBrew.cpp new file mode 100644 index 0000000..1f32298 --- /dev/null +++ b/Source/WebCore/platform/text/brew/TextCodecBrew.cpp @@ -0,0 +1,214 @@ +/* + * Copyright (C) 2010 Company 100, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "TextCodecBrew.h" + +#include "AEEAppGen.h" +#include "AEEICharsetConv.h" +#include "NotImplemented.h" +#include "PlatformString.h" +#include <wtf/Assertions.h> +#include <wtf/text/CString.h> + +namespace WebCore { + +// FIXME: Not sure if there are Brew MP devices which use big endian. +const char* WebCore::TextCodecBrew::m_internalEncodingName = "UTF-16LE"; + +static PassOwnPtr<TextCodec> newTextCodecBrew(const TextEncoding& encoding, const void*) +{ + return new TextCodecBrew(encoding); +} + +void TextCodecBrew::registerBaseEncodingNames(EncodingNameRegistrar registrar) +{ + registrar("UTF-8", "UTF-8"); +} + +void TextCodecBrew::registerBaseCodecs(TextCodecRegistrar registrar) +{ + registrar("UTF-8", newTextCodecBrew, 0); +} + +void TextCodecBrew::registerExtendedEncodingNames(EncodingNameRegistrar registrar) +{ + // FIXME: Not sure how to enumerate all available encodings. + notImplemented(); +} + +void TextCodecBrew::registerExtendedCodecs(TextCodecRegistrar registrar) +{ + notImplemented(); +} + +TextCodecBrew::TextCodecBrew(const TextEncoding& encoding) + : m_charsetConverter(0) + , m_encoding(encoding) + , m_numBufferedBytes(0) +{ + String format = String::format("%s>%s", encoding.name(), m_internalEncodingName); + + IShell* shell = reinterpret_cast<AEEApplet*>(GETAPPINSTANCE())->m_pIShell; + AEECLSID classID = ISHELL_GetHandler(shell, AEEIID_ICharsetConv, format.latin1().data()); + ISHELL_CreateInstance(shell, classID, reinterpret_cast<void**>(&m_charsetConverter)); + + ASSERT(m_charsetConverter); +} + +TextCodecBrew::~TextCodecBrew() +{ + if (m_charsetConverter) + ICharsetConv_Release(m_charsetConverter); +} + +String TextCodecBrew::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError) +{ + int code = ICharsetConv_Initialize(m_charsetConverter, m_encoding.name(), m_internalEncodingName, 0); + ASSERT(code == AEE_SUCCESS); + + Vector<UChar> result; + Vector<unsigned char> 
prefixedBytes(length); + + int srcSize; + unsigned char* srcBegin; + + if (m_numBufferedBytes) { + srcSize = length + m_numBufferedBytes; + prefixedBytes.grow(srcSize); + memcpy(prefixedBytes.data(), m_bufferedBytes, m_numBufferedBytes); + memcpy(prefixedBytes.data() + m_numBufferedBytes, bytes, length); + + srcBegin = prefixedBytes.data(); + + // all buffered bytes are consumed now + m_numBufferedBytes = 0; + } else { + srcSize = length; + srcBegin = const_cast<unsigned char*>(reinterpret_cast<const unsigned char*>(bytes)); + } + + unsigned char* src = srcBegin; + unsigned char* srcEnd = srcBegin + srcSize; + + Vector<UChar> dstBuffer(srcSize); + + while (src < srcEnd) { + int numCharsConverted; + unsigned char* dstBegin = reinterpret_cast<unsigned char*>(dstBuffer.data()); + unsigned char* dst = dstBegin; + int dstSize = dstBuffer.size() * sizeof(UChar); + + code = ICharsetConv_CharsetConvert(m_charsetConverter, &src, &srcSize, &dst, &dstSize, &numCharsConverted); + ASSERT(code != AEE_ENOSUCH); + + if (code == AEE_EBUFFERTOOSMALL) { + // Increase the buffer and try it again. + dstBuffer.grow(dstBuffer.size() * 2); + continue; + } + + if (code == AEE_EBADITEM) { + sawError = true; + if (stopOnError) { + result.append(L'?'); + break; + } + + src++; + } + + if (code == AEE_EINCOMPLETEITEM) { + if (flush) { + LOG_ERROR("Partial bytes at end of input while flush requested."); + sawError = true; + return String(); + } + + m_numBufferedBytes = srcEnd - src; + memcpy(m_bufferedBytes, src, m_numBufferedBytes); + break; + } + + int numChars = (dst - dstBegin) / sizeof(UChar); + if (numChars > 0) + result.append(dstBuffer.data(), numChars); + } + + return String::adopt(result); +} + +CString TextCodecBrew::encode(const UChar* characters, size_t length, UnencodableHandling handling) +{ + if (!length) + return ""; + + unsigned int replacementCharacter = '?'; + + // FIXME: Impossible to handle EntitiesForUnencodables or URLEncodedEntitiesForUnencodables with ICharsetConv. 
+ int code = ICharsetConv_Initialize(m_charsetConverter, m_internalEncodingName, m_encoding.name(), replacementCharacter); + ASSERT(code == AEE_SUCCESS); + + Vector<char> result; + + int srcSize = length * sizeof(UChar); + unsigned char* srcBegin = const_cast<unsigned char*>(reinterpret_cast<const unsigned char*>(characters)); + unsigned char* src = srcBegin; + unsigned char* srcEnd = srcBegin + srcSize; + + Vector<unsigned char> dstBuffer(length * sizeof(UChar)); + + while (src < srcEnd) { + int numCharsConverted; + unsigned char* dstBegin = dstBuffer.data(); + unsigned char* dst = dstBegin; + int dstSize = dstBuffer.size(); + + code = ICharsetConv_CharsetConvert(m_charsetConverter, &src, &srcSize, &dst, &dstSize, &numCharsConverted); + ASSERT(code != AEE_EINCOMPLETEITEM); + + if (code == AEE_ENOSUCH) { + LOG_ERROR("Conversion error, Code=%d", code); + return CString(); + } + + if (code == AEE_EBUFFERTOOSMALL) { + // Increase the buffer and try it again. + dstBuffer.grow(dstBuffer.size() * 2); + continue; + } + + if (code == AEE_EBADITEM) + src += sizeof(UChar); // Skip the invalid character + + int numBytes = dst - dstBegin; + if (numBytes > 0) + result.append(dstBuffer.data(), numBytes); + } + + return CString(result.data(), result.size()); +} + +} // namespace WebCore diff --git a/Source/WebCore/platform/text/brew/TextCodecBrew.h b/Source/WebCore/platform/text/brew/TextCodecBrew.h new file mode 100644 index 0000000..97e2c87 --- /dev/null +++ b/Source/WebCore/platform/text/brew/TextCodecBrew.h @@ -0,0 +1,61 @@ +/* + * Copyright (C) 2010 Company 100, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef TextCodecBrew_h
+#define TextCodecBrew_h
+
+#include "TextCodec.h"
+#include "TextEncoding.h"
+
+// Forward declaration of the BREW charset converter interface, so this
+// header does not need to include the BREW headers.
+typedef struct ICharsetConv ICharsetConv;
+
+namespace WebCore {
+
+// TextCodec implementation that converts between an external encoding and
+// the internal encoding named by m_internalEncodingName, using a BREW
+// ICharsetConv instance.
+class TextCodecBrew : public TextCodec {
+public:
+    // Registration hooks for the encoding registry. The "base" pair
+    // registers UTF-8; the "extended" pair is not implemented.
+    static void registerBaseEncodingNames(EncodingNameRegistrar);
+    static void registerBaseCodecs(TextCodecRegistrar);
+
+    static void registerExtendedEncodingNames(EncodingNameRegistrar);
+    static void registerExtendedCodecs(TextCodecRegistrar);
+
+    // Creates the converter for the given encoding; the destructor releases it.
+    TextCodecBrew(const TextEncoding&);
+    virtual ~TextCodecBrew();
+
+    // decode() converts bytes in m_encoding to a String and carries an
+    // incomplete trailing sequence over to the next call unless |flush| is set.
+    // encode() converts UTF-16 code units to bytes in m_encoding.
+    virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
+    virtual CString encode(const UChar*, size_t length, UnencodableHandling);
+
+private:
+    TextEncoding m_encoding; // the external encoding this codec converts to and from
+    size_t m_numBufferedBytes; // number of valid bytes in m_bufferedBytes
+    // Incomplete trailing bytes held back by decode() for the next call.
+    unsigned char m_bufferedBytes[16]; // bigger than any single multi-byte character
+    ICharsetConv* m_charsetConverter; // null if no converter could be created
+
+    // Name of the encoding used on the String side of every conversion.
+    static const char* m_internalEncodingName;
+};
+
+} // namespace WebCore
+
+#endif //
TextCodecBrew_h |