/*
 * Copyright (C) 2005, 2007, 2010 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "break_lines.h"

#include "CharacterNames.h"
#include "TextBreakIterator.h"

#if PLATFORM(MAC)
// NOTE(review): the header name was missing from this directive; CoreServices is
// restored here because it provides the UCCreateTextBreakLocator/UCFindTextBreak
// API that this file calls further down. Confirm against the upstream file.
#include <CoreServices/CoreServices.h>
#endif

namespace WebCore {

// Returns true when |ch| is a space, a newline or a tab. The noBreakSpace
// character is reported as breakable only if |treatNoBreakSpaceAsBreak| is true;
// every other character yields false.
static inline bool isBreakableSpace(UChar ch, bool treatNoBreakSpaceAsBreak)
{
    switch (ch) {
    case ' ':
    case '\n':
    case '\t':
        return true;
    case noBreakSpace:
        return treatNoBreakSpaceAsBreak;
    default:
        return false;
    }
}

// This differs from the Unicode algorithm only in that Unicode does not break
// between a question mark and a vertical line (U+007C).
static const unsigned char internetExplorerLineBreaksAfterQuestionMarkTable[0x80] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, // \t 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, // ! " ' ) , . / 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, // : ; ? 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, // ] 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1 // } }; static const size_t internetExplorerLineBreaksAfterQuestionMarkTableSize = sizeof(internetExplorerLineBreaksAfterQuestionMarkTable) / sizeof(*internetExplorerLineBreaksAfterQuestionMarkTable); static inline bool shouldBreakAfter(UChar ch, UChar nextCh) { switch (ch) { // For a question mark preceding a non-ASCII characters, defer to the Unicode algorithm by returning false. // For ASCII characters, use a lookup table for enhanced speed and for compatibility with Internet Explorer. case '?': return nextCh < internetExplorerLineBreaksAfterQuestionMarkTableSize && internetExplorerLineBreaksAfterQuestionMarkTable[nextCh]; // Internet Explorer always allows breaking after a hyphen. case '-': case softHyphen: // FIXME: cases for ideographicComma and ideographicFullStop are a workaround for an issue in Unicode 5.0 // which is likely to be resolved in Unicode 5.1 . // We may want to remove or conditionalize this workaround at some point. 
case ideographicComma: case ideographicFullStop: #ifdef ANDROID_LAYOUT // as '/' is used in uri which is always long, we would like to break it case '/': #endif return true; default: return false; } } static inline bool needsLineBreakIterator(UChar ch) { return ch > 0x7F && ch != noBreakSpace; } #if PLATFORM(MAC) && defined(BUILDING_ON_TIGER) static inline TextBreakLocatorRef lineBreakLocator() { TextBreakLocatorRef locator = 0; UCCreateTextBreakLocator(0, 0, kUCTextBreakLineMask, &locator); return locator; } #endif int nextBreakablePosition(const UChar* str, int pos, int len, bool treatNoBreakSpaceAsBreak) { #if !PLATFORM(MAC) || !defined(BUILDING_ON_TIGER) TextBreakIterator* breakIterator = 0; #endif int nextBreak = -1; UChar lastCh = pos > 0 ? str[pos - 1] : 0; for (int i = pos; i < len; i++) { UChar ch = str[i]; if (isBreakableSpace(ch, treatNoBreakSpaceAsBreak) || shouldBreakAfter(lastCh, ch)) return i; if (needsLineBreakIterator(ch) || needsLineBreakIterator(lastCh)) { if (nextBreak < i && i) { #if !PLATFORM(MAC) || !defined(BUILDING_ON_TIGER) if (!breakIterator) breakIterator = lineBreakIterator(str, len); if (breakIterator) nextBreak = textBreakFollowing(breakIterator, i - 1); #else static TextBreakLocatorRef breakLocator = lineBreakLocator(); if (breakLocator) { UniCharArrayOffset nextUCBreak; if (UCFindTextBreak(breakLocator, kUCTextBreakLineMask, 0, str, len, i, &nextUCBreak) == 0) nextBreak = nextUCBreak; } #endif } if (i == nextBreak && !isBreakableSpace(lastCh, treatNoBreakSpaceAsBreak)) return i; } lastCh = ch; } return len; } } // namespace WebCore