summaryrefslogtreecommitdiffstats
path: root/WebCore/platform/text/UnicodeRange.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'WebCore/platform/text/UnicodeRange.cpp')
-rw-r--r--WebCore/platform/text/UnicodeRange.cpp462
1 files changed, 0 insertions, 462 deletions
diff --git a/WebCore/platform/text/UnicodeRange.cpp b/WebCore/platform/text/UnicodeRange.cpp
deleted file mode 100644
index 0373441..0000000
--- a/WebCore/platform/text/UnicodeRange.cpp
+++ /dev/null
@@ -1,462 +0,0 @@
-/*
- * Copyright (C) 2007 Apple Computer, Inc.
- *
- * Portions are Copyright (C) 1998 Netscape Communications Corporation.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Alternatively, the contents of this file may be used under the terms
- * of either the Mozilla Public License Version 1.1, found at
- * http://www.mozilla.org/MPL/ (the "MPL") or the GNU General Public
- * License Version 2.0, found at http://www.fsf.org/copyleft/gpl.html
- * (the "GPL"), in which case the provisions of the MPL or the GPL are
- * applicable instead of those above. If you wish to allow use of your
- * version of this file only under the terms of one of those two
- * licenses (the MPL or the GPL) and not to allow others to use your
- * version of this file under the LGPL, indicate your decision by
- * deletingthe provisions above and replace them with the notice and
- * other provisions required by the MPL or the GPL, as the case may be.
- * If you do not delete the provisions above, a recipient may use your
- * version of this file under any of the LGPL, the MPL or the GPL.
- */
-
-#include "config.h"
-#include "UnicodeRange.h"
-
-namespace WebCore {
-
-// This table depends on unicode range definitions.
-// Each item's index must correspond to a unicode range value
-// eg. x-cyrillic = LangGroupTable[cRangeCyrillic]
-static const char* gUnicodeRangeToLangGroupTable[] =
-{
- "x-cyrillic",
- "el",
- "tr",
- "he",
- "ar",
- "x-baltic",
- "th",
- "ko",
- "ja",
- "zh-CN",
- "zh-TW",
- "x-devanagari",
- "x-tamil",
- "x-armn",
- "x-beng",
- "x-cans",
- "x-ethi",
- "x-geor",
- "x-gujr",
- "x-guru",
- "x-khmr",
- "x-mlym"
-};
-
-/**********************************************************************
- * Unicode subranges as defined in unicode 3.0
- * x-western, x-central-euro, tr, x-baltic -> latin
- * 0000 - 036f
- * 1e00 - 1eff
- * 2000 - 206f (general punctuation)
- * 20a0 - 20cf (currency symbols)
- * 2100 - 214f (letterlike symbols)
- * 2150 - 218f (Number Forms)
- * el -> greek
- * 0370 - 03ff
- * 1f00 - 1fff
- * x-cyrillic -> cyrillic
- * 0400 - 04ff
- * he -> hebrew
- * 0590 - 05ff
- * ar -> arabic
- * 0600 - 06ff
- * fb50 - fdff (arabic presentation forms)
- * fe70 - feff (arabic presentation forms b)
- * th - thai
- * 0e00 - 0e7f
- * ko -> korean
- * ac00 - d7af (hangul Syllables)
- * 1100 - 11ff (jamo)
- * 3130 - 318f (hangul compatibility jamo)
- * ja
- * 3040 - 309f (hiragana)
- * 30a0 - 30ff (katakana)
- * zh-CN
- * zh-TW
- *
- * CJK
- * 3100 - 312f (bopomofo)
- * 31a0 - 31bf (bopomofo extended)
- * 3000 - 303f (CJK Symbols and Punctuation)
- * 2e80 - 2eff (CJK radicals supplement)
- * 2f00 - 2fdf (Kangxi Radicals)
- * 2ff0 - 2fff (Ideographic Description Characters)
- * 3190 - 319f (kanbun)
- * 3200 - 32ff (Enclosed CJK letters and Months)
- * 3300 - 33ff (CJK compatibility)
- * 3400 - 4dbf (CJK Unified Ideographs Extension A)
- * 4e00 - 9faf (CJK Unified Ideographs)
- * f900 - fa5f (CJK Compatibility Ideographs)
- * fe30 - fe4f (CJK compatibility Forms)
- * ff00 - ffef (halfwidth and fullwidth forms)
- *
- * Armenian
- * 0530 - 058f
- * Sriac
- * 0700 - 074f
- * Thaana
- * 0780 - 07bf
- * Devanagari
- * 0900 - 097f
- * Bengali
- * 0980 - 09ff
- * Gurmukhi
- * 0a00 - 0a7f
- * Gujarati
- * 0a80 - 0aff
- * Oriya
- * 0b00 - 0b7f
- * Tamil
- * 0b80 - 0bff
- * Telugu
- * 0c00 - 0c7f
- * Kannada
- * 0c80 - 0cff
- * Malayalam
- * 0d00 - 0d7f
- * Sinhala
- * 0d80 - 0def
- * Lao
- * 0e80 - 0eff
- * Tibetan
- * 0f00 - 0fbf
- * Myanmar
- * 1000 - 109f
- * Georgian
- * 10a0 - 10ff
- * Ethiopic
- * 1200 - 137f
- * Cherokee
- * 13a0 - 13ff
- * Canadian Aboriginal Syllabics
- * 1400 - 167f
- * Ogham
- * 1680 - 169f
- * Runic
- * 16a0 - 16ff
- * Khmer
- * 1780 - 17ff
- * Mongolian
- * 1800 - 18af
- * Misc - superscripts and subscripts
- * 2070 - 209f
- * Misc - Combining Diacritical Marks for Symbols
- * 20d0 - 20ff
- * Misc - Arrows
- * 2190 - 21ff
- * Misc - Mathematical Operators
- * 2200 - 22ff
- * Misc - Miscellaneous Technical
- * 2300 - 23ff
- * Misc - Control picture
- * 2400 - 243f
- * Misc - Optical character recognition
- * 2440 - 2450
- * Misc - Enclose Alphanumerics
- * 2460 - 24ff
- * Misc - Box Drawing
- * 2500 - 257f
- * Misc - Block Elements
- * 2580 - 259f
- * Misc - Geometric Shapes
- * 25a0 - 25ff
- * Misc - Miscellaneous Symbols
- * 2600 - 267f
- * Misc - Dingbats
- * 2700 - 27bf
- * Misc - Braille Patterns
- * 2800 - 28ff
- * Yi Syllables
- * a000 - a48f
- * Yi radicals
- * a490 - a4cf
- * Alphabetic Presentation Forms
- * fb00 - fb4f
- * Misc - Combining half Marks
- * fe20 - fe2f
- * Misc - small form variants
- * fe50 - fe6f
- * Misc - Specials
- * fff0 - ffff
- *********************************************************************/
-
-static const unsigned cNumSubTables = 9;
-static const unsigned cSubTableSize = 16;
-
-static const unsigned char gUnicodeSubrangeTable[cNumSubTables][cSubTableSize] =
-{
- { // table for X---
- cRangeTableBase+1, //u0xxx
- cRangeTableBase+2, //u1xxx
- cRangeTableBase+3, //u2xxx
- cRangeSetCJK, //u3xxx
- cRangeSetCJK, //u4xxx
- cRangeSetCJK, //u5xxx
- cRangeSetCJK, //u6xxx
- cRangeSetCJK, //u7xxx
- cRangeSetCJK, //u8xxx
- cRangeSetCJK, //u9xxx
- cRangeTableBase+4, //uaxxx
- cRangeKorean, //ubxxx
- cRangeKorean, //ucxxx
- cRangeTableBase+5, //udxxx
- cRangePrivate, //uexxx
- cRangeTableBase+6 //ufxxx
- },
- { //table for 0X--
- cRangeSetLatin, //u00xx
- cRangeSetLatin, //u01xx
- cRangeSetLatin, //u02xx
- cRangeGreek, //u03xx XXX 0300-036f is in fact cRangeCombiningDiacriticalMarks
- cRangeCyrillic, //u04xx
- cRangeTableBase+7, //u05xx, includes Cyrillic supplement, Hebrew, and Armenian
- cRangeArabic, //u06xx
- cRangeTertiaryTable, //u07xx
- cRangeUnassigned, //u08xx
- cRangeTertiaryTable, //u09xx
- cRangeTertiaryTable, //u0axx
- cRangeTertiaryTable, //u0bxx
- cRangeTertiaryTable, //u0cxx
- cRangeTertiaryTable, //u0dxx
- cRangeTertiaryTable, //u0exx
- cRangeTibetan, //u0fxx
- },
- { //table for 1x--
- cRangeTertiaryTable, //u10xx
- cRangeKorean, //u11xx
- cRangeEthiopic, //u12xx
- cRangeTertiaryTable, //u13xx
- cRangeCanadian, //u14xx
- cRangeCanadian, //u15xx
- cRangeTertiaryTable, //u16xx
- cRangeKhmer, //u17xx
- cRangeMongolian, //u18xx
- cRangeUnassigned, //u19xx
- cRangeUnassigned, //u1axx
- cRangeUnassigned, //u1bxx
- cRangeUnassigned, //u1cxx
- cRangeUnassigned, //u1dxx
- cRangeSetLatin, //u1exx
- cRangeGreek, //u1fxx
- },
- { //table for 2x--
- cRangeSetLatin, //u20xx
- cRangeSetLatin, //u21xx
- cRangeMathOperators, //u22xx
- cRangeMiscTechnical, //u23xx
- cRangeControlOpticalEnclose, //u24xx
- cRangeBoxBlockGeometrics, //u25xx
- cRangeMiscSymbols, //u26xx
- cRangeDingbats, //u27xx
- cRangeBraillePattern, //u28xx
- cRangeUnassigned, //u29xx
- cRangeUnassigned, //u2axx
- cRangeUnassigned, //u2bxx
- cRangeUnassigned, //u2cxx
- cRangeUnassigned, //u2dxx
- cRangeSetCJK, //u2exx
- cRangeSetCJK, //u2fxx
- },
- { //table for ax--
- cRangeYi, //ua0xx
- cRangeYi, //ua1xx
- cRangeYi, //ua2xx
- cRangeYi, //ua3xx
- cRangeYi, //ua4xx
- cRangeUnassigned, //ua5xx
- cRangeUnassigned, //ua6xx
- cRangeUnassigned, //ua7xx
- cRangeUnassigned, //ua8xx
- cRangeUnassigned, //ua9xx
- cRangeUnassigned, //uaaxx
- cRangeUnassigned, //uabxx
- cRangeKorean, //uacxx
- cRangeKorean, //uadxx
- cRangeKorean, //uaexx
- cRangeKorean, //uafxx
- },
- { //table for dx--
- cRangeKorean, //ud0xx
- cRangeKorean, //ud1xx
- cRangeKorean, //ud2xx
- cRangeKorean, //ud3xx
- cRangeKorean, //ud4xx
- cRangeKorean, //ud5xx
- cRangeKorean, //ud6xx
- cRangeKorean, //ud7xx
- cRangeSurrogate, //ud8xx
- cRangeSurrogate, //ud9xx
- cRangeSurrogate, //udaxx
- cRangeSurrogate, //udbxx
- cRangeSurrogate, //udcxx
- cRangeSurrogate, //uddxx
- cRangeSurrogate, //udexx
- cRangeSurrogate, //udfxx
- },
- { // table for fx--
- cRangePrivate, //uf0xx
- cRangePrivate, //uf1xx
- cRangePrivate, //uf2xx
- cRangePrivate, //uf3xx
- cRangePrivate, //uf4xx
- cRangePrivate, //uf5xx
- cRangePrivate, //uf6xx
- cRangePrivate, //uf7xx
- cRangePrivate, //uf8xx
- cRangeSetCJK, //uf9xx
- cRangeSetCJK, //ufaxx
- cRangeArabic, //ufbxx, includes alphabic presentation form
- cRangeArabic, //ufcxx
- cRangeArabic, //ufdxx
- cRangeArabic, //ufexx, includes Combining half marks,
- // CJK compatibility forms,
- // CJK compatibility forms,
- // small form variants
- cRangeTableBase+8, //uffxx, halfwidth and fullwidth forms, includes Specials
- },
- { //table for 0x0500 - 0x05ff
- cRangeCyrillic, //u050x
- cRangeCyrillic, //u051x
- cRangeCyrillic, //u052x
- cRangeArmenian, //u053x
- cRangeArmenian, //u054x
- cRangeArmenian, //u055x
- cRangeArmenian, //u056x
- cRangeArmenian, //u057x
- cRangeArmenian, //u058x
- cRangeHebrew, //u059x
- cRangeHebrew, //u05ax
- cRangeHebrew, //u05bx
- cRangeHebrew, //u05cx
- cRangeHebrew, //u05dx
- cRangeHebrew, //u05ex
- cRangeHebrew, //u05fx
- },
- { //table for 0xff00 - 0xffff
- cRangeSetCJK, //uff0x, fullwidth latin
- cRangeSetCJK, //uff1x, fullwidth latin
- cRangeSetCJK, //uff2x, fullwidth latin
- cRangeSetCJK, //uff3x, fullwidth latin
- cRangeSetCJK, //uff4x, fullwidth latin
- cRangeSetCJK, //uff5x, fullwidth latin
- cRangeSetCJK, //uff6x, halfwidth katakana
- cRangeSetCJK, //uff7x, halfwidth katakana
- cRangeSetCJK, //uff8x, halfwidth katakana
- cRangeSetCJK, //uff9x, halfwidth katakana
- cRangeSetCJK, //uffax, halfwidth hangul jamo
- cRangeSetCJK, //uffbx, halfwidth hangul jamo
- cRangeSetCJK, //uffcx, halfwidth hangul jamo
- cRangeSetCJK, //uffdx, halfwidth hangul jamo
- cRangeSetCJK, //uffex, fullwidth symbols
- cRangeSpecials, //ufffx, Specials
- },
-};
-
-// Most scripts between U+0700 and U+16FF are assigned a chunk of 128 (0x80)
-// code points so that the number of entries in the tertiary range
-// table for that range is obtained by dividing (0x1700 - 0x0700) by 128.
-// Exceptions: Ethiopic, Tibetan, Hangul Jamo and Canadian aboriginal
-// syllabaries take multiple chunks and Ogham and Runic share a single chunk.
-static const unsigned cTertiaryTableSize = ((0x1700 - 0x0700) / 0x80);
-
-static const unsigned char gUnicodeTertiaryRangeTable[cTertiaryTableSize] =
-{ //table for 0x0700 - 0x1600
- cRangeSyriac, //u070x
- cRangeThaana, //u078x
- cRangeUnassigned, //u080x place holder(resolved in the 2ndary tab.)
- cRangeUnassigned, //u088x place holder(resolved in the 2ndary tab.)
- cRangeDevanagari, //u090x
- cRangeBengali, //u098x
- cRangeGurmukhi, //u0a0x
- cRangeGujarati, //u0a8x
- cRangeOriya, //u0b0x
- cRangeTamil, //u0b8x
- cRangeTelugu, //u0c0x
- cRangeKannada, //u0c8x
- cRangeMalayalam, //u0d0x
- cRangeSinhala, //u0d8x
- cRangeThai, //u0e0x
- cRangeLao, //u0e8x
- cRangeTibetan, //u0f0x place holder(resolved in the 2ndary tab.)
- cRangeTibetan, //u0f8x place holder(resolved in the 2ndary tab.)
- cRangeMyanmar, //u100x
- cRangeGeorgian, //u108x
- cRangeKorean, //u110x place holder(resolved in the 2ndary tab.)
- cRangeKorean, //u118x place holder(resolved in the 2ndary tab.)
- cRangeEthiopic, //u120x place holder(resolved in the 2ndary tab.)
- cRangeEthiopic, //u128x place holder(resolved in the 2ndary tab.)
- cRangeEthiopic, //u130x
- cRangeCherokee, //u138x
- cRangeCanadian, //u140x place holder(resolved in the 2ndary tab.)
- cRangeCanadian, //u148x place holder(resolved in the 2ndary tab.)
- cRangeCanadian, //u150x place holder(resolved in the 2ndary tab.)
- cRangeCanadian, //u158x place holder(resolved in the 2ndary tab.)
- cRangeCanadian, //u160x
- cRangeOghamRunic, //u168x this contains two scripts, Ogham & Runic
-};
-
-// A two level index is almost enough for locating a range, with the
-// exception of u03xx and u05xx. Since we don't really care about range for
-// combining diacritical marks in our font application, they are
-// not discriminated further. Future adoption of this method for other use
-// should be aware of this limitation. The implementation can be extended if
-// there is such a need.
-// For Indic, Southeast Asian scripts and some other scripts between
-// U+0700 and U+16FF, it's extended to the third level.
-unsigned int findCharUnicodeRange(UChar32 ch)
-{
- if (ch >= 0xFFFF)
- return 0;
-
- unsigned int range;
-
- //search the first table
- range = gUnicodeSubrangeTable[0][ch >> 12];
-
- if (range < cRangeTableBase)
- // we try to get a specific range
- return range;
-
- // otherwise, we have one more table to look at
- range = gUnicodeSubrangeTable[range - cRangeTableBase][(ch & 0x0f00) >> 8];
- if (range < cRangeTableBase)
- return range;
- if (range < cRangeTertiaryTable)
- return gUnicodeSubrangeTable[range - cRangeTableBase][(ch & 0x00f0) >> 4];
-
- // Yet another table to look at : U+0700 - U+16FF : 128 code point blocks
- return gUnicodeTertiaryRangeTable[(ch - 0x0700) >> 7];
-}
-
-const char* langGroupFromUnicodeRange(unsigned char unicodeRange)
-{
- if (cRangeSpecificItemNum > unicodeRange)
- return gUnicodeRangeToLangGroupTable[unicodeRange];
- return 0;
-}
-
-}