summaryrefslogtreecommitdiffstats
path: root/WebCore/platform/text/mac
diff options
context:
space:
mode:
Diffstat (limited to 'WebCore/platform/text/mac')
-rw-r--r--WebCore/platform/text/mac/CharsetData.h37
-rw-r--r--WebCore/platform/text/mac/ShapeArabic.c555
-rw-r--r--WebCore/platform/text/mac/ShapeArabic.h44
-rw-r--r--WebCore/platform/text/mac/StringImplMac.mm31
-rw-r--r--WebCore/platform/text/mac/StringMac.mm41
-rw-r--r--WebCore/platform/text/mac/TextBoundaries.mm54
-rw-r--r--WebCore/platform/text/mac/TextBreakIteratorInternalICUMac.mm72
-rw-r--r--WebCore/platform/text/mac/TextCodecMac.cpp321
-rw-r--r--WebCore/platform/text/mac/TextCodecMac.h65
-rw-r--r--WebCore/platform/text/mac/character-sets.txt1868
-rw-r--r--WebCore/platform/text/mac/mac-encodings.txt45
-rwxr-xr-xWebCore/platform/text/mac/make-charset-table.pl225
12 files changed, 3358 insertions, 0 deletions
diff --git a/WebCore/platform/text/mac/CharsetData.h b/WebCore/platform/text/mac/CharsetData.h
new file mode 100644
index 0000000..458cecb
--- /dev/null
+++ b/WebCore/platform/text/mac/CharsetData.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2003, 2006 Apple Computer, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+namespace WebCore {
+
+ // Makes kTextEncodingISOLatinThai an alias for the CoreFoundation constant
+ // kCFStringEncodingISOLatinThai.
+ // NOTE(review): presumably needed so table entries can use the kTextEncoding*
+ // spelling for this encoding -- confirm against the table definition.
+ #define kTextEncodingISOLatinThai kCFStringEncodingISOLatinThai
+
+ // One row of the charset table: a charset name paired with a ::TextEncoding value.
+ struct CharsetEntry {
+ const char* name;
+ ::TextEncoding encoding;
+ };
+
+ // Table of CharsetEntry rows. Only declared here; the definition (and how its
+ // end is marked) lives in another translation unit.
+ extern const CharsetEntry CharsetTable[];
+
+}
diff --git a/WebCore/platform/text/mac/ShapeArabic.c b/WebCore/platform/text/mac/ShapeArabic.c
new file mode 100644
index 0000000..6dbc008
--- /dev/null
+++ b/WebCore/platform/text/mac/ShapeArabic.c
@@ -0,0 +1,555 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 2000-2004, International Business Machines
+* Corporation and others. All Rights Reserved.
+* Copyright (C) 2007 Apple Inc. All rights reserved.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a copy of this
+* software and associated documentation files (the "Software"), to deal in the Software
+* without restriction, including without limitation the rights to use, copy, modify,
+* merge, publish, distribute, and/or sell copies of the Software, and to permit persons
+* to whom the Software is furnished to do so, provided that the above copyright notice(s)
+* and this permission notice appear in all copies of the Software and that both the above
+* copyright notice(s) and this permission notice appear in supporting documentation.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+* INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
+* PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER
+* OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR
+* CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+* PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*
+* Except as contained in this notice, the name of a copyright holder shall not be used in
+* advertising or otherwise to promote the sale, use or other dealings in this Software
+* without prior written authorization of the copyright holder.
+*
+******************************************************************************
+*
+* Arabic letter shaping implemented by Ayman Roshdy
+*/
+
+#include "config.h"
+
+#if USE(ATSUI)
+
+#include "ShapeArabic.h"
+
+#include <unicode/utypes.h>
+#include <unicode/uchar.h>
+#include <unicode/ustring.h>
+#include <unicode/ushape.h>
+#include <wtf/Assertions.h>
+
+/*
+ * ### TODO in general for letter shaping:
+ * - the letter shaping code is UTF-16-unaware; needs update
+ * + especially invertBuffer()?!
+ * - needs to handle the "Arabic Tail" that is used in some legacy codepages
+ * as a glyph fragment of wide-glyph letters
+ * + IBM Unicode conversion tables map it to U+200B (ZWSP)
+ * + IBM Egypt has proposed to encode the tail in Unicode among Arabic Presentation Forms
+ */
+
+/* definitions for Arabic letter shaping ------------------------------------ */
+
+/*
+ * Bit flags stored in the low byte of the values returned by getLink().
+ */
+/* Character is skipped when shapeUnicode() looks for the next/previous neighbor. */
+#define IRRELEVANT 4
+/* With ALEFTYPE on the following letter, triggers the Lam-Alef handling in shapeUnicode(). */
+#define LAMTYPE 16
+#define ALEFTYPE 32
+/* LINKR and LINKL together form the 2-bit value used to index shapeTable. */
+#define LINKR 1
+#define LINKL 2
+
+/*
+ * Offsets from 0xFE70 for the eight tashkeel characters 0x064B..0x0652;
+ * shapeUnicode() indexes this with (ch - 0x064B) and then adds the shape (0 or 1).
+ */
+static const UChar IrrelevantPos[] = {
+ 0x0, 0x2, 0x4, 0x6,
+ 0x8, 0xA, 0xC, 0xE,
+};
+
+/*
+ * Link data for U+0622..U+06D3; getLink() indexes this table with (ch - 0x0622).
+ * Low byte : combination of LINKR (1), LINKL (2), IRRELEVANT (4), LAMTYPE (16)
+ *            and ALEFTYPE (32).
+ * High byte: offset that shapeUnicode() adds (together with the shape index) to
+ *            0xFE70 for characters below 0x0670, or to 0xFB50 otherwise. A zero
+ *            high byte means shapeUnicode() leaves the character unshaped unless
+ *            it is a tashkeel character.
+ */
+static const UChar araLink[178]=
+{
+ 1 + 32 + 256 * 0x11,/*0x0622*/
+ 1 + 32 + 256 * 0x13,/*0x0623*/
+ 1 + 256 * 0x15,/*0x0624*/
+ 1 + 32 + 256 * 0x17,/*0x0625*/
+ 1 + 2 + 256 * 0x19,/*0x0626*/
+ 1 + 32 + 256 * 0x1D,/*0x0627*/
+ 1 + 2 + 256 * 0x1F,/*0x0628*/
+ 1 + 256 * 0x23,/*0x0629*/
+ 1 + 2 + 256 * 0x25,/*0x062A*/
+ 1 + 2 + 256 * 0x29,/*0x062B*/
+ 1 + 2 + 256 * 0x2D,/*0x062C*/
+ 1 + 2 + 256 * 0x31,/*0x062D*/
+ 1 + 2 + 256 * 0x35,/*0x062E*/
+ 1 + 256 * 0x39,/*0x062F*/
+ 1 + 256 * 0x3B,/*0x0630*/
+ 1 + 256 * 0x3D,/*0x0631*/
+ 1 + 256 * 0x3F,/*0x0632*/
+ 1 + 2 + 256 * 0x41,/*0x0633*/
+ 1 + 2 + 256 * 0x45,/*0x0634*/
+ 1 + 2 + 256 * 0x49,/*0x0635*/
+ 1 + 2 + 256 * 0x4D,/*0x0636*/
+ 1 + 2 + 256 * 0x51,/*0x0637*/
+ 1 + 2 + 256 * 0x55,/*0x0638*/
+ 1 + 2 + 256 * 0x59,/*0x0639*/
+ 1 + 2 + 256 * 0x5D,/*0x063A*/
+ 0, 0, 0, 0, 0, /*0x063B-0x063F*/
+ 1 + 2, /*0x0640*/
+ 1 + 2 + 256 * 0x61,/*0x0641*/
+ 1 + 2 + 256 * 0x65,/*0x0642*/
+ 1 + 2 + 256 * 0x69,/*0x0643*/
+ 1 + 2 + 16 + 256 * 0x6D,/*0x0644*/
+ 1 + 2 + 256 * 0x71,/*0x0645*/
+ 1 + 2 + 256 * 0x75,/*0x0646*/
+ 1 + 2 + 256 * 0x79,/*0x0647*/
+ 1 + 256 * 0x7D,/*0x0648*/
+ 1 + 256 * 0x7F,/*0x0649*/
+ 1 + 2 + 256 * 0x81,/*0x064A*/
+ 4, 4, 4, 4, /*0x064B-0x064E*/
+ 4, 4, 4, 4, /*0x064F-0x0652*/
+ 4, 4, 4, 0, 0, /*0x0653-0x0657*/
+ 0, 0, 0, 0, /*0x0658-0x065B*/
+ 1 + 256 * 0x85,/*0x065C*/
+ 1 + 256 * 0x87,/*0x065D*/
+ 1 + 256 * 0x89,/*0x065E*/
+ 1 + 256 * 0x8B,/*0x065F*/
+ 0, 0, 0, 0, 0, /*0x0660-0x0664*/
+ 0, 0, 0, 0, 0, /*0x0665-0x0669*/
+ 0, 0, 0, 0, 0, 0, /*0x066A-0x066F*/
+ 4, /*0x0670*/
+ 0, /*0x0671*/
+ 1 + 32, /*0x0672*/
+ 1 + 32, /*0x0673*/
+ 0, /*0x0674*/
+ 1 + 32, /*0x0675*/
+ 1, 1, /*0x0676-0x0677*/
+ 1+2, /*0x0678*/
+ 1+2 + 256 * 0x16,/*0x0679*/
+ 1+2 + 256 * 0x0E,/*0x067A*/
+ 1+2 + 256 * 0x02,/*0x067B*/
+ 1+2, 1+2, /*0x067C-0x067D*/
+ 1+2 + 256 * 0x06,/*0x067E*/
+ 1+2 + 256 * 0x12,/*0x067F*/
+ 1+2 + 256 * 0x0A,/*0x0680*/
+ 1+2, 1+2, /*0x0681-0x0682*/
+ 1+2 + 256 * 0x26,/*0x0683*/
+ 1+2 + 256 * 0x22,/*0x0684*/
+ 1+2, /*0x0685*/
+ 1+2 + 256 * 0x2A,/*0x0686*/
+ 1+2 + 256 * 0x2E,/*0x0687*/
+ 1 + 256 * 0x38,/*0x0688*/
+ 1, 1, 1, /*0x0689-0x068B*/
+ 1 + 256 * 0x34,/*0x068C*/
+ 1 + 256 * 0x32,/*0x068D*/
+ 1 + 256 * 0x36,/*0x068E*/
+ 1, 1, /*0x068F-0x0690*/
+ 1 + 256 * 0x3C,/*0x0691*/
+ 1, 1, 1, 1, 1, 1, /*0x0692-0x0697*/
+ 1 + 256 * 0x3A,/*0x0698*/
+ 1, /*0x0699*/
+ 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x069A-0x069F*/
+ 1+2, 1+2, 1+2, 1+2, /*0x06A0-0x06A3*/
+ 1+2 + 256 * 0x2E,/*0x06A4*/
+ 1+2, /*0x06A5*/
+ 1+2 + 256 * 0x1E,/*0x06A6*/
+ 1+2, 1+2, /*0x06A7-0x06A8*/
+ 1+2 + 256 * 0x3E,/*0x06A9*/
+ 1+2, 1+2, 1+2, /*0x06AA-0x06AC*/
+ 1+2 + 256 * 0x83,/*0x06AD*/
+ 1+2, /*0x06AE*/
+ 1+2 + 256 * 0x42,/*0x06AF*/
+ 1+2, /*0x06B0*/
+ 1+2 + 256 * 0x4A,/*0x06B1*/
+ 1+2, /*0x06B2*/
+ 1+2 + 256 * 0x46,/*0x06B3*/
+ 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x06B4-0x06B9*/
+ 1+2, /*0x06BA*/ // FIXME: Seems to have a final form
+ 1+2 + 256 * 0x50,/*0x06BB*/
+ 1+2, 1+2, /*0x06BC-0x06BD*/
+ 1+2 + 256 * 0x5A,/*0x06BE*/
+ 1+2, /*0x06BF*/
+ 1, /*0x06C0*/
+ 1+2 + 256 * 0x56,/*0x06C1*/
+ 1+2, /*0x06C2*/
+ 1, 1, /*0x06C3-0x06C4*/
+ 1 + 256 * 0x90,/*0x06C5*/
+ 1 + 256 * 0x89,/*0x06C6*/
+ 1 + 256 * 0x87,/*0x06C7*/
+ 1 + 256 * 0x8B,/*0x06C8*/
+ 1 + 256 * 0x92,/*0x06C9*/
+ 1, /*0x06CA*/
+ 1 + 256 * 0x8E,/*0x06CB*/
+ 1+2 + 256 * 0xAC,/*0x06CC*/
+ 1, /*0x06CD*/
+ 1+2, /*0x06CE*/
+ 1, /*0x06CF*/
+ 1+2 + 256 * 0x94,/*0x06D0*/
+ 1+2, /*0x06D1*/
+ 1 + 256 * 0x5E,/*0x06D2*/
+ 1 + 256 * 0x60 /*0x06D3*/
+};
+
+/*
+ * Link flags for U+FE70..U+FEFC; getLink() indexes this table with (ch - 0xFE70).
+ * Values combine LINKR (1), LINKL (2), LAMTYPE (16) and ALEFTYPE (32).
+ * The range comment on each row gives the code points that row occupies under
+ * that indexing (141 entries in total).
+ */
+static const UChar presLink[141]=
+{
+ 1 + 2, /*0xFE70*/
+ 1 + 2, /*0xFE71*/
+ 1 + 2, 0, 1+ 2, 0, 1+ 2, /*0xFE72-0xFE76*/
+ 1 + 2, /*0xFE77*/
+ 1+ 2, 1 + 2, 1+2, 1 + 2, /*0xFE78-0xFE7B*/
+ 1+ 2, 1 + 2, 1+2, 1 + 2, /*0xFE7C-0xFE7F*/
+ 0, 0 + 32, 1 + 32, 0 + 32, /*0xFE80-0xFE83*/
+ 1 + 32, 0, 1, 0 + 32, /*0xFE84-0xFE87*/
+ 1 + 32, 0, 2, 1 + 2, /*0xFE88-0xFE8B*/
+ 1, 0 + 32, 1 + 32, 0, /*0xFE8C-0xFE8F*/
+ 2, 1 + 2, 1, 0, /*0xFE90-0xFE93*/
+ 1, 0, 2, 1 + 2, /*0xFE94-0xFE97*/
+ 1, 0, 2, 1 + 2, /*0xFE98-0xFE9B*/
+ 1, 0, 2, 1 + 2, /*0xFE9C-0xFE9F*/
+ 1, 0, 2, 1 + 2, /*0xFEA0-0xFEA3*/
+ 1, 0, 2, 1 + 2, /*0xFEA4-0xFEA7*/
+ 1, 0, 1, 0, /*0xFEA8-0xFEAB*/
+ 1, 0, 1, 0, /*0xFEAC-0xFEAF*/
+ 1, 0, 2, 1+2, /*0xFEB0-0xFEB3*/
+ 1, 0, 2, 1+2, /*0xFEB4-0xFEB7*/
+ 1, 0, 2, 1+2, /*0xFEB8-0xFEBB*/
+ 1, 0, 2, 1+2, /*0xFEBC-0xFEBF*/
+ 1, 0, 2, 1+2, /*0xFEC0-0xFEC3*/
+ 1, 0, 2, 1+2, /*0xFEC4-0xFEC7*/
+ 1, 0, 2, 1+2, /*0xFEC8-0xFECB*/
+ 1, 0, 2, 1+2, /*0xFECC-0xFECF*/
+ 1, 0, 2, 1+2, /*0xFED0-0xFED3*/
+ 1, 0, 2, 1+2, /*0xFED4-0xFED7*/
+ 1, 0, 2, 1+2, /*0xFED8-0xFEDB*/
+ 1, 0 + 16, 2 + 16, 1 + 2 +16, /*0xFEDC-0xFEDF*/
+ 1 + 16, 0, 2, 1+2, /*0xFEE0-0xFEE3*/
+ 1, 0, 2, 1+2, /*0xFEE4-0xFEE7*/
+ 1, 0, 2, 1+2, /*0xFEE8-0xFEEB*/
+ 1, 0, 1, 0, /*0xFEEC-0xFEEF*/
+ 1, 0, 2, 1+2, /*0xFEF0-0xFEF3*/
+ 1, 0, 1, 0, /*0xFEF4-0xFEF7*/
+ 1, 0, 1, 0, /*0xFEF8-0xFEFB*/
+ 1 /*0xFEFC*/
+};
+
+/*
+ * Maps each code point in U+FE70..U+FEFC to a code point in the 06xx range;
+ * shapeUnicode() indexes this table with (ch - 0xFE70) before shaping.
+ * Each row covers 16 code points (the row label gives the upper three hex digits);
+ * the last row stops at U+FEFC. U+FEF5..U+FEFC map to the 0x065C..0x065F
+ * placeholders that changeLamAlef() also produces.
+ */
+static const UChar convertFEto06[] =
+{
+/***********0******1******2******3******4******5******6******7******8******9******A******B******C******D******E******F***/
+/*FE7*/ 0x64B, 0x64B, 0x64C, 0x64C, 0x64D, 0x64D, 0x64E, 0x64E, 0x64F, 0x64F, 0x650, 0x650, 0x651, 0x651, 0x652, 0x652,
+/*FE8*/ 0x621, 0x622, 0x622, 0x623, 0x623, 0x624, 0x624, 0x625, 0x625, 0x626, 0x626, 0x626, 0x626, 0x627, 0x627, 0x628,
+/*FE9*/ 0x628, 0x628, 0x628, 0x629, 0x629, 0x62A, 0x62A, 0x62A, 0x62A, 0x62B, 0x62B, 0x62B, 0x62B, 0x62C, 0x62C, 0x62C,
+/*FEA*/ 0x62C, 0x62D, 0x62D, 0x62D, 0x62D, 0x62E, 0x62E, 0x62E, 0x62E, 0x62F, 0x62F, 0x630, 0x630, 0x631, 0x631, 0x632,
+/*FEB*/ 0x632, 0x633, 0x633, 0x633, 0x633, 0x634, 0x634, 0x634, 0x634, 0x635, 0x635, 0x635, 0x635, 0x636, 0x636, 0x636,
+/*FEC*/ 0x636, 0x637, 0x637, 0x637, 0x637, 0x638, 0x638, 0x638, 0x638, 0x639, 0x639, 0x639, 0x639, 0x63A, 0x63A, 0x63A,
+/*FED*/ 0x63A, 0x641, 0x641, 0x641, 0x641, 0x642, 0x642, 0x642, 0x642, 0x643, 0x643, 0x643, 0x643, 0x644, 0x644, 0x644,
+/*FEE*/ 0x644, 0x645, 0x645, 0x645, 0x645, 0x646, 0x646, 0x646, 0x646, 0x647, 0x647, 0x647, 0x647, 0x648, 0x648, 0x649,
+/*FEF*/ 0x649, 0x64A, 0x64A, 0x64A, 0x64A, 0x65C, 0x65C, 0x65D, 0x65D, 0x65E, 0x65E, 0x65F, 0x65F
+};
+
+/*
+ * Shape selector used by shapeUnicode(): indexed as
+ *   shapeTable[nextLink & 3][lastLink & 3][currLink & 3]
+ * where 3 == LINKR + LINKL. The entry (0..3) is added to the base
+ * presentation-form code point of the current character.
+ * NOTE(review): which of isolated/final/initial/medial each value 0..3 denotes
+ * depends on the ordering of forms in the target range -- confirm before relying on it.
+ */
+static const UChar shapeTable[4][4][4]=
+{
+ { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,1} },
+ { {0,0,2,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} },
+ { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,3} },
+ { {0,0,1,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} }
+};
+
+/*
+ *Name : changeLamAlef
+ *Function : Maps an Alef character (0x0622, 0x0623, 0x0625 or 0x0627) to its
+ * intermediate LamAlef placeholder in the 0x065C-0x065F range.
+ * The shaping function later turns the placeholder into a
+ * 0xFExx LamAlef. Returns 0 for every other character.
+ */
+static UChar
+changeLamAlef(UChar ch) {
+    UChar placeholder = 0;
+
+    if (ch == 0x0622)
+        placeholder = 0x065C;
+    else if (ch == 0x0623)
+        placeholder = 0x065D;
+    else if (ch == 0x0625)
+        placeholder = 0x065E;
+    else if (ch == 0x0627)
+        placeholder = 0x065F;
+
+    return placeholder;
+}
+
+/*
+ *Name : specialChar
+ *Function : Classifies characters that need special handling in shapeUnicode.
+ * Returns 1 for 0x0622-0x0625, 0x0627, 0x0629, 0x062F-0x0632 and
+ * 0x0648-0x0649 (shapeUnicode restricts these to shapes 0 and 1),
+ * 2 for the tashkeel range 0x064B-0x0652,
+ * 3 for 0x0653-0x0655, 0x0670 and 0xFE70-0xFE7F,
+ * 0 for everything else.
+ */
+static int32_t
+specialChar(UChar ch) {
+    int twoFormLetter =
+        (ch >= 0x0622 && ch <= 0x0625) || ch == 0x0627 || ch == 0x0629 ||
+        (ch >= 0x062F && ch <= 0x0632) || (ch >= 0x0648 && ch <= 0x0649);
+
+    if (twoFormLetter)
+        return 1;
+
+    if (ch >= 0x064B && ch <= 0x0652)
+        return 2;
+
+    if ((ch >= 0x0653 && ch <= 0x0655) || ch == 0x0670 ||
+        (ch >= 0xFE70 && ch <= 0xFE7F))
+        return 3;
+
+    return 0;
+}
+
+/*
+ *Name : getLink
+ *Function : Returns the link data for a character: a table entry from
+ * araLink (0x0622-0x06D3) or presLink (0xFE70-0xFEFC),
+ * LINKR + LINKL for 0x200D, IRRELEVANT for 0x206D-0x206F,
+ * and 0 for every other character.
+ */
+static UChar
+getLink(UChar ch) {
+    if (ch >= 0x0622 && ch <= 0x06D3)
+        return araLink[ch - 0x0622];
+
+    if (ch == 0x200D)
+        return LINKR + LINKL;
+
+    if (ch >= 0x206D && ch <= 0x206F)
+        return IRRELEVANT;
+
+    if (ch >= 0xFE70 && ch <= 0xFEFC)
+        return presLink[ch - 0xFE70];
+
+    return 0;
+}
+
+/*
+ *Name : isTashkeelChar
+ *Function : Returns 1 when ch lies in the Tashkeel range 0x064B-0x0652,
+ * otherwise 0.
+ */
+static int32_t
+isTashkeelChar(UChar ch) {
+    return (ch < 0x064B || ch > 0x0652) ? 0 : 1;
+}
+
+/*
+ *Name : shapeUnicode
+ *Function : Converts an Arabic Unicode buffer in 06xx Range into a shaped
+ * arabic Unicode buffer in FExx Range
+ *
+ * dest : buffer of sourceLength UChars; read and rewritten in place.
+ * sourceLength : number of UChars in dest. dest[0] is read unconditionally,
+ * so this must be at least 1.
+ * destSize, options, pErrorCode : not consulted by the body (destSize is only
+ * overwritten just before returning).
+ * tashkeelFlag : when 1, a tashkeel character whose previous letter has LINKL
+ * and whose next letter has LINKR gets shape 1 (except 0x064C and 0x064D,
+ * and except between a Lam and an Alef); otherwise tashkeel gets shape 0.
+ * Returns sourceLength.
+ *
+ * Shaped characters below 0x0670 are written relative to 0xFE70, all others
+ * relative to 0xFB50.
+ */
+static int32_t
+shapeUnicode(UChar *dest, int32_t sourceLength,
+ int32_t destSize,uint32_t options,
+ UErrorCode *pErrorCode,
+ int tashkeelFlag) {
+
+ int32_t i, iend;
+ int32_t prevPos, lastPos,Nx, Nw;
+ unsigned int Shape;
+ int32_t flag;
+ int32_t lamalef_found = 0;
+ UChar prevLink = 0, lastLink = 0, currLink, nextLink = 0;
+ UChar wLamalef;
+ /* NOTE: prevPos and lamalef_found are assigned below but never read. */
+
+ /*
+ * Converts the input buffer from FExx Range into 06xx Range
+ * to make sure that all characters are in the 06xx range
+ * even the lamalef is converted to the special region in
+ * the 06xx range
+ */
+ for (i = 0; i < sourceLength; i++) {
+ UChar inputChar = dest[i];
+ if ( (inputChar >= 0xFE70) && (inputChar <= 0xFEFC)) {
+ dest[i] = convertFEto06 [ (inputChar - 0xFE70) ] ;
+ }
+ }
+
+ /* scan forward: i starts at the first character and stops at iend == sourceLength */
+ i = 0;
+ iend = sourceLength;
+
+ /*
+ * This function resolves the link between the characters .
+ * Arabic characters have four forms :
+ * Isolated Form, Initial Form, Middle Form and Final Form
+ */
+ currLink = getLink(dest[i]);
+
+ prevPos = i;
+ lastPos = i;
+ /*
+ * Nx caches the index of the next non-IRRELEVANT character (whose link is in
+ * nextLink). sourceLength + 2 means "not looked up yet"; -1 means there is none.
+ */
+ Nx = sourceLength + 2, Nw = 0;
+
+ while (i != iend) {
+ /* If high byte of currLink > 0 then more than one shape */
+ if ((currLink & 0xFF00) > 0 || isTashkeelChar(dest[i])) {
+ Nw = i + 1;
+ while (Nx >= sourceLength) { /* we need to know about next char */
+ if(Nw == iend) {
+ nextLink = 0;
+ Nx = -1;
+ } else {
+ nextLink = getLink(dest[Nw]);
+ if((nextLink & IRRELEVANT) == 0) {
+ Nx = Nw;
+ } else {
+ Nw = Nw + 1;
+ }
+ }
+ }
+
+ /* An Alef-type letter directly linked after a Lam-type letter: form a Lam-Alef. */
+ if ( ((currLink & ALEFTYPE) > 0) && ((lastLink & LAMTYPE) > 0) ) {
+ lamalef_found = 1;
+ wLamalef = changeLamAlef(dest[i]); /*get from 0x065C-0x065f */
+ if ( wLamalef != 0) {
+ dest[i] = ' '; /* The default case is to drop the Alef and replace */
+ dest[lastPos] =wLamalef; /* it by a space. */
+ i=lastPos; /* step back so the placeholder written over the Lam gets shaped */
+ }
+ /*
+ * Treat the placeholder as the current character and the letter before the
+ * Lam as its predecessor. If changeLamAlef() returned 0, getLink(0) yields 0.
+ */
+ lastLink = prevLink;
+ currLink = getLink(wLamalef);
+ }
+ /*
+ * get the proper shape according to link ability of neighbors
+ * and of character; depends on the order of the shapes
+ * (isolated, initial, middle, final) in the compatibility area
+ */
+ flag = specialChar(dest[i]);
+
+ Shape = shapeTable[nextLink & (LINKR + LINKL)]
+ [lastLink & (LINKR + LINKL)]
+ [currLink & (LINKR + LINKL)];
+
+ /* flag 1: only shapes 0 and 1 are used; 3 collapses to 1 and 2 to 0 */
+ if (flag == 1) {
+ Shape = (Shape == 1 || Shape == 3) ? 1 : 0;
+ }
+ else
+ if(flag == 2) {
+ /* tashkeel: shape 1 only between two linking letters, and only if tashkeelFlag == 1 */
+ if( (lastLink & LINKL) && (nextLink & LINKR) && (tashkeelFlag == 1) &&
+ dest[i] != 0x064C && dest[i] != 0x064D ) {
+ Shape = 1;
+ if( (nextLink&ALEFTYPE) == ALEFTYPE && (lastLink&LAMTYPE) == LAMTYPE )
+ Shape = 0;
+ }
+ else {
+ Shape = 0;
+ }
+ }
+
+ if(flag == 2) {
+ /* tashkeel forms sit at 0xFE70 + IrrelevantPos[...] (+1 for shape 1) */
+ dest[i] = 0xFE70 + IrrelevantPos[(dest[i] - 0x064B)] + Shape;
+ }
+ else
+ /* base range + per-letter offset from the high byte of currLink + shape */
+ dest[i] = (UChar)((dest[i] < 0x0670 ? 0xFE70 : 0xFB50) + (currLink >> 8) + Shape);
+ }
+
+ /* move one notch forward */
+ /* IRRELEVANT characters do not become the "last" neighbor */
+ if ((currLink & IRRELEVANT) == 0) {
+ prevLink = lastLink;
+ lastLink = currLink;
+ prevPos = lastPos;
+ lastPos = i;
+ }
+
+ i++;
+ if (i == Nx) {
+ /* reuse the link already fetched by the look-ahead and invalidate the cache */
+ currLink = nextLink;
+ Nx = sourceLength + 2;
+ }
+ else if(i != iend) {
+ currLink = getLink(dest[i]);
+ }
+ }
+
+ /* shaping never changes the length */
+ destSize = sourceLength;
+
+ return destSize;
+}
+
+/*
+ * shapeArabic
+ * Copies sourceLength UChars from source to dest and shapes the Arabic letters
+ * in dest in place. sourceLength == -1 means source is NUL-terminated.
+ *
+ * Only U_SHAPE_LETTERS_SHAPE and U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED are
+ * implemented; other letter options, digit shaping, GROW_SHRINK length handling
+ * and non-logical text direction are rejected by assertions only.
+ *
+ * Returns 0 if *pErrorCode already indicates failure, if an argument is invalid
+ * (U_ILLEGAL_ARGUMENT_ERROR) or if the source is empty; returns sourceLength with
+ * U_BUFFER_OVERFLOW_ERROR if destCapacity is too small; otherwise returns
+ * sourceLength.
+ */
+int32_t shapeArabic(const UChar *source, int32_t sourceLength, UChar *dest, int32_t destCapacity, uint32_t options, UErrorCode *pErrorCode) {
+    uint32_t letterOptions;
+
+    /* bail out if the caller is already in an error state */
+    if (pErrorCode == NULL || U_FAILURE(*pErrorCode))
+        return 0;
+
+    /* reject reserved option values; dest may be NULL only with destCapacity == 0 */
+    if (source == NULL || sourceLength < -1 ||
+        destCapacity < 0 || (dest == NULL && destCapacity != 0) ||
+        options >= U_SHAPE_DIGIT_TYPE_RESERVED ||
+        (options & U_SHAPE_DIGITS_MASK) >= U_SHAPE_DIGITS_RESERVED) {
+        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    /* resolve a NUL-terminated source; an empty source produces nothing */
+    if (sourceLength == -1)
+        sourceLength = u_strlen(source);
+    if (sourceLength == 0)
+        return 0;
+
+    /* source and destination must not overlap */
+    if (dest != NULL &&
+        ((source <= dest && dest < source + sourceLength) ||
+         (dest <= source && source < dest + destCapacity))) {
+        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    letterOptions = options & U_SHAPE_LETTERS_MASK;
+    if (letterOptions == U_SHAPE_LETTERS_NOOP) {
+        ASSERT_NOT_REACHED();
+    } else {
+        /* the output always has the same length as the input */
+        ASSERT((options & U_SHAPE_LENGTH_MASK) != U_SHAPE_LENGTH_GROW_SHRINK);
+
+        if (sourceLength > destCapacity) {
+            *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
+            return sourceLength;
+        }
+
+        /* shaping works in place on a copy of the source */
+        memcpy(dest, source, sourceLength * U_SIZEOF_UCHAR);
+
+        ASSERT((options & U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL);
+
+        switch (letterOptions) {
+        case U_SHAPE_LETTERS_SHAPE:
+            /* tashkeel flag == 1 */
+            shapeUnicode(dest, sourceLength, destCapacity, options, pErrorCode, 1);
+            break;
+        case U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED:
+            /* tashkeel flag == 0 */
+            shapeUnicode(dest, sourceLength, destCapacity, options, pErrorCode, 0);
+            break;
+        case U_SHAPE_LETTERS_UNSHAPE:
+            ASSERT_NOT_REACHED();
+            break;
+        default:
+            /* will never occur because of validity checks above */
+            break;
+        }
+    }
+
+    ASSERT((options & U_SHAPE_DIGITS_MASK) == U_SHAPE_DIGITS_NOOP);
+
+    return sourceLength;
+}
+
+#endif // USE(ATSUI)
diff --git a/WebCore/platform/text/mac/ShapeArabic.h b/WebCore/platform/text/mac/ShapeArabic.h
new file mode 100644
index 0000000..8aa577d
--- /dev/null
+++ b/WebCore/platform/text/mac/ShapeArabic.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2007 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ShapeArabic_h
+#define ShapeArabic_h
+
+#if USE(ATSUI)
+
+#include <unicode/ushape.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Copies source to dest and shapes Arabic letters in dest (see ShapeArabic.c).
+ * sourceLength may be -1 for a NUL-terminated source. source and dest must not
+ * overlap. On entry *pErrorCode must not indicate failure.
+ * Returns sourceLength on success, or when dest is too small (with
+ * U_BUFFER_OVERFLOW_ERROR); returns 0 on an invalid argument
+ * (U_ILLEGAL_ARGUMENT_ERROR) or an empty source.
+ */
+int32_t shapeArabic(const UChar *source, int32_t sourceLength, UChar *dest, int32_t destCapacity, uint32_t options, UErrorCode *pErrorCode);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // USE(ATSUI)
+#endif // ShapeArabic_h
diff --git a/WebCore/platform/text/mac/StringImplMac.mm b/WebCore/platform/text/mac/StringImplMac.mm
new file mode 100644
index 0000000..2180b94
--- /dev/null
+++ b/WebCore/platform/text/mac/StringImplMac.mm
@@ -0,0 +1,31 @@
+/**
+ * Copyright (C) 2006 Apple Computer, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "config.h"
+#include "StringImpl.h"
+
+namespace WebCore {
+
+// Converts this StringImpl to an NSString built from the m_length characters at m_data.
+StringImpl::operator NSString *()
+{
+    NSString *converted = [NSString stringWithCharacters:m_data length:m_length];
+    return converted;
+}
+
+}
diff --git a/WebCore/platform/text/mac/StringMac.mm b/WebCore/platform/text/mac/StringMac.mm
new file mode 100644
index 0000000..77942ea
--- /dev/null
+++ b/WebCore/platform/text/mac/StringMac.mm
@@ -0,0 +1,41 @@
+/**
+ * Copyright (C) 2006 Apple Computer, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "config.h"
+#include "PlatformString.h"
+
+namespace WebCore {
+
+// Constructs a String from an NSString. A nil NSString leaves m_impl untouched;
+// a zero-length one yields the shared empty StringImpl.
+String::String(NSString* str)
+{
+    if (!str)
+        return;
+
+    CFStringRef cfString = reinterpret_cast<CFStringRef>(str);
+    CFIndex length = CFStringGetLength(cfString);
+    if (length == 0) {
+        m_impl = StringImpl::empty();
+        return;
+    }
+
+    // Pull the characters into a temporary buffer, then build the StringImpl from it.
+    Vector<UChar, 1024> characters(length);
+    CFStringGetCharacters(cfString, CFRangeMake(0, length), characters.data());
+    m_impl = StringImpl::create(characters.data(), length);
+}
+
+}
diff --git a/WebCore/platform/text/mac/TextBoundaries.mm b/WebCore/platform/text/mac/TextBoundaries.mm
new file mode 100644
index 0000000..ff1dfd2
--- /dev/null
+++ b/WebCore/platform/text/mac/TextBoundaries.mm
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#import "config.h"
+#import "TextBoundaries.h"
+
+namespace WebCore {
+
+// Finds the word that -[NSAttributedString doubleClickAtIndex:] selects around
+// `position` in the `len` characters at `chars`, and stores its bounds in
+// *start (inclusive) and *end (exclusive). A position at or past the end is
+// clamped to the last character. For empty text both outputs are set to 0.
+void findWordBoundary(const UChar* chars, int len, int position, int* start, int* end)
+{
+    // With no characters the clamp below would produce index len - 1 == -1, which is
+    // not a valid index to pass to doubleClickAtIndex:. Report an empty range instead.
+    if (len <= 0) {
+        *start = 0;
+        *end = 0;
+        return;
+    }
+
+    // The NSString only borrows the caller's buffer (no copy, not freed on release).
+    NSString* string = [[NSString alloc] initWithCharactersNoCopy:const_cast<unichar*>(chars)
+        length:len freeWhenDone:NO];
+    NSAttributedString* attr = [[NSAttributedString alloc] initWithString:string];
+    NSRange range = [attr doubleClickAtIndex:(position >= len) ? len - 1 : position];
+    [attr release];
+    [string release];
+    *start = range.location;
+    *end = range.location + range.length;
+}
+
+// Returns the result of -[NSAttributedString nextWordFromIndex:forward:] for the
+// `len` characters at `chars`, starting from `position`.
+int findNextWordFromIndex(const UChar* chars, int len, int position, bool forward)
+{
+    // The NSString only borrows the caller's buffer (no copy, not freed on release).
+    unichar* characters = const_cast<unichar*>(chars);
+    NSString* text = [[NSString alloc] initWithCharactersNoCopy:characters length:len freeWhenDone:NO];
+    NSAttributedString* attributedText = [[NSAttributedString alloc] initWithString:text];
+
+    int nextWordIndex = [attributedText nextWordFromIndex:position forward:forward];
+
+    [attributedText release];
+    [text release];
+    return nextWordIndex;
+}
+
+}
diff --git a/WebCore/platform/text/mac/TextBreakIteratorInternalICUMac.mm b/WebCore/platform/text/mac/TextBreakIteratorInternalICUMac.mm
new file mode 100644
index 0000000..92983eb
--- /dev/null
+++ b/WebCore/platform/text/mac/TextBreakIteratorInternalICUMac.mm
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2007 Apple Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "config.h"
+#include "TextBreakIteratorInternalICU.h"
+
+namespace WebCore {
+
+static const int maxLocaleStringLength = 32;
+
+// This code was swiped from the CarbonCore UnicodeUtilities. One change from that is to use the empty
+// string instead of the "old locale model" as the ultimate fallback. This change is per the UnicodeUtilities
+// engineer.
+//
+// Writes the canonical language identifier to use for text breaking into localeStringBuffer
+// (maxLocaleStringLength bytes, NUL-terminated). The buffer is left as the empty string when no
+// usable preference exists or the identifier cannot be converted.
+static void getTextBreakLocale(char localeStringBuffer[maxLocaleStringLength])
+{
+    // Empty string means "root locale", which is what we use if we can't use a pref.
+    localeStringBuffer[0] = '\0';
+
+    CFStringRef prefLocaleStr = 0;
+
+    // We get the parts string from AppleTextBreakLocale pref.
+    // Preference values can be of any property list type, so only use it if it is a string.
+    CFPropertyListRef textBreakPref = CFPreferencesCopyValue(CFSTR("AppleTextBreakLocale"),
+        kCFPreferencesAnyApplication, kCFPreferencesCurrentUser, kCFPreferencesAnyHost);
+    if (textBreakPref) {
+        if (CFGetTypeID(textBreakPref) == CFStringGetTypeID())
+            prefLocaleStr = (CFStringRef)textBreakPref; // Keeps the reference returned by the copy.
+        else
+            CFRelease(textBreakPref);
+    }
+
+    // If that fails then look for the first language in the AppleLanguages pref.
+    if (!prefLocaleStr) {
+        CFPropertyListRef languagesPref = CFPreferencesCopyValue(CFSTR("AppleLanguages"),
+            kCFPreferencesAnyApplication, kCFPreferencesCurrentUser, kCFPreferencesAnyHost);
+        if (languagesPref) {
+            if (CFGetTypeID(languagesPref) == CFArrayGetTypeID()) {
+                CFArrayRef appleLangArr = (CFArrayRef)languagesPref;
+                // Index 0 is only valid for a non-empty array.
+                if (CFArrayGetCount(appleLangArr) > 0) {
+                    CFTypeRef topLanguage = CFArrayGetValueAtIndex(appleLangArr, 0);
+                    if (topLanguage && CFGetTypeID(topLanguage) == CFStringGetTypeID()) {
+                        // Take the topmost language. Retain so that we can blindly release later.
+                        prefLocaleStr = (CFStringRef)topLanguage;
+                        CFRetain(prefLocaleStr);
+                    }
+                }
+            }
+            CFRelease(languagesPref);
+        }
+    }
+
+    if (prefLocaleStr) {
+        // Canonicalize pref string in case it is not in the canonical format.
+        CFStringRef canonLocaleCFStr = CFLocaleCreateCanonicalLanguageIdentifierFromString(kCFAllocatorDefault, prefLocaleStr);
+        if (canonLocaleCFStr) {
+            // On failure (unconvertible or too long) fall back to the root locale rather
+            // than leaving whatever was partially written.
+            if (!CFStringGetCString(canonLocaleCFStr, localeStringBuffer, maxLocaleStringLength, kCFStringEncodingASCII))
+                localeStringBuffer[0] = '\0';
+            CFRelease(canonLocaleCFStr);
+        }
+        CFRelease(prefLocaleStr);
+    }
+}
+
+// Returns the text-break locale identifier. It is computed by getTextBreakLocale()
+// on the first call and served from a function-local static buffer afterwards.
+const char* currentTextBreakLocaleID()
+{
+    static char cachedLocaleID[maxLocaleStringLength];
+    static bool isCached = false;
+
+    if (isCached)
+        return cachedLocaleID;
+
+    getTextBreakLocale(cachedLocaleID);
+    isCached = true;
+    return cachedLocaleID;
+}
+
+}
diff --git a/WebCore/platform/text/mac/TextCodecMac.cpp b/WebCore/platform/text/mac/TextCodecMac.cpp
new file mode 100644
index 0000000..ac1f0fb
--- /dev/null
+++ b/WebCore/platform/text/mac/TextCodecMac.cpp
@@ -0,0 +1,321 @@
+/*
+ * Copyright (C) 2004, 2006, 2008 Apple Inc. All rights reserved.
+ * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "TextCodecMac.h"
+
+#include "CString.h"
+#include "CharacterNames.h"
+#include "CharsetData.h"
+#include "PlatformString.h"
+#include <wtf/Assertions.h>
+
+using std::auto_ptr;
+using std::min;
+
+namespace WebCore {
+
+// We need to keep this because ICU doesn't support some of the encodings that we need:
+// <http://bugs.webkit.org/show_bug.cgi?id=4195>.
+
+const size_t ConversionBufferSize = 16384;
+
+static TECObjectRef cachedConverterTEC;
+static TECTextEncodingID cachedConverterEncoding = invalidEncoding;
+
+// Registers every name in CharsetTable with the registrar. Consecutive entries that
+// share an encoding are all registered against the name of the first entry of that
+// run. The table ends at the first entry whose name is null.
+void TextCodecMac::registerEncodingNames(EncodingNameRegistrar registrar)
+{
+    const char* canonicalName = 0;
+    TECTextEncodingID currentEncoding = invalidEncoding;
+
+    for (size_t entry = 0; CharsetTable[entry].name; ++entry) {
+        const bool startsNewRun = CharsetTable[entry].encoding != currentEncoding;
+        if (startsNewRun) {
+            canonicalName = CharsetTable[entry].name;
+            currentEncoding = CharsetTable[entry].encoding;
+        }
+        registrar(CharsetTable[entry].name, canonicalName);
+    }
+}
+
+// Codec factory passed to the registrar: additionalData points at the
+// TECTextEncodingID the new TextCodecMac should use.
+static auto_ptr<TextCodec> newTextCodecMac(const TextEncoding&, const void* additionalData)
+{
+    const TECTextEncodingID* encodingID = static_cast<const TECTextEncodingID*>(additionalData);
+    return auto_ptr<TextCodec>(new TextCodecMac(*encodingID));
+}
+
+// Registers the codec factory once per run of consecutive CharsetTable entries that
+// share an encoding, under the name of the first entry of the run. A pointer to the
+// table's encoding field is handed to the registrar as the factory's additional data.
+void TextCodecMac::registerCodecs(TextCodecRegistrar registrar)
+{
+    TECTextEncodingID previousEncoding = invalidEncoding;
+
+    for (size_t entry = 0; CharsetTable[entry].name; ++entry) {
+        if (CharsetTable[entry].encoding == previousEncoding)
+            continue;
+
+        registrar(CharsetTable[entry].name, newTextCodecMac, &CharsetTable[entry].encoding);
+        previousEncoding = CharsetTable[entry].encoding;
+    }
+}
+
+// Creates a codec for the given encoding. No TEC converter is created here; decode()
+// creates one on first use.
+TextCodecMac::TextCodecMac(TECTextEncodingID encoding)
+    : m_encoding(encoding)
+    // encode() substitutes this character for every backslash, so it must never be read
+    // uninitialized. A plain backslash makes that substitution a no-op.
+    // NOTE(review): if some encodings are meant to use the yen sign here, set it per encoding - confirm.
+    , m_backslashAsCurrencySymbol('\\')
+    , m_numBufferedBytes(0)
+    , m_converterTEC(0)
+{
+}
+
+// Hands this codec's converter, if it has one, over to the file-level cache.
+TextCodecMac::~TextCodecMac()
+{
+    this->releaseTECConverter();
+}
+
+// Moves this codec's converter into the single-entry file-level cache, disposing of
+// whatever converter the cache held before. Does nothing if there is no converter.
+void TextCodecMac::releaseTECConverter() const
+{
+    if (!m_converterTEC)
+        return;
+
+    // The cache holds at most one converter, so the previous occupant is disposed.
+    if (cachedConverterTEC)
+        TECDisposeConverter(cachedConverterTEC);
+
+    cachedConverterEncoding = m_encoding;
+    cachedConverterTEC = m_converterTEC;
+    m_converterTEC = 0;
+}
+
+// Gives this codec a converter from m_encoding to 16-bit Unicode. The cached converter
+// is reused when it was created for the same encoding; otherwise a new one is created.
+// Returns noErr on success, or the status reported by TECCreateConverter.
+OSStatus TextCodecMac::createTECConverter() const
+{
+    const bool canReuseCached = cachedConverterTEC && cachedConverterEncoding == m_encoding;
+
+    // The cached encoding is invalidated whether or not the cached converter is taken.
+    cachedConverterEncoding = invalidEncoding;
+
+    if (canReuseCached) {
+        m_converterTEC = cachedConverterTEC;
+        cachedConverterTEC = 0;
+        // Reset any conversion state left over from the previous owner.
+        TECClearConverterContextInfo(m_converterTEC);
+        return noErr;
+    }
+
+    const TECTextEncodingID unicodeEncoding =
+        CreateTextEncoding(kTextEncodingUnicodeDefault, kTextEncodingDefaultVariant, kUnicode16BitFormat);
+    OSStatus creationStatus = TECCreateConverter(&m_converterTEC, m_encoding, unicodeEncoding);
+    if (creationStatus != noErr)
+        return creationStatus;
+
+    TECSetBasicOptions(m_converterTEC, kUnicodeForceASCIIRangeMask);
+    return noErr;
+}
+
+// Runs one TECConvertText pass. If a partial character is pending in m_bufferedBytes,
+// this pass only tries to complete that character using bytes from the start of
+// inputBuffer; otherwise it converts inputBuffer directly.
+//
+// inputBuffer, inputBufferLength:   bytes to convert.
+// inputLength (out):                number of bytes of inputBuffer that were consumed.
+// outputBuffer, outputBufferLength: destination buffer and its capacity in bytes.
+// outputLength (out):               number of bytes written to outputBuffer.
+//
+// Returns the converter's status, with these adjustments: a partial-character status
+// from the small buffer is reported as noErr (or kTECUnmappableElementErr if the small
+// buffer is full and nothing was read), and kTECBufferBelowMinimumSizeErr with output
+// written is reported as kTECOutputBufferFullStatus.
+OSStatus TextCodecMac::decode(const unsigned char* inputBuffer, int inputBufferLength, int& inputLength,
+    void *outputBuffer, int outputBufferLength, int& outputLength)
+{
+    OSStatus status;
+    unsigned long bytesRead = 0;
+    unsigned long bytesWritten = 0;
+
+    if (m_numBufferedBytes != 0) {
+        // Finish converting a partial character that's in our buffer.
+
+        // First, fill the partial character buffer with as many bytes as are available.
+        ASSERT(m_numBufferedBytes < sizeof(m_bufferedBytes));
+        const int spaceInBuffer = sizeof(m_bufferedBytes) - m_numBufferedBytes;
+        // Use std::min (brought in by the using-declaration at the top of this file)
+        // rather than the non-standard MIN macro.
+        const int bytesToPutInBuffer = min(spaceInBuffer, inputBufferLength);
+        ASSERT(bytesToPutInBuffer != 0);
+        memcpy(m_bufferedBytes + m_numBufferedBytes, inputBuffer, bytesToPutInBuffer);
+
+        // Now, do a conversion on the buffer.
+        status = TECConvertText(m_converterTEC, m_bufferedBytes, m_numBufferedBytes + bytesToPutInBuffer, &bytesRead,
+            reinterpret_cast<unsigned char*>(outputBuffer), outputBufferLength, &bytesWritten);
+        ASSERT(bytesRead <= m_numBufferedBytes + bytesToPutInBuffer);
+
+        if (status == kTECPartialCharErr && bytesRead == 0) {
+            // Handle the case where the partial character was not converted.
+            if (bytesToPutInBuffer >= spaceInBuffer) {
+                LOG_ERROR("TECConvertText gave a kTECPartialCharErr but read none of the %zu bytes in the buffer", sizeof(m_bufferedBytes));
+                m_numBufferedBytes = 0;
+                status = kTECUnmappableElementErr; // should never happen, but use this error code
+            } else {
+                // Tell the caller we read all the source bytes and keep them in the buffer.
+                m_numBufferedBytes += bytesToPutInBuffer;
+                bytesRead = bytesToPutInBuffer;
+                status = noErr;
+            }
+        } else {
+            // We are done with the partial character buffer.
+            // Also, we have read some of the bytes from the main buffer.
+            // bytesRead counts bytes of the small buffer; subtract the ones that were
+            // already buffered so that it counts bytes of inputBuffer instead.
+            if (bytesRead > m_numBufferedBytes) {
+                bytesRead -= m_numBufferedBytes;
+            } else {
+                LOG_ERROR("TECConvertText accepted some bytes it previously rejected with kTECPartialCharErr");
+                bytesRead = 0;
+            }
+            m_numBufferedBytes = 0;
+            if (status == kTECPartialCharErr) {
+                // While there may be a partial character problem in the small buffer,
+                // we have to try again and not get confused and think there is a partial
+                // character problem in the large buffer.
+                status = noErr;
+            }
+        }
+    } else {
+        status = TECConvertText(m_converterTEC, inputBuffer, inputBufferLength, &bytesRead,
+            static_cast<unsigned char*>(outputBuffer), outputBufferLength, &bytesWritten);
+        ASSERT(static_cast<int>(bytesRead) <= inputBufferLength);
+    }
+
+    // Work around bug 3351093, where sometimes we get kTECBufferBelowMinimumSizeErr instead of kTECOutputBufferFullStatus.
+    if (status == kTECBufferBelowMinimumSizeErr && bytesWritten != 0)
+        status = kTECOutputBufferFullStatus;
+
+    inputLength = bytesRead;
+    outputLength = bytesWritten;
+    return status;
+}
+
+// Decodes length bytes of text in m_encoding and returns the result as a String.
+//
+// bytes, length: the encoded input for this call.
+// flush:         when true, TECFlushText is called after the input has been consumed
+//                and whatever it produces is appended to the result.
+// stopOnError:   when true, decoding stops at the first malformed or undefined sequence
+//                and sawError is set; when false, one input byte is skipped and decoding
+//                continues.
+// sawError:      set to true when an error is reported. The conversion loop does not
+//                run if it is already true on entry.
+//
+// A partial character at the end of the input is saved in m_bufferedBytes so that a
+// later call can complete it. Returns a default-constructed String if no converter
+// could be created or the converter reports an unexpected status.
+String TextCodecMac::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError)
+{
+    // Get a converter for the passed-in encoding.
+    if (!m_converterTEC && createTECConverter() != noErr)
+        return String();
+
+    Vector<UChar> result;
+
+    const unsigned char* sourcePointer = reinterpret_cast<const unsigned char*>(bytes);
+    int sourceLength = length;
+    bool bufferWasFull = false;
+    UniChar buffer[ConversionBufferSize];
+
+    // Keep going while there is input left, or while the previous pass filled the
+    // output buffer and the converter may therefore still have output pending.
+    while ((sourceLength || bufferWasFull) && !sawError) {
+        int bytesRead = 0;
+        int bytesWritten = 0;
+        OSStatus status = decode(sourcePointer, sourceLength, bytesRead, buffer, sizeof(buffer), bytesWritten);
+        ASSERT(bytesRead <= sourceLength);
+        sourcePointer += bytesRead;
+        sourceLength -= bytesRead;
+
+        switch (status) {
+            case noErr:
+            case kTECOutputBufferFullStatus:
+                break;
+            case kTextMalformedInputErr:
+            case kTextUndefinedElementErr:
+                // FIXME: Put FFFD character into the output string in this case?
+                TECClearConverterContextInfo(m_converterTEC);
+                if (stopOnError) {
+                    sawError = true;
+                    break;
+                }
+                // Skip the offending byte and carry on with the rest of the input.
+                if (sourceLength) {
+                    sourcePointer += 1;
+                    sourceLength -= 1;
+                }
+                break;
+            case kTECPartialCharErr: {
+                // Put the partial character into the buffer.
+                ASSERT(m_numBufferedBytes == 0);
+                // The capacity is the size of the byte array, not of the counter that
+                // tracks how much of it is in use.
+                const int bufferSize = sizeof(m_bufferedBytes);
+                if (sourceLength < bufferSize) {
+                    memcpy(m_bufferedBytes, sourcePointer, sourceLength);
+                    m_numBufferedBytes = sourceLength;
+                } else {
+                    LOG_ERROR("TECConvertText gave a kTECPartialCharErr, but left %u bytes in the buffer", sourceLength);
+                }
+                sourceLength = 0;
+                break;
+            }
+            default:
+                sawError = true;
+                return String();
+        }
+
+        ASSERT(!(bytesWritten % sizeof(UChar)));
+        result.append(buffer, bytesWritten / sizeof(UChar));
+
+        bufferWasFull = status == kTECOutputBufferFullStatus;
+    }
+
+    if (flush) {
+        unsigned long bytesWritten = 0;
+        TECFlushText(m_converterTEC, reinterpret_cast<unsigned char*>(buffer), sizeof(buffer), &bytesWritten);
+        ASSERT(!(bytesWritten % sizeof(UChar)));
+        result.append(buffer, bytesWritten / sizeof(UChar));
+    }
+
+    String resultString = String::adopt(result);
+
+    // <rdar://problem/3225472>
+    // Simplified Chinese pages use the code A3A0 to mean "full-width space".
+    // But GB18030 decodes it to U+E5E5, which is correct in theory but not in practice.
+    // To work around, just change all occurrences of U+E5E5 to U+3000 (ideographic space).
+    if (m_encoding == kCFStringEncodingGB_18030_2000)
+        resultString.replace(0xE5E5, ideographicSpace);
+
+    return resultString;
+}
+
+// Encodes length UTF-16 code units into m_encoding and returns the bytes as a CString.
+// Text is converted with CFStringGetBytes. Wherever it stops short, the character it
+// could not convert is replaced by the sequence getUnencodableReplacement() produces
+// for the given handling mode, and conversion resumes after that character.
+CString TextCodecMac::encode(const UChar* characters, size_t length, UnencodableHandling handling)
+{
+    // FIXME: We should really use TEC here instead of CFString for consistency with the other direction.
+
+    // FIXME: Since there's no "force ASCII range" mode in CFString, we change the backslash into a yen sign.
+    // Encoding will change the yen sign back into a backslash.
+    String escaped(characters, length);
+    escaped.replace('\\', m_backslashAsCurrencySymbol);
+    CFStringRef source = escaped.createCFString();
+
+    const UInt8 lossByte = handling == QuestionMarksForUnencodables ? '?' : 0;
+    Vector<char> encoded;
+    size_t encodedSize = 0;
+
+    CFIndex position = 0;
+    for (CFIndex remaining = CFStringGetLength(source); remaining > 0; ) {
+        const CFRange range = CFRangeMake(position, remaining);
+
+        // The first call has no output buffer; it is only used to get a byte count.
+        CFIndex byteCount;
+        CFStringGetBytes(source, range, m_encoding, lossByte, false, NULL, 0x7FFFFFFF, &byteCount);
+
+        // The second call writes the bytes straight into the tail of the result.
+        encoded.grow(encodedSize + byteCount);
+        unsigned char* destination = reinterpret_cast<unsigned char*>(encoded.data() + encodedSize);
+        CFIndex consumed = CFStringGetBytes(source, range, m_encoding, lossByte, false, destination, byteCount, &byteCount);
+        encodedSize += byteCount;
+
+        if (consumed != remaining) {
+            // Conversion stopped early: the next character is the one that failed.
+            unsigned codePoint = CFStringGetCharacterAtIndex(source, position + consumed);
+            ++consumed;
+
+            const bool isHighSurrogate = (codePoint & 0xFC00) == 0xD800;
+            if (isHighSurrogate && consumed != remaining) {
+                const UniChar trail = CFStringGetCharacterAtIndex(source, position + consumed);
+                const bool isLowSurrogate = (trail & 0xFC00) == 0xDC00;
+                if (isLowSurrogate) {
+                    // Combine the surrogate pair into a single code point.
+                    codePoint = (codePoint << 10) + trail + (0x10000 - (0xD800 << 10) - 0xDC00);
+                    ++consumed;
+                }
+            }
+
+            UnencodableReplacementArray replacement;
+            int replacementLength = getUnencodableReplacement(codePoint, handling, replacement);
+            encoded.grow(encodedSize + replacementLength);
+            memcpy(encoded.data() + encodedSize, replacement, replacementLength);
+            encodedSize += replacementLength;
+        }
+
+        position += consumed;
+        remaining -= consumed;
+    }
+
+    CFRelease(source);
+    return CString(encoded.data(), encodedSize);
+}
+
+} // namespace WebCore
diff --git a/WebCore/platform/text/mac/TextCodecMac.h b/WebCore/platform/text/mac/TextCodecMac.h
new file mode 100644
index 0000000..aee4a97
--- /dev/null
+++ b/WebCore/platform/text/mac/TextCodecMac.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef TextCodecMac_h
+#define TextCodecMac_h
+
+#include "TextCodec.h"
+#include <CoreServices/CoreServices.h>
+
+namespace WebCore {
+
+ typedef ::TextEncoding TECTextEncodingID;
+ const TECTextEncodingID invalidEncoding = kCFStringEncodingInvalidId;
+
+ class TextCodecMac : public TextCodec { // TextCodec that decodes with a TEC converter and encodes with CFString
+ public:
+ static void registerEncodingNames(EncodingNameRegistrar); // registers each CharsetTable name against the first name of its encoding run
+ static void registerCodecs(TextCodecRegistrar); // registers one codec factory per run of equal encodings in CharsetTable
+
+ explicit TextCodecMac(TECTextEncodingID); // stores the encoding; the TEC converter is created later, by decode()
+ virtual ~TextCodecMac(); // calls releaseTECConverter()
+
+ virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError); // bytes in m_encoding -> String; sets sawError on failure
+ virtual CString encode(const UChar*, size_t length, UnencodableHandling); // UTF-16 -> bytes in m_encoding, with replacements for unencodable characters
+
+ private:
+ OSStatus decode(const unsigned char* inputBuffer, int inputBufferLength, int& inputLength, // one TECConvertText pass; inputLength receives the bytes consumed
+ void* outputBuffer, int outputBufferLength, int& outputLength); // outputLength receives the bytes written
+
+ OSStatus createTECConverter() const; // reuses the file-level cached converter when its encoding matches, else creates one
+ void releaseTECConverter() const; // moves m_converterTEC into the file-level cache
+
+ TECTextEncodingID m_encoding; // encoding this codec converts from and to
+ UChar m_backslashAsCurrencySymbol; // character that encode() substitutes for a backslash before converting
+ unsigned m_numBufferedBytes; // number of bytes of a pending partial character held in m_bufferedBytes
+ unsigned char m_bufferedBytes[16]; // bigger than any single multi-byte character
+ mutable TECObjectRef m_converterTEC; // mutable because the const create/release helpers assign it
+ };
+
+} // namespace WebCore
+
+#endif // TextCodecMac_h
diff --git a/WebCore/platform/text/mac/character-sets.txt b/WebCore/platform/text/mac/character-sets.txt
new file mode 100644
index 0000000..475e78e
--- /dev/null
+++ b/WebCore/platform/text/mac/character-sets.txt
@@ -0,0 +1,1868 @@
+
+===================================================================
+CHARACTER SETS
+
+(last updated 28 January 2005)
+
+These are the official names for character sets that may be used in
+the Internet and may be referred to in Internet documentation. These
+names are expressed in ANSI_X3.4-1968 which is commonly called
+US-ASCII or simply ASCII. The character set most commonly used in the
+Internet and used especially in protocol standards is US-ASCII; this
+is strongly encouraged. The use of the name US-ASCII is also
+encouraged.
+
+The character set names may be up to 40 characters taken from the
+printable characters of US-ASCII. However, no distinction is made
+between use of upper and lower case letters.
+
+The MIBenum value is a unique value for use in MIBs to identify coded
+character sets.
+
+The value space for MIBenum values has been divided into three
+regions. The first region (3-999) consists of coded character sets
+that have been standardized by some standard setting organization.
+This region is intended for standards that do not have subset
+implementations. The second region (1000-1999) is for the Unicode and
+ISO/IEC 10646 coded character sets together with a specification of a
+(set of) sub-repertoires that may occur. The third region (>1999) is
+intended for vendor specific coded character sets.
+
+ Assigned MIB enum Numbers
+ -------------------------
+ 0-2 Reserved
+ 3-999 Set By Standards Organizations
+ 1000-1999 Unicode / 10646
+ 2000-2999 Vendor
+
+The aliases that start with "cs" have been added for use with the
+IANA-CHARSET-MIB as originally defined in RFC3808, and as currently
+maintained by IANA at http://www.iana.org/assignments/ianacharset-mib.
+Note that the ianacharset-mib needs to be kept in sync with this
+registry. These aliases that start with "cs" contain the standard
+numbers along with suggestive names in order to facilitate applications
+that want to display the names in user interfaces. The "cs" stands
+for character set and is provided for applications that need a lower
+case first letter but want to use mixed case thereafter that cannot
+contain any special characters, such as underbar ("_") and dash ("-").
+
+If the character set is from an ISO standard, its cs alias is the ISO
+standard number or name. If the character set is not from an ISO
+standard, but is registered with ISO (IPSJ/ITSCJ is the current ISO
+Registration Authority), the ISO Registry number is specified as
+ISOnnn followed by letters suggestive of the name or standards number
+of the code set. When a national or international standard is
+revised, the year of revision is added to the cs alias of the new
+character set entry in the IANA Registry in order to distinguish the
+revised character set from the original character set.
+
+
+Character Set Reference
+------------- ---------
+
+Name: ANSI_X3.4-1968 [RFC1345,KXS2]
+MIBenum: 3
+Source: ECMA registry
+Alias: iso-ir-6
+Alias: ANSI_X3.4-1986
+Alias: ISO_646.irv:1991
+Alias: ASCII
+Alias: ISO646-US
+Alias: US-ASCII (preferred MIME name)
+Alias: us
+Alias: IBM367
+Alias: cp367
+Alias: csASCII
+
+Name: ISO-10646-UTF-1
+MIBenum: 27
+Source: Universal Transfer Format (1), this is the multibyte
+ encoding, that subsets ASCII-7. It does not have byte
+ ordering issues.
+Alias: csISO10646UTF1
+
+Name: ISO_646.basic:1983 [RFC1345,KXS2]
+MIBenum: 28
+Source: ECMA registry
+Alias: ref
+Alias: csISO646basic1983
+
+Name: INVARIANT [RFC1345,KXS2]
+MIBenum: 29
+Alias: csINVARIANT
+
+Name: ISO_646.irv:1983 [RFC1345,KXS2]
+MIBenum: 30
+Source: ECMA registry
+Alias: iso-ir-2
+Alias: irv
+Alias: csISO2IntlRefVersion
+
+Name: BS_4730 [RFC1345,KXS2]
+MIBenum: 20
+Source: ECMA registry
+Alias: iso-ir-4
+Alias: ISO646-GB
+Alias: gb
+Alias: uk
+Alias: csISO4UnitedKingdom
+
+Name: NATS-SEFI [RFC1345,KXS2]
+MIBenum: 31
+Source: ECMA registry
+Alias: iso-ir-8-1
+Alias: csNATSSEFI
+
+Name: NATS-SEFI-ADD [RFC1345,KXS2]
+MIBenum: 32
+Source: ECMA registry
+Alias: iso-ir-8-2
+Alias: csNATSSEFIADD
+
+Name: NATS-DANO [RFC1345,KXS2]
+MIBenum: 33
+Source: ECMA registry
+Alias: iso-ir-9-1
+Alias: csNATSDANO
+
+Name: NATS-DANO-ADD [RFC1345,KXS2]
+MIBenum: 34
+Source: ECMA registry
+Alias: iso-ir-9-2
+Alias: csNATSDANOADD
+
+Name: SEN_850200_B [RFC1345,KXS2]
+MIBenum: 35
+Source: ECMA registry
+Alias: iso-ir-10
+Alias: FI
+Alias: ISO646-FI
+Alias: ISO646-SE
+Alias: se
+Alias: csISO10Swedish
+
+Name: SEN_850200_C [RFC1345,KXS2]
+MIBenum: 21
+Source: ECMA registry
+Alias: iso-ir-11
+Alias: ISO646-SE2
+Alias: se2
+Alias: csISO11SwedishForNames
+
+Name: KS_C_5601-1987 [RFC1345,KXS2]
+MIBenum: 36
+Source: ECMA registry
+Alias: iso-ir-149
+Alias: KS_C_5601-1989
+Alias: KSC_5601
+Alias: korean
+Alias: csKSC56011987
+
+Name: ISO-2022-KR (preferred MIME name) [RFC1557,Choi]
+MIBenum: 37
+Source: RFC-1557 (see also KS_C_5601-1987)
+Alias: csISO2022KR
+
+Name: EUC-KR (preferred MIME name) [RFC1557,Choi]
+MIBenum: 38
+Source: RFC-1557 (see also KS_C_5861-1992)
+Alias: csEUCKR
+
+Name: ISO-2022-JP (preferred MIME name) [RFC1468,Murai]
+MIBenum: 39
+Source: RFC-1468 (see also RFC-2237)
+Alias: csISO2022JP
+
+Name: ISO-2022-JP-2 (preferred MIME name) [RFC1554,Ohta]
+MIBenum: 40
+Source: RFC-1554
+Alias: csISO2022JP2
+
+Name: ISO-2022-CN [RFC1922]
+MIBenum: 104
+Source: RFC-1922
+
+Name: ISO-2022-CN-EXT [RFC1922]
+MIBenum: 105
+Source: RFC-1922
+
+Name: JIS_C6220-1969-jp [RFC1345,KXS2]
+MIBenum: 41
+Source: ECMA registry
+Alias: JIS_C6220-1969
+Alias: iso-ir-13
+Alias: katakana
+Alias: x0201-7
+Alias: csISO13JISC6220jp
+
+Name: JIS_C6220-1969-ro [RFC1345,KXS2]
+MIBenum: 42
+Source: ECMA registry
+Alias: iso-ir-14
+Alias: jp
+Alias: ISO646-JP
+Alias: csISO14JISC6220ro
+
+Name: IT [RFC1345,KXS2]
+MIBenum: 22
+Source: ECMA registry
+Alias: iso-ir-15
+Alias: ISO646-IT
+Alias: csISO15Italian
+
+Name: PT [RFC1345,KXS2]
+MIBenum: 43
+Source: ECMA registry
+Alias: iso-ir-16
+Alias: ISO646-PT
+Alias: csISO16Portuguese
+
+Name: ES [RFC1345,KXS2]
+MIBenum: 23
+Source: ECMA registry
+Alias: iso-ir-17
+Alias: ISO646-ES
+Alias: csISO17Spanish
+
+Name: greek7-old [RFC1345,KXS2]
+MIBenum: 44
+Source: ECMA registry
+Alias: iso-ir-18
+Alias: csISO18Greek7Old
+
+Name: latin-greek [RFC1345,KXS2]
+MIBenum: 45
+Source: ECMA registry
+Alias: iso-ir-19
+Alias: csISO19LatinGreek
+
+Name: DIN_66003 [RFC1345,KXS2]
+MIBenum: 24
+Source: ECMA registry
+Alias: iso-ir-21
+Alias: de
+Alias: ISO646-DE
+Alias: csISO21German
+
+Name: NF_Z_62-010_(1973) [RFC1345,KXS2]
+MIBenum: 46
+Source: ECMA registry
+Alias: iso-ir-25
+Alias: ISO646-FR1
+Alias: csISO25French
+
+Name: Latin-greek-1 [RFC1345,KXS2]
+MIBenum: 47
+Source: ECMA registry
+Alias: iso-ir-27
+Alias: csISO27LatinGreek1
+
+Name: ISO_5427 [RFC1345,KXS2]
+MIBenum: 48
+Source: ECMA registry
+Alias: iso-ir-37
+Alias: csISO5427Cyrillic
+
+Name: JIS_C6226-1978 [RFC1345,KXS2]
+MIBenum: 49
+Source: ECMA registry
+Alias: iso-ir-42
+Alias: csISO42JISC62261978
+
+Name: BS_viewdata [RFC1345,KXS2]
+MIBenum: 50
+Source: ECMA registry
+Alias: iso-ir-47
+Alias: csISO47BSViewdata
+
+Name: INIS [RFC1345,KXS2]
+MIBenum: 51
+Source: ECMA registry
+Alias: iso-ir-49
+Alias: csISO49INIS
+
+Name: INIS-8 [RFC1345,KXS2]
+MIBenum: 52
+Source: ECMA registry
+Alias: iso-ir-50
+Alias: csISO50INIS8
+
+Name: INIS-cyrillic [RFC1345,KXS2]
+MIBenum: 53
+Source: ECMA registry
+Alias: iso-ir-51
+Alias: csISO51INISCyrillic
+
+Name: ISO_5427:1981 [RFC1345,KXS2]
+MIBenum: 54
+Source: ECMA registry
+Alias: iso-ir-54
+Alias: ISO5427Cyrillic1981
+
+Name: ISO_5428:1980 [RFC1345,KXS2]
+MIBenum: 55
+Source: ECMA registry
+Alias: iso-ir-55
+Alias: csISO5428Greek
+
+Name: GB_1988-80 [RFC1345,KXS2]
+MIBenum: 56
+Source: ECMA registry
+Alias: iso-ir-57
+Alias: cn
+Alias: ISO646-CN
+Alias: csISO57GB1988
+
+Name: GB_2312-80 [RFC1345,KXS2]
+MIBenum: 57
+Source: ECMA registry
+Alias: iso-ir-58
+Alias: chinese
+Alias: csISO58GB231280
+
+Name: NS_4551-1 [RFC1345,KXS2]
+MIBenum: 25
+Source: ECMA registry
+Alias: iso-ir-60
+Alias: ISO646-NO
+Alias: no
+Alias: csISO60DanishNorwegian
+Alias: csISO60Norwegian1
+
+Name: NS_4551-2 [RFC1345,KXS2]
+MIBenum: 58
+Source: ECMA registry
+Alias: ISO646-NO2
+Alias: iso-ir-61
+Alias: no2
+Alias: csISO61Norwegian2
+
+Name: NF_Z_62-010 [RFC1345,KXS2]
+MIBenum: 26
+Source: ECMA registry
+Alias: iso-ir-69
+Alias: ISO646-FR
+Alias: fr
+Alias: csISO69French
+
+Name: videotex-suppl [RFC1345,KXS2]
+MIBenum: 59
+Source: ECMA registry
+Alias: iso-ir-70
+Alias: csISO70VideotexSupp1
+
+Name: PT2 [RFC1345,KXS2]
+MIBenum: 60
+Source: ECMA registry
+Alias: iso-ir-84
+Alias: ISO646-PT2
+Alias: csISO84Portuguese2
+
+Name: ES2 [RFC1345,KXS2]
+MIBenum: 61
+Source: ECMA registry
+Alias: iso-ir-85
+Alias: ISO646-ES2
+Alias: csISO85Spanish2
+
+Name: MSZ_7795.3 [RFC1345,KXS2]
+MIBenum: 62
+Source: ECMA registry
+Alias: iso-ir-86
+Alias: ISO646-HU
+Alias: hu
+Alias: csISO86Hungarian
+
+Name: JIS_C6226-1983 [RFC1345,KXS2]
+MIBenum: 63
+Source: ECMA registry
+Alias: iso-ir-87
+Alias: x0208
+Alias: JIS_X0208-1983
+Alias: csISO87JISX0208
+
+Name: greek7 [RFC1345,KXS2]
+MIBenum: 64
+Source: ECMA registry
+Alias: iso-ir-88
+Alias: csISO88Greek7
+
+Name: ASMO_449 [RFC1345,KXS2]
+MIBenum: 65
+Source: ECMA registry
+Alias: ISO_9036
+Alias: arabic7
+Alias: iso-ir-89
+Alias: csISO89ASMO449
+
+Name: iso-ir-90 [RFC1345,KXS2]
+MIBenum: 66
+Source: ECMA registry
+Alias: csISO90
+
+Name: JIS_C6229-1984-a [RFC1345,KXS2]
+MIBenum: 67
+Source: ECMA registry
+Alias: iso-ir-91
+Alias: jp-ocr-a
+Alias: csISO91JISC62291984a
+
+Name: JIS_C6229-1984-b [RFC1345,KXS2]
+MIBenum: 68
+Source: ECMA registry
+Alias: iso-ir-92
+Alias: ISO646-JP-OCR-B
+Alias: jp-ocr-b
+Alias: csISO92JISC62991984b
+
+Name: JIS_C6229-1984-b-add [RFC1345,KXS2]
+MIBenum: 69
+Source: ECMA registry
+Alias: iso-ir-93
+Alias: jp-ocr-b-add
+Alias: csISO93JIS62291984badd
+
+Name: JIS_C6229-1984-hand [RFC1345,KXS2]
+MIBenum: 70
+Source: ECMA registry
+Alias: iso-ir-94
+Alias: jp-ocr-hand
+Alias: csISO94JIS62291984hand
+
+Name: JIS_C6229-1984-hand-add [RFC1345,KXS2]
+MIBenum: 71
+Source: ECMA registry
+Alias: iso-ir-95
+Alias: jp-ocr-hand-add
+Alias: csISO95JIS62291984handadd
+
+Name: JIS_C6229-1984-kana [RFC1345,KXS2]
+MIBenum: 72
+Source: ECMA registry
+Alias: iso-ir-96
+Alias: csISO96JISC62291984kana
+
+Name: ISO_2033-1983 [RFC1345,KXS2]
+MIBenum: 73
+Source: ECMA registry
+Alias: iso-ir-98
+Alias: e13b
+Alias: csISO2033
+
+Name: ANSI_X3.110-1983 [RFC1345,KXS2]
+MIBenum: 74
+Source: ECMA registry
+Alias: iso-ir-99
+Alias: CSA_T500-1983
+Alias: NAPLPS
+Alias: csISO99NAPLPS
+
+Name: ISO_8859-1:1987 [RFC1345,KXS2]
+MIBenum: 4
+Source: ECMA registry
+Alias: iso-ir-100
+Alias: ISO_8859-1
+Alias: ISO-8859-1 (preferred MIME name)
+Alias: latin1
+Alias: l1
+Alias: IBM819
+Alias: CP819
+Alias: csISOLatin1
+
+Name: ISO_8859-2:1987 [RFC1345,KXS2]
+MIBenum: 5
+Source: ECMA registry
+Alias: iso-ir-101
+Alias: ISO_8859-2
+Alias: ISO-8859-2 (preferred MIME name)
+Alias: latin2
+Alias: l2
+Alias: csISOLatin2
+
+Name: T.61-7bit [RFC1345,KXS2]
+MIBenum: 75
+Source: ECMA registry
+Alias: iso-ir-102
+Alias: csISO102T617bit
+
+Name: T.61-8bit [RFC1345,KXS2]
+MIBenum: 76
+Alias: T.61
+Source: ECMA registry
+Alias: iso-ir-103
+Alias: csISO103T618bit
+
+Name: ISO_8859-3:1988 [RFC1345,KXS2]
+MIBenum: 6
+Source: ECMA registry
+Alias: iso-ir-109
+Alias: ISO_8859-3
+Alias: ISO-8859-3 (preferred MIME name)
+Alias: latin3
+Alias: l3
+Alias: csISOLatin3
+
+Name: ISO_8859-4:1988 [RFC1345,KXS2]
+MIBenum: 7
+Source: ECMA registry
+Alias: iso-ir-110
+Alias: ISO_8859-4
+Alias: ISO-8859-4 (preferred MIME name)
+Alias: latin4
+Alias: l4
+Alias: csISOLatin4
+
+Name: ECMA-cyrillic
+MIBenum: 77
+Source: ISO registry (formerly ECMA registry)
+ http://www.itscj.ipsj.jp/ISO-IR/111.pdf
+Alias: iso-ir-111
+Alias: KOI8-E
+Alias: csISO111ECMACyrillic
+
+Name: CSA_Z243.4-1985-1 [RFC1345,KXS2]
+MIBenum: 78
+Source: ECMA registry
+Alias: iso-ir-121
+Alias: ISO646-CA
+Alias: csa7-1
+Alias: ca
+Alias: csISO121Canadian1
+
+Name: CSA_Z243.4-1985-2 [RFC1345,KXS2]
+MIBenum: 79
+Source: ECMA registry
+Alias: iso-ir-122
+Alias: ISO646-CA2
+Alias: csa7-2
+Alias: csISO122Canadian2
+
+Name: CSA_Z243.4-1985-gr [RFC1345,KXS2]
+MIBenum: 80
+Source: ECMA registry
+Alias: iso-ir-123
+Alias: csISO123CSAZ24341985gr
+
+Name: ISO_8859-6:1987 [RFC1345,KXS2]
+MIBenum: 9
+Source: ECMA registry
+Alias: iso-ir-127
+Alias: ISO_8859-6
+Alias: ISO-8859-6 (preferred MIME name)
+Alias: ECMA-114
+Alias: ASMO-708
+Alias: arabic
+Alias: csISOLatinArabic
+
+Name: ISO_8859-6-E [RFC1556,IANA]
+MIBenum: 81
+Source: RFC1556
+Alias: csISO88596E
+Alias: ISO-8859-6-E (preferred MIME name)
+
+Name: ISO_8859-6-I [RFC1556,IANA]
+MIBenum: 82
+Source: RFC1556
+Alias: csISO88596I
+Alias: ISO-8859-6-I (preferred MIME name)
+
+Name: ISO_8859-7:1987 [RFC1947,RFC1345,KXS2]
+MIBenum: 10
+Source: ECMA registry
+Alias: iso-ir-126
+Alias: ISO_8859-7
+Alias: ISO-8859-7 (preferred MIME name)
+Alias: ELOT_928
+Alias: ECMA-118
+Alias: greek
+Alias: greek8
+Alias: csISOLatinGreek
+
+Name: T.101-G2 [RFC1345,KXS2]
+MIBenum: 83
+Source: ECMA registry
+Alias: iso-ir-128
+Alias: csISO128T101G2
+
+Name: ISO_8859-8:1988 [RFC1345,KXS2]
+MIBenum: 11
+Source: ECMA registry
+Alias: iso-ir-138
+Alias: ISO_8859-8
+Alias: ISO-8859-8 (preferred MIME name)
+Alias: hebrew
+Alias: csISOLatinHebrew
+
+Name: ISO_8859-8-E [RFC1556,Nussbacher]
+MIBenum: 84
+Source: RFC1556
+Alias: csISO88598E
+Alias: ISO-8859-8-E (preferred MIME name)
+
+Name: ISO_8859-8-I [RFC1556,Nussbacher]
+MIBenum: 85
+Source: RFC1556
+Alias: csISO88598I
+Alias: ISO-8859-8-I (preferred MIME name)
+
+Name: CSN_369103 [RFC1345,KXS2]
+MIBenum: 86
+Source: ECMA registry
+Alias: iso-ir-139
+Alias: csISO139CSN369103
+
+Name: JUS_I.B1.002 [RFC1345,KXS2]
+MIBenum: 87
+Source: ECMA registry
+Alias: iso-ir-141
+Alias: ISO646-YU
+Alias: js
+Alias: yu
+Alias: csISO141JUSIB1002
+
+Name: ISO_6937-2-add [RFC1345,KXS2]
+MIBenum: 14
+Source: ECMA registry and ISO 6937-2:1983
+Alias: iso-ir-142
+Alias: csISOTextComm
+
+Name: IEC_P27-1 [RFC1345,KXS2]
+MIBenum: 88
+Source: ECMA registry
+Alias: iso-ir-143
+Alias: csISO143IECP271
+
+Name: ISO_8859-5:1988 [RFC1345,KXS2]
+MIBenum: 8
+Source: ECMA registry
+Alias: iso-ir-144
+Alias: ISO_8859-5
+Alias: ISO-8859-5 (preferred MIME name)
+Alias: cyrillic
+Alias: csISOLatinCyrillic
+
+Name: JUS_I.B1.003-serb [RFC1345,KXS2]
+MIBenum: 89
+Source: ECMA registry
+Alias: iso-ir-146
+Alias: serbian
+Alias: csISO146Serbian
+
+Name: JUS_I.B1.003-mac [RFC1345,KXS2]
+MIBenum: 90
+Source: ECMA registry
+Alias: macedonian
+Alias: iso-ir-147
+Alias: csISO147Macedonian
+
+Name: ISO_8859-9:1989 [RFC1345,KXS2]
+MIBenum: 12
+Source: ECMA registry
+Alias: iso-ir-148
+Alias: ISO_8859-9
+Alias: ISO-8859-9 (preferred MIME name)
+Alias: latin5
+Alias: l5
+Alias: csISOLatin5
+
+Name: greek-ccitt [RFC1345,KXS2]
+MIBenum: 91
+Source: ECMA registry
+Alias: iso-ir-150
+Alias: csISO150
+Alias: csISO150GreekCCITT
+
+Name: NC_NC00-10:81 [RFC1345,KXS2]
+MIBenum: 92
+Source: ECMA registry
+Alias: cuba
+Alias: iso-ir-151
+Alias: ISO646-CU
+Alias: csISO151Cuba
+
+Name: ISO_6937-2-25 [RFC1345,KXS2]
+MIBenum: 93
+Source: ECMA registry
+Alias: iso-ir-152
+Alias: csISO6937Add
+
+Name: GOST_19768-74 [RFC1345,KXS2]
+MIBenum: 94
+Source: ECMA registry
+Alias: ST_SEV_358-88
+Alias: iso-ir-153
+Alias: csISO153GOST1976874
+
+Name: ISO_8859-supp [RFC1345,KXS2]
+MIBenum: 95
+Source: ECMA registry
+Alias: iso-ir-154
+Alias: latin1-2-5
+Alias: csISO8859Supp
+
+Name: ISO_10367-box [RFC1345,KXS2]
+MIBenum: 96
+Source: ECMA registry
+Alias: iso-ir-155
+Alias: csISO10367Box
+
+Name: ISO-8859-10 (preferred MIME name) [RFC1345,KXS2]
+MIBenum: 13
+Source: ECMA registry
+Alias: iso-ir-157
+Alias: l6
+Alias: ISO_8859-10:1992
+Alias: csISOLatin6
+Alias: latin6
+
+Name: latin-lap [RFC1345,KXS2]
+MIBenum: 97
+Source: ECMA registry
+Alias: lap
+Alias: iso-ir-158
+Alias: csISO158Lap
+
+Name: JIS_X0212-1990 [RFC1345,KXS2]
+MIBenum: 98
+Source: ECMA registry
+Alias: x0212
+Alias: iso-ir-159
+Alias: csISO159JISX02121990
+
+Name: DS_2089 [RFC1345,KXS2]
+MIBenum: 99
+Source: Danish Standard, DS 2089, February 1974
+Alias: DS2089
+Alias: ISO646-DK
+Alias: dk
+Alias: csISO646Danish
+
+Name: us-dk [RFC1345,KXS2]
+MIBenum: 100
+Alias: csUSDK
+
+Name: dk-us [RFC1345,KXS2]
+MIBenum: 101
+Alias: csDKUS
+
+Name: JIS_X0201 [RFC1345,KXS2]
+MIBenum: 15
+Source: JIS X 0201-1976. One byte only, this is equivalent to
+ JIS/Roman (similar to ASCII) plus eight-bit half-width
+ Katakana
+Alias: X0201
+Alias: csHalfWidthKatakana
+
+Name: KSC5636 [RFC1345,KXS2]
+MIBenum: 102
+Alias: ISO646-KR
+Alias: csKSC5636
+
+Name: ISO-10646-UCS-2
+MIBenum: 1000
+Source: the 2-octet Basic Multilingual Plane, aka Unicode
+ this needs to specify network byte order: the standard
+ does not specify (it is a 16-bit integer space)
+Alias: csUnicode
+
+Name: ISO-10646-UCS-4
+MIBenum: 1001
+Source: the full code space. (same comment about byte order,
+ these are 31-bit numbers.)
+Alias: csUCS4
+
+Name: DEC-MCS [RFC1345,KXS2]
+MIBenum: 2008
+Source: VAX/VMS User's Manual,
+ Order Number: AI-Y517A-TE, April 1986.
+Alias: dec
+Alias: csDECMCS
+
+Name: hp-roman8 [HP-PCL5,RFC1345,KXS2]
+MIBenum: 2004
+Source: LaserJet IIP Printer User's Manual,
+ HP part no 33471-90901, Hewlett-Packard, June 1989.
+Alias: roman8
+Alias: r8
+Alias: csHPRoman8
+
+Name: macintosh [RFC1345,KXS2]
+MIBenum: 2027
+Source: The Unicode Standard ver1.0, ISBN 0-201-56788-1, Oct 1991
+Alias: mac
+Alias: csMacintosh
+
+Name: IBM037 [RFC1345,KXS2]
+MIBenum: 2028
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp037
+Alias: ebcdic-cp-us
+Alias: ebcdic-cp-ca
+Alias: ebcdic-cp-wt
+Alias: ebcdic-cp-nl
+Alias: csIBM037
+
+Name: IBM038 [RFC1345,KXS2]
+MIBenum: 2029
+Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990
+Alias: EBCDIC-INT
+Alias: cp038
+Alias: csIBM038
+
+Name: IBM273 [RFC1345,KXS2]
+MIBenum: 2030
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP273
+Alias: csIBM273
+
+Name: IBM274 [RFC1345,KXS2]
+MIBenum: 2031
+Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990
+Alias: EBCDIC-BE
+Alias: CP274
+Alias: csIBM274
+
+Name: IBM275 [RFC1345,KXS2]
+MIBenum: 2032
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: EBCDIC-BR
+Alias: cp275
+Alias: csIBM275
+
+Name: IBM277 [RFC1345,KXS2]
+MIBenum: 2033
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: EBCDIC-CP-DK
+Alias: EBCDIC-CP-NO
+Alias: csIBM277
+
+Name: IBM278 [RFC1345,KXS2]
+MIBenum: 2034
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP278
+Alias: ebcdic-cp-fi
+Alias: ebcdic-cp-se
+Alias: csIBM278
+
+Name: IBM280 [RFC1345,KXS2]
+MIBenum: 2035
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP280
+Alias: ebcdic-cp-it
+Alias: csIBM280
+
+Name: IBM281 [RFC1345,KXS2]
+MIBenum: 2036
+Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990
+Alias: EBCDIC-JP-E
+Alias: cp281
+Alias: csIBM281
+
+Name: IBM284 [RFC1345,KXS2]
+MIBenum: 2037
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP284
+Alias: ebcdic-cp-es
+Alias: csIBM284
+
+Name: IBM285 [RFC1345,KXS2]
+MIBenum: 2038
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP285
+Alias: ebcdic-cp-gb
+Alias: csIBM285
+
+Name: IBM290 [RFC1345,KXS2]
+MIBenum: 2039
+Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990
+Alias: cp290
+Alias: EBCDIC-JP-kana
+Alias: csIBM290
+
+Name: IBM297 [RFC1345,KXS2]
+MIBenum: 2040
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp297
+Alias: ebcdic-cp-fr
+Alias: csIBM297
+
+Name: IBM420 [RFC1345,KXS2]
+MIBenum: 2041
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990,
+ IBM NLS RM p 11-11
+Alias: cp420
+Alias: ebcdic-cp-ar1
+Alias: csIBM420
+
+Name: IBM423 [RFC1345,KXS2]
+MIBenum: 2042
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp423
+Alias: ebcdic-cp-gr
+Alias: csIBM423
+
+Name: IBM424 [RFC1345,KXS2]
+MIBenum: 2043
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp424
+Alias: ebcdic-cp-he
+Alias: csIBM424
+
+Name: IBM437 [RFC1345,KXS2]
+MIBenum: 2011
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp437
+Alias: 437
+Alias: csPC8CodePage437
+
+Name: IBM500 [RFC1345,KXS2]
+MIBenum: 2044
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP500
+Alias: ebcdic-cp-be
+Alias: ebcdic-cp-ch
+Alias: csIBM500
+
+Name: IBM775 [HP-PCL5]
+MIBenum: 2087
+Source: HP PCL 5 Comparison Guide (P/N 5021-0329) pp B-13, 1996
+Alias: cp775
+Alias: csPC775Baltic
+
+Name: IBM850 [RFC1345,KXS2]
+MIBenum: 2009
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp850
+Alias: 850
+Alias: csPC850Multilingual
+
+Name: IBM851 [RFC1345,KXS2]
+MIBenum: 2045
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp851
+Alias: 851
+Alias: csIBM851
+
+Name: IBM852 [RFC1345,KXS2]
+MIBenum: 2010
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp852
+Alias: 852
+Alias: csPCp852
+
+Name: IBM855 [RFC1345,KXS2]
+MIBenum: 2046
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp855
+Alias: 855
+Alias: csIBM855
+
+Name: IBM857 [RFC1345,KXS2]
+MIBenum: 2047
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp857
+Alias: 857
+Alias: csIBM857
+
+Name: IBM860 [RFC1345,KXS2]
+MIBenum: 2048
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp860
+Alias: 860
+Alias: csIBM860
+
+Name: IBM861 [RFC1345,KXS2]
+MIBenum: 2049
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp861
+Alias: 861
+Alias: cp-is
+Alias: csIBM861
+
+Name: IBM862 [RFC1345,KXS2]
+MIBenum: 2013
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp862
+Alias: 862
+Alias: csPC862LatinHebrew
+
+Name: IBM863 [RFC1345,KXS2]
+MIBenum: 2050
+Source: IBM Keyboard layouts and code pages, PN 07G4586 June 1991
+Alias: cp863
+Alias: 863
+Alias: csIBM863
+
+Name: IBM864 [RFC1345,KXS2]
+MIBenum: 2051
+Source: IBM Keyboard layouts and code pages, PN 07G4586 June 1991
+Alias: cp864
+Alias: csIBM864
+
+Name: IBM865 [RFC1345,KXS2]
+MIBenum: 2052
+Source: IBM DOS 3.3 Ref (Abridged), 94X9575 (Feb 1987)
+Alias: cp865
+Alias: 865
+Alias: csIBM865
+
+Name: IBM866 [Pond]
+MIBenum: 2086
+Source: IBM NLDG Volume 2 (SE09-8002-03) August 1994
+Alias: cp866
+Alias: 866
+Alias: csIBM866
+
+Name: IBM868 [RFC1345,KXS2]
+MIBenum: 2053
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP868
+Alias: cp-ar
+Alias: csIBM868
+
+Name: IBM869 [RFC1345,KXS2]
+MIBenum: 2054
+Source: IBM Keyboard layouts and code pages, PN 07G4586 June 1991
+Alias: cp869
+Alias: 869
+Alias: cp-gr
+Alias: csIBM869
+
+Name: IBM870 [RFC1345,KXS2]
+MIBenum: 2055
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP870
+Alias: ebcdic-cp-roece
+Alias: ebcdic-cp-yu
+Alias: csIBM870
+
+Name: IBM871 [RFC1345,KXS2]
+MIBenum: 2056
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP871
+Alias: ebcdic-cp-is
+Alias: csIBM871
+
+Name: IBM880 [RFC1345,KXS2]
+MIBenum: 2057
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp880
+Alias: EBCDIC-Cyrillic
+Alias: csIBM880
+
+Name: IBM891 [RFC1345,KXS2]
+MIBenum: 2058
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp891
+Alias: csIBM891
+
+Name: IBM903 [RFC1345,KXS2]
+MIBenum: 2059
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp903
+Alias: csIBM903
+
+Name: IBM904 [RFC1345,KXS2]
+MIBenum: 2060
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp904
+Alias: 904
+Alias: csIBBM904
+
+Name: IBM905 [RFC1345,KXS2]
+MIBenum: 2061
+Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990
+Alias: CP905
+Alias: ebcdic-cp-tr
+Alias: csIBM905
+
+Name: IBM918 [RFC1345,KXS2]
+MIBenum: 2062
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP918
+Alias: ebcdic-cp-ar2
+Alias: csIBM918
+
+Name: IBM1026 [RFC1345,KXS2]
+MIBenum: 2063
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP1026
+Alias: csIBM1026
+
+Name: EBCDIC-AT-DE [RFC1345,KXS2]
+MIBenum: 2064
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csIBMEBCDICATDE
+
+Name: EBCDIC-AT-DE-A [RFC1345,KXS2]
+MIBenum: 2065
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICATDEA
+
+Name: EBCDIC-CA-FR [RFC1345,KXS2]
+MIBenum: 2066
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICCAFR
+
+Name: EBCDIC-DK-NO [RFC1345,KXS2]
+MIBenum: 2067
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICDKNO
+
+Name: EBCDIC-DK-NO-A [RFC1345,KXS2]
+MIBenum: 2068
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICDKNOA
+
+Name: EBCDIC-FI-SE [RFC1345,KXS2]
+MIBenum: 2069
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICFISE
+
+Name: EBCDIC-FI-SE-A [RFC1345,KXS2]
+MIBenum: 2070
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICFISEA
+
+Name: EBCDIC-FR [RFC1345,KXS2]
+MIBenum: 2071
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICFR
+
+Name: EBCDIC-IT [RFC1345,KXS2]
+MIBenum: 2072
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICIT
+
+Name: EBCDIC-PT [RFC1345,KXS2]
+MIBenum: 2073
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICPT
+
+Name: EBCDIC-ES [RFC1345,KXS2]
+MIBenum: 2074
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICES
+
+Name: EBCDIC-ES-A [RFC1345,KXS2]
+MIBenum: 2075
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICESA
+
+Name: EBCDIC-ES-S [RFC1345,KXS2]
+MIBenum: 2076
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICESS
+
+Name: EBCDIC-UK [RFC1345,KXS2]
+MIBenum: 2077
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICUK
+
+Name: EBCDIC-US [RFC1345,KXS2]
+MIBenum: 2078
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICUS
+
+Name: UNKNOWN-8BIT [RFC1428]
+MIBenum: 2079
+Alias: csUnknown8BiT
+
+Name: MNEMONIC [RFC1345,KXS2]
+MIBenum: 2080
+Source: RFC 1345, also known as "mnemonic+ascii+38"
+Alias: csMnemonic
+
+Name: MNEM [RFC1345,KXS2]
+MIBenum: 2081
+Source: RFC 1345, also known as "mnemonic+ascii+8200"
+Alias: csMnem
+
+Name: VISCII [RFC1456]
+MIBenum: 2082
+Source: RFC 1456
+Alias: csVISCII
+
+Name: VIQR [RFC1456]
+MIBenum: 2083
+Source: RFC 1456
+Alias: csVIQR
+
+Name: KOI8-R (preferred MIME name) [RFC1489]
+MIBenum: 2084
+Source: RFC 1489, based on GOST-19768-74, ISO-6937/8,
+ INIS-Cyrillic, ISO-5427.
+Alias: csKOI8R
+
+Name: KOI8-U [RFC2319]
+MIBenum: 2088
+Source: RFC 2319
+
+Name: IBM00858
+MIBenum: 2089
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM00858) [Mahdi]
+Alias: CCSID00858
+Alias: CP00858
+Alias: PC-Multilingual-850+euro
+
+Name: IBM00924
+MIBenum: 2090
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM00924) [Mahdi]
+Alias: CCSID00924
+Alias: CP00924
+Alias: ebcdic-Latin9--euro
+
+Name: IBM01140
+MIBenum: 2091
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01140) [Mahdi]
+Alias: CCSID01140
+Alias: CP01140
+Alias: ebcdic-us-37+euro
+
+Name: IBM01141
+MIBenum: 2092
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01141) [Mahdi]
+Alias: CCSID01141
+Alias: CP01141
+Alias: ebcdic-de-273+euro
+
+Name: IBM01142
+MIBenum: 2093
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01142) [Mahdi]
+Alias: CCSID01142
+Alias: CP01142
+Alias: ebcdic-dk-277+euro
+Alias: ebcdic-no-277+euro
+
+Name: IBM01143
+MIBenum: 2094
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01143) [Mahdi]
+Alias: CCSID01143
+Alias: CP01143
+Alias: ebcdic-fi-278+euro
+Alias: ebcdic-se-278+euro
+
+Name: IBM01144
+MIBenum: 2095
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01144) [Mahdi]
+Alias: CCSID01144
+Alias: CP01144
+Alias: ebcdic-it-280+euro
+
+Name: IBM01145
+MIBenum: 2096
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01145) [Mahdi]
+Alias: CCSID01145
+Alias: CP01145
+Alias: ebcdic-es-284+euro
+
+Name: IBM01146
+MIBenum: 2097
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01146) [Mahdi]
+Alias: CCSID01146
+Alias: CP01146
+Alias: ebcdic-gb-285+euro
+
+Name: IBM01147
+MIBenum: 2098
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01147) [Mahdi]
+Alias: CCSID01147
+Alias: CP01147
+Alias: ebcdic-fr-297+euro
+
+Name: IBM01148
+MIBenum: 2099
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01148) [Mahdi]
+Alias: CCSID01148
+Alias: CP01148
+Alias: ebcdic-international-500+euro
+
+Name: IBM01149
+MIBenum: 2100
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01149) [Mahdi]
+Alias: CCSID01149
+Alias: CP01149
+Alias: ebcdic-is-871+euro
+
+Name: Big5-HKSCS [Yick]
+MIBenum: 2101
+Source: See (http://www.iana.org/assignments/charset-reg/Big5-HKSCS)
+Alias: None
+
+Name: IBM1047 [Robrigado]
+MIBenum: 2102
+Source: IBM1047 (EBCDIC Latin 1/Open Systems)
+http://www-1.ibm.com/servers/eserver/iseries/software/globalization/pdf/cp01047z.pdf
+Alias: IBM-1047
+
+Name: PTCP154 [Uskov]
+MIBenum: 2103
+Source: See (http://www.iana.org/assignments/charset-reg/PTCP154)
+Alias: csPTCP154
+Alias: PT154
+Alias: CP154
+Alias: Cyrillic-Asian
+
+Name: Amiga-1251
+MIBenum: 2104
+Source: See (http://www.amiga.ultranet.ru/Amiga-1251.html)
+Alias: Ami1251
+Alias: Amiga1251
+Alias: Ami-1251
+(Aliases are provided for historical reasons and should not be used)
+ [Malyshev]
+
+Name: KOI7-switched
+MIBenum: 2105
+Source: See <http://www.iana.org/assignments/charset-reg/KOI7-switched>
+Aliases: None
+
+Name: UNICODE-1-1 [RFC1641]
+MIBenum: 1010
+Source: RFC 1641
+Alias: csUnicode11
+
+Name: SCSU
+MIBenum: 1011
+Source: SCSU See (http://www.iana.org/assignments/charset-reg/SCSU) [Scherer]
+Alias: None
+
+Name: UTF-7 [RFC2152]
+MIBenum: 1012
+Source: RFC 2152
+Alias: None
+
+Name: UTF-16BE [RFC2781]
+MIBenum: 1013
+Source: RFC 2781
+Alias: None
+
+Name: UTF-16LE [RFC2781]
+MIBenum: 1014
+Source: RFC 2781
+Alias: None
+
+Name: UTF-16 [RFC2781]
+MIBenum: 1015
+Source: RFC 2781
+Alias: None
+
+Name: CESU-8 [Phipps]
+MIBenum: 1016
+Source: <http://www.unicode.org/unicode/reports/tr26>
+Alias: csCESU-8
+
+Name: UTF-32 [Davis]
+MIBenum: 1017
+Source: <http://www.unicode.org/unicode/reports/tr19/>
+Alias: None
+
+Name: UTF-32BE [Davis]
+MIBenum: 1018
+Source: <http://www.unicode.org/unicode/reports/tr19/>
+Alias: None
+
+Name: UTF-32LE [Davis]
+MIBenum: 1019
+Source: <http://www.unicode.org/unicode/reports/tr19/>
+Alias: None
+
+Name: BOCU-1 [Scherer]
+MIBenum: 1020
+Source: http://www.unicode.org/notes/tn6/
+Alias: csBOCU-1
+
+Name: UNICODE-1-1-UTF-7 [RFC1642]
+MIBenum: 103
+Source: RFC 1642
+Alias: csUnicode11UTF7
+
+Name: UTF-8 [RFC3629]
+MIBenum: 106
+Source: RFC 3629
+Alias: None
+
+Name: ISO-8859-13
+MIBenum: 109
+Source: ISO See (http://www.iana.org/assignments/charset-reg/iso-8859-13)[Tumasonis]
+Alias: None
+
+Name: ISO-8859-14
+MIBenum: 110
+Source: ISO See (http://www.iana.org/assignments/charset-reg/iso-8859-14) [Simonsen]
+Alias: iso-ir-199
+Alias: ISO_8859-14:1998
+Alias: ISO_8859-14
+Alias: latin8
+Alias: iso-celtic
+Alias: l8
+
+Name: ISO-8859-15
+MIBenum: 111
+Source: ISO
+ Please see: <http://www.iana.org/assignments/charset-reg/ISO-8859-15>
+Alias: ISO_8859-15
+Alias: Latin-9
+
+Name: ISO-8859-16
+MIBenum: 112
+Source: ISO
+Alias: iso-ir-226
+Alias: ISO_8859-16:2001
+Alias: ISO_8859-16
+Alias: latin10
+Alias: l10
+
+Name: GBK
+MIBenum: 113
+Source: Chinese IT Standardization Technical Committee
+ Please see: <http://www.iana.org/assignments/charset-reg/GBK>
+Alias: CP936
+Alias: MS936
+Alias: windows-936
+
+Name: GB18030
+MIBenum: 114
+Source: Chinese IT Standardization Technical Committee
+ Please see: <http://www.iana.org/assignments/charset-reg/GB18030>
+Alias: None
+
+Name: OSD_EBCDIC_DF04_15
+MIBenum: 115
+Source: Fujitsu-Siemens standard mainframe EBCDIC encoding
+ Please see: <http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-15>
+Alias: None
+
+Name: OSD_EBCDIC_DF03_IRV
+MIBenum: 116
+Source: Fujitsu-Siemens standard mainframe EBCDIC encoding
+ Please see: <http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF03-IRV>
+Alias: None
+
+Name: OSD_EBCDIC_DF04_1
+MIBenum: 117
+Source: Fujitsu-Siemens standard mainframe EBCDIC encoding
+ Please see: <http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-1>
+Alias: None
+
+Name: JIS_Encoding
+MIBenum: 16
+Source: JIS X 0202-1991. Uses ISO 2022 escape sequences to
+ shift code sets as documented in JIS X 0202-1991.
+Alias: csJISEncoding
+
+Name: Shift_JIS (preferred MIME name)
+MIBenum: 17
+Source: This charset is an extension of csHalfWidthKatakana by
+ adding graphic characters in JIS X 0208. The CCS's are
+ JIS X0201:1997 and JIS X0208:1997. The
+ complete definition is shown in Appendix 1 of JIS
+ X0208:1997.
+ This charset can be used for the top-level media type "text".
+Alias: MS_Kanji
+Alias: csShiftJIS
+
+Name: Extended_UNIX_Code_Packed_Format_for_Japanese
+MIBenum: 18
+Source: Standardized by OSF, UNIX International, and UNIX Systems
+ Laboratories Pacific. Uses ISO 2022 rules to select
+ code set 0: US-ASCII (a single 7-bit byte set)
+ code set 1: JIS X0208-1990 (a double 8-bit byte set)
+ restricted to A0-FF in both bytes
+ code set 2: Half Width Katakana (a single 7-bit byte set)
+ requiring SS2 as the character prefix
+ code set 3: JIS X0212-1990 (a double 7-bit byte set)
+ restricted to A0-FF in both bytes
+ requiring SS3 as the character prefix
+Alias: csEUCPkdFmtJapanese
+Alias: EUC-JP (preferred MIME name)
+
+Name: Extended_UNIX_Code_Fixed_Width_for_Japanese
+MIBenum: 19
+Source: Used in Japan. Each character is 2 octets.
+ code set 0: US-ASCII (a single 7-bit byte set)
+ 1st byte = 00
+ 2nd byte = 20-7E
+ code set 1: JIS X0208-1990 (a double 7-bit byte set)
+ restricted to A0-FF in both bytes
+ code set 2: Half Width Katakana (a single 7-bit byte set)
+ 1st byte = 00
+ 2nd byte = A0-FF
+ code set 3: JIS X0212-1990 (a double 7-bit byte set)
+ restricted to A0-FF in
+ the first byte
+ and 21-7E in the second byte
+Alias: csEUCFixWidJapanese
+
+Name: ISO-10646-UCS-Basic
+MIBenum: 1002
+Source: ASCII subset of Unicode. Basic Latin = collection 1
+ See ISO 10646, Appendix A
+Alias: csUnicodeASCII
+
+Name: ISO-10646-Unicode-Latin1
+MIBenum: 1003
+Source: ISO Latin-1 subset of Unicode. Basic Latin and Latin-1
+ Supplement = collections 1 and 2. See ISO 10646,
+ Appendix A. See RFC 1815.
+Alias: csUnicodeLatin1
+Alias: ISO-10646
+
+Name: ISO-10646-J-1
+Source: ISO 10646 Japanese, see RFC 1815.
+
+Name: ISO-Unicode-IBM-1261
+MIBenum: 1005
+Source: IBM Latin-2, -3, -5, Extended Presentation Set, GCSGID: 1261
+Alias: csUnicodeIBM1261
+
+Name: ISO-Unicode-IBM-1268
+MIBenum: 1006
+Source: IBM Latin-4 Extended Presentation Set, GCSGID: 1268
+Alias: csUnicodeIBM1268
+
+Name: ISO-Unicode-IBM-1276
+MIBenum: 1007
+Source: IBM Cyrillic Greek Extended Presentation Set, GCSGID: 1276
+Alias: csUnicodeIBM1276
+
+Name: ISO-Unicode-IBM-1264
+MIBenum: 1008
+Source: IBM Arabic Presentation Set, GCSGID: 1264
+Alias: csUnicodeIBM1264
+
+Name: ISO-Unicode-IBM-1265
+MIBenum: 1009
+Source: IBM Hebrew Presentation Set, GCSGID: 1265
+Alias: csUnicodeIBM1265
+
+Name: ISO-8859-1-Windows-3.0-Latin-1 [HP-PCL5]
+MIBenum: 2000
+Source: Extended ISO 8859-1 Latin-1 for Windows 3.0.
+ PCL Symbol Set id: 9U
+Alias: csWindows30Latin1
+
+Name: ISO-8859-1-Windows-3.1-Latin-1 [HP-PCL5]
+MIBenum: 2001
+Source: Extended ISO 8859-1 Latin-1 for Windows 3.1.
+ PCL Symbol Set id: 19U
+Alias: csWindows31Latin1
+
+Name: ISO-8859-2-Windows-Latin-2 [HP-PCL5]
+MIBenum: 2002
+Source: Extended ISO 8859-2. Latin-2 for Windows 3.1.
+ PCL Symbol Set id: 9E
+Alias: csWindows31Latin2
+
+Name: ISO-8859-9-Windows-Latin-5 [HP-PCL5]
+MIBenum: 2003
+Source: Extended ISO 8859-9. Latin-5 for Windows 3.1
+ PCL Symbol Set id: 5T
+Alias: csWindows31Latin5
+
+Name: Adobe-Standard-Encoding [Adobe]
+MIBenum: 2005
+Source: PostScript Language Reference Manual
+ PCL Symbol Set id: 10J
+Alias: csAdobeStandardEncoding
+
+Name: Ventura-US [HP-PCL5]
+MIBenum: 2006
+Source: Ventura US. ASCII plus characters typically used in
+ publishing, like pilcrow, copyright, registered, trade mark,
+ section, dagger, and double dagger in the range A0 (hex)
+ to FF (hex).
+ PCL Symbol Set id: 14J
+Alias: csVenturaUS
+
+Name: Ventura-International [HP-PCL5]
+MIBenum: 2007
+Source: Ventura International. ASCII plus coded characters similar
+ to Roman8.
+ PCL Symbol Set id: 13J
+Alias: csVenturaInternational
+
+Name: PC8-Danish-Norwegian [HP-PCL5]
+MIBenum: 2012
+Source: PC Danish Norwegian
+ 8-bit PC set for Danish Norwegian
+ PCL Symbol Set id: 11U
+Alias: csPC8DanishNorwegian
+
+Name: PC8-Turkish [HP-PCL5]
+MIBenum: 2014
+Source: PC Latin Turkish. PCL Symbol Set id: 9T
+Alias: csPC8Turkish
+
+Name: IBM-Symbols [IBM-CIDT]
+MIBenum: 2015
+Source: Presentation Set, CPGID: 259
+Alias: csIBMSymbols
+
+Name: IBM-Thai [IBM-CIDT]
+MIBenum: 2016
+Source: Presentation Set, CPGID: 838
+Alias: csIBMThai
+
+Name: HP-Legal [HP-PCL5]
+MIBenum: 2017
+Source: PCL 5 Comparison Guide, Hewlett-Packard,
+ HP part number 5961-0510, October 1992
+ PCL Symbol Set id: 1U
+Alias: csHPLegal
+
+Name: HP-Pi-font [HP-PCL5]
+MIBenum: 2018
+Source: PCL 5 Comparison Guide, Hewlett-Packard,
+ HP part number 5961-0510, October 1992
+ PCL Symbol Set id: 15U
+Alias: csHPPiFont
+
+Name: HP-Math8 [HP-PCL5]
+MIBenum: 2019
+Source: PCL 5 Comparison Guide, Hewlett-Packard,
+ HP part number 5961-0510, October 1992
+ PCL Symbol Set id: 8M
+Alias: csHPMath8
+
+Name: Adobe-Symbol-Encoding [Adobe]
+MIBenum: 2020
+Source: PostScript Language Reference Manual
+ PCL Symbol Set id: 5M
+Alias: csHPPSMath
+
+Name: HP-DeskTop [HP-PCL5]
+MIBenum: 2021
+Source: PCL 5 Comparison Guide, Hewlett-Packard,
+ HP part number 5961-0510, October 1992
+ PCL Symbol Set id: 7J
+Alias: csHPDesktop
+
+Name: Ventura-Math [HP-PCL5]
+MIBenum: 2022
+Source: PCL 5 Comparison Guide, Hewlett-Packard,
+ HP part number 5961-0510, October 1992
+ PCL Symbol Set id: 6M
+Alias: csVenturaMath
+
+Name: Microsoft-Publishing [HP-PCL5]
+MIBenum: 2023
+Source: PCL 5 Comparison Guide, Hewlett-Packard,
+ HP part number 5961-0510, October 1992
+ PCL Symbol Set id: 6J
+Alias: csMicrosoftPublishing
+
+Name: Windows-31J
+MIBenum: 2024
+Source: Windows Japanese. A further extension of Shift_JIS
+ to include NEC special characters (Row 13), NEC
+ selection of IBM extensions (Rows 89 to 92), and IBM
+ extensions (Rows 115 to 119). The CCS's are
+ JIS X0201:1997, JIS X0208:1997, and these extensions.
+ This charset can be used for the top-level media type "text",
+ but it is of limited or specialized use (see RFC2278).
+ PCL Symbol Set id: 19K
+Alias: csWindows31J
+
+Name: GB2312 (preferred MIME name)
+MIBenum: 2025
+Source: Chinese for People's Republic of China (PRC) mixed one byte,
+ two byte set:
+ 20-7E = one byte ASCII
+ A1-FE = two byte PRC Kanji
+ See GB 2312-80
+ PCL Symbol Set Id: 18C
+Alias: csGB2312
+
+Name: Big5 (preferred MIME name)
+MIBenum: 2026
+Source: Chinese for Taiwan Multi-byte set.
+ PCL Symbol Set Id: 18T
+Alias: csBig5
+
+Name: windows-1250
+MIBenum: 2250
+Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1250) [Lazhintseva]
+Alias: None
+
+Name: windows-1251
+MIBenum: 2251
+Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1251) [Lazhintseva]
+Alias: None
+
+Name: windows-1252
+MIBenum: 2252
+Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1252) [Wendt]
+Alias: None
+
+Name: windows-1253
+MIBenum: 2253
+Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1253) [Lazhintseva]
+Alias: None
+
+Name: windows-1254
+MIBenum: 2254
+Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1254) [Lazhintseva]
+Alias: None
+
+Name: windows-1255
+MIBenum: 2255
+Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1255) [Lazhintseva]
+Alias: None
+
+Name: windows-1256
+MIBenum: 2256
+Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1256) [Lazhintseva]
+Alias: None
+
+Name: windows-1257
+MIBenum: 2257
+Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1257) [Lazhintseva]
+Alias: None
+
+Name: windows-1258
+MIBenum: 2258
+Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1258) [Lazhintseva]
+Alias: None
+
+Name: TIS-620
+MIBenum: 2259
+Source: Thai Industrial Standards Institute (TISI) [Tantsetthi]
+
+Name: HZ-GB-2312
+MIBenum: 2085
+Source: RFC 1842, RFC 1843 [RFC1842, RFC1843]
+
+
+REFERENCES
+----------
+
+[RFC1345] Simonsen, K., "Character Mnemonics & Character Sets",
+ RFC 1345, Rationel Almen Planlaegning,
+ June 1992.
+
+[RFC1428] Vaudreuil, G., "Transition of Internet Mail from
+ Just-Send-8 to 8bit-SMTP/MIME", RFC1428, CNRI, February
+ 1993.
+
+[RFC1456] Vietnamese Standardization Working Group, "Conventions for
+ Encoding the Vietnamese Language VISCII: VIetnamese
+ Standard Code for Information Interchange VIQR: VIetnamese
+ Quoted-Readable Specification Revision 1.1", RFC 1456, May
+ 1993.
+
+[RFC1468] Murai, J., Crispin, M., and E. van der Poel, "Japanese
+ Character Encoding for Internet Messages", RFC 1468,
+ Keio University, Panda Programming, June 1993.
+
+[RFC1489] Chernov, A., "Registration of a Cyrillic Character Set",
+ RFC1489, RELCOM Development Team, July 1993.
+
+[RFC1554] Ohta, M., and K. Handa, "ISO-2022-JP-2: Multilingual
+ Extension of ISO-2022-JP", RFC1554, Tokyo Institute of
+ Technology, ETL, December 1993.
+
+[RFC1556] Nussbacher, H., "Handling of Bi-directional Texts in MIME",
+ RFC1556, Israeli Inter-University, December 1993.
+
+[RFC1557] Choi, U., Chon, K., and H. Park, "Korean Character Encoding
+ for Internet Messages", KAIST, Solvit Chosun Media,
+ December 1993.
+
+[RFC1641] Goldsmith, D., and M. Davis, "Using Unicode with MIME",
+ RFC1641, Taligent, Inc., July 1994.
+
+[RFC1642] Goldsmith, D., and M. Davis, "UTF-7", RFC1642, Taligent,
+ Inc., July 1994.
+
+[RFC1815] Ohta, M., "Character Sets ISO-10646 and ISO-10646-J-1",
+ RFC 1815, Tokyo Institute of Technology, July 1995.
+
+
+[Adobe] Adobe Systems Incorporated, PostScript Language Reference
+ Manual, second edition, Addison-Wesley Publishing Company,
+ Inc., 1990.
+
+[ECMA Registry] ISO-IR: International Register of Escape Sequences
+ http://www.itscj.ipsj.or.jp/ISO-IR/ Note: The current
+ registration authority is IPSJ/ITSCJ, Japan.
+
+[HP-PCL5] Hewlett-Packard Company, "HP PCL 5 Comparison Guide",
+ (P/N 5021-0329) pp B-13, 1996.
+
+[IBM-CIDT] IBM Corporation, "ABOUT TYPE: IBM's Technical Reference
+ for Core Interchange Digitized Type", Publication number
+ S544-3708-01
+
+[RFC1842] Wei, Y., J. Li, and Y. Jiang, "ASCII Printable
+ Characters-Based Chinese Character Encoding for Internet
+ Messages", RFC 1842, Harvard University, Rice University,
+ University of Maryland, August 1995.
+
+[RFC1843] Lee, F., "HZ - A Data Format for Exchanging Files of
+ Arbitrarily Mixed Chinese and ASCII Characters", RFC 1843,
+ Stanford University, August 1995.
+
+[RFC2152] Goldsmith, D., M. Davis, "UTF-7: A Mail-Safe Transformation
+ Format of Unicode", RFC 2152, Apple Computer, Inc.,
+ Taligent Inc., May 1997.
+
+[RFC2279] Yergeau, F., "UTF-8, A Transformation Format of ISO 10646",
+ RFC 2279, Alis Technologies, January, 1998.
+
+[RFC2781] Hoffman, P., Yergeau, F., "UTF-16, an encoding of ISO 10646",
+ RFC 2781, February 2000.
+
+[RFC3629] Yergeau, F., "UTF-8, a transformation format of ISO 10646",
+ RFC3629, November 2003.
+
+PEOPLE
+------
+
+[KXS2] Keld Simonsen <Keld.Simonsen@dkuug.dk>
+
+[Choi] Woohyong Choi <whchoi@cosmos.kaist.ac.kr>
+
+[Davis] Mark Davis, <mark@unicode.org>, April 2002.
+
+[Lazhintseva] Katya Lazhintseva, <katyal@MICROSOFT.com>, May 1996.
+
+[Mahdi] Tamer Mahdi, <tamer@ca.ibm.com>, August 2000.
+
+[Malyshev] Michael Malyshev, <michael_malyshev@mail.ru>, January 2004
+
+[Murai] Jun Murai <jun@wide.ad.jp>
+
+[Nussbacher] Hank Nussbacher, <hank@vm.tau.ac.il>
+
+[Ohta] Masataka Ohta, <mohta@cc.titech.ac.jp>, July 1995.
+
+[Phipps] Toby Phipps, <tphipps@peoplesoft.com>, March 2002.
+
+[Pond] Rick Pond, <rickpond@vnet.ibm.com>, March 1997.
+
+[Robrigado] Reuel Robrigado, <reuelr@ca.ibm.com>, September 2002.
+
+[Scherer] Markus Scherer, <markus.scherer@jtcsv.com>, August 2000,
+ September 2002.
+
+[Simonsen] Keld Simonsen, <Keld.Simonsen@rap.dk>, August 2000.
+
+[Tantsetthi] Trin Tantsetthi, <trin@mozart.inet.co.th>, September 1998.
+
+[Tumasonis] Vladas Tumasonis, <vladas.tumasonis@maf.vu.lt>, August 2000.
+
+[Uskov] Alexander Uskov, <auskov@idc.kz>, September 2002.
+
+[Wendt] Chris Wendt, <christw@microsoft.com>, December 1999.
+
+[Yick] Nicky Yick, <cliac@itsd.gcn.gov.hk>, October 2000.
+
+[]
+
+
+
+
+
+
+
diff --git a/WebCore/platform/text/mac/mac-encodings.txt b/WebCore/platform/text/mac/mac-encodings.txt
new file mode 100644
index 0000000..bb45e22
--- /dev/null
+++ b/WebCore/platform/text/mac/mac-encodings.txt
@@ -0,0 +1,45 @@
+# We'd like to eliminate this file.
+# It would be nice to get rid of the dependence on the Text Encoding Converter (TEC) entirely.
+# Perhaps we can prove these are not used on the web and remove them.
+# Or perhaps we can get them added to ICU.
+
+# The items on the left are names of TEC TextEncoding values (without the leading kTextEncoding).
+# The items on the right are IANA character set names. Names listed in character-sets.txt are not
+# repeated here; mentioning any one character set from a group in there pulls in all the aliases in
+# that group.
+
+DOSChineseTrad: cp950
+DOSGreek: cp737, ibm737
+EUC_TW: EUC-TW
+ISOLatin10: ISO-8859-16
+ISOLatin6: ISO-8859-10
+ISOLatin8: ISO-8859-14
+ISOLatinThai: ISO-8859-11
+ISO_2022_JP_3: ISO-2022-JP-3
+JIS_C6226_78: JIS_C6226-1978
+JIS_X0208_83: JIS_X0208-1983
+JIS_X0208_90: JIS_X0208-1990
+JIS_X0212_90: JIS_X0212-1990
+KOI8_U: KOI8-U
+MacArabic: x-mac-arabic
+MacChineseSimp: x-mac-chinesesimp, xmacsimpchinese
+MacChineseTrad: x-mac-chinesetrad, xmactradchinese
+MacCroatian: x-mac-croatian
+MacDevanagari: x-mac-devanagari
+MacDingbats: x-mac-dingbats
+MacFarsi: x-mac-farsi
+MacGujarati: x-mac-gujarati
+MacGurmukhi: x-mac-gurmukhi
+MacHebrew: x-mac-hebrew
+MacIcelandic: x-mac-icelandic
+MacJapanese: x-mac-japanese
+MacKorean: x-mac-korean
+MacRomanLatin1: x-mac-roman-latin1
+MacRomanian: x-mac-romanian
+MacSymbol: x-mac-symbol
+MacThai: x-mac-thai
+MacTibetan: x-mac-tibetan
+MacVT100: x-mac-vt100
+NextStepLatin: x-nextstep
+ShiftJIS_X0213_00: Shift_JIS_X0213-2000
+WindowsKoreanJohab: johab
diff --git a/WebCore/platform/text/mac/make-charset-table.pl b/WebCore/platform/text/mac/make-charset-table.pl
new file mode 100755
index 0000000..16fd25a
--- /dev/null
+++ b/WebCore/platform/text/mac/make-charset-table.pl
@@ -0,0 +1,225 @@
+#!/usr/bin/perl -w
+
+# Copyright (C) 2003, 2004, 2005, 2006 Apple Computer, Inc. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of
+# its contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+use strict;
+
+# Shared state used by the subs below.
+my %aliasesFromCharsetsFile = ();   # normalized name => ref to the sorted list of all names in its group
+my %namesWritten = ();              # names already emitted into the table
+my $output = '';                    # table rows accumulated by emit_line
+my $error = 0;                      # becomes 1 once error() has been called
+
+# Print a message (followed by a newline) on STDERR and set the global $error flag.
+# The ($) prototype lets callers write: error "message";
+sub error ($)
+{
+    my ($message) = @_;
+
+    print STDERR $message, "\n";
+    $error = 1;
+}
+
+# Append one table row for $name to $output.
+#
+# Arguments: name, platform prefix, platform encoding name, flags.
+# The row is built from the name and from prefix . encoding; the flags
+# argument is accepted but does not appear in the row.
+# A name that has already been emitted is reported through error().
+sub emit_line
+{
+    my ($name, $prefix, $encoding) = @_;
+
+    if ($namesWritten{$name}) {
+        error("$name shows up twice in output");
+    }
+    $namesWritten{$name} = 1;
+
+    $output .= ' { "' . $name . '", ' . $prefix . $encoding . " },\n";
+}
+
+# Read a platform encodings file (such as mac-encodings.txt) and add one table row per
+# charset name to the output.
+#
+#   $filename        path of the platform encodings file
+#   $PlatformPrefix  text placed directly in front of each platform encoding name in a row
+#
+# After "#" comments and trailing whitespace are stripped, each non-empty line must be either
+#     PlatformName[, flags]: FirstName, alias, alias, ...
+# or just
+#     PlatformName[, flags]
+# The first form emits rows for FirstName, for every listed alias, and for every alias that
+# character-sets.txt groups with any of them. The second form only records the platform name
+# for duplicate detection. Anything else is reported as a syntax error.
+#
+# Problems in the file are reported through error(); failure to open the file is fatal.
+sub process_platform_encodings
+{
+    my ($filename, $PlatformPrefix) = @_;
+    my $baseFilename = $filename;
+    $baseFilename =~ s|.*/||;
+
+    my %seenPlatformNames;
+    my %seenIANANames;
+
+    # Include the file name and the system error so a failed open can be diagnosed.
+    open(PLATFORM_ENCODINGS, $filename) or die "Could not open $filename: $!";
+
+    while (<PLATFORM_ENCODINGS>) {
+        chomp;
+        s/\#.*$//;
+        s/\s+$//;
+        if (my ($PlatformName, undef, $flags, $IANANames) = /^(.+?)(, (.+))?: (.+)$/) {
+            my %aliases;
+
+            my $PlatformNameWithFlags = $PlatformName;
+            if ($flags) {
+                $PlatformNameWithFlags .= ", " . $flags;
+            } else {
+                $flags = "NoEncodingFlags";
+            }
+            error "Platform encoding name $PlatformName is mentioned twice in $baseFilename" if $seenPlatformNames{$PlatformNameWithFlags};
+            $seenPlatformNames{$PlatformNameWithFlags} = 1;
+
+            # Build the aliases list.
+            # Also check that no two names are part of the same entry in the charsets file.
+            my @IANANames = split ", ", $IANANames;
+            my $firstName = "";
+            my $canonicalFirstName = "";
+            my $prevName = "";
+            for my $name (@IANANames) {
+                if ($firstName eq "") {
+                    # The first name is emitted verbatim, so it may use mixed case, "-" and "_".
+                    if ($name !~ /^[-A-Za-z0-9_]+$/) {
+                        error "$name, in $baseFilename, has illegal characters in it";
+                        next;
+                    }
+                    $firstName = $name;
+                } else {
+                    # Later names must already be in normalized form and sorted.
+                    if ($name !~ /^[a-z0-9]+$/) {
+                        error "$name, in $baseFilename, has illegal characters in it (must be all lowercase alphanumeric)";
+                        next;
+                    }
+                    if ($name le $prevName) {
+                        error "$name comes after $prevName in $baseFilename, but everything must be in alphabetical order";
+                    }
+                    $prevName = $name;
+                }
+
+                # Normalize the same way the keys of %aliasesFromCharsetsFile are normalized.
+                my $canonicalName = lc $name;
+                $canonicalName =~ tr/-_//d;
+
+                $canonicalFirstName = $canonicalName if $canonicalFirstName eq "";
+
+                error "$name is mentioned twice in $baseFilename" if $seenIANANames{$canonicalName};
+                $seenIANANames{$canonicalName} = 1;
+
+                $aliases{$canonicalName} = 1;
+                next if !$aliasesFromCharsetsFile{$canonicalName};
+                for my $alias (@{$aliasesFromCharsetsFile{$canonicalName}}) {
+                    $aliases{$alias} = 1;
+                }
+                for my $otherName (@IANANames) {
+                    # The lookup table is keyed by normalized names, so normalize the other
+                    # name too; otherwise a first name such as "ISO-8859-16" is never found.
+                    my $canonicalOtherName = lc $otherName;
+                    $canonicalOtherName =~ tr/-_//d;
+
+                    next if $canonicalName eq $canonicalOtherName;
+                    # Both names map to the same group array when character-sets.txt already
+                    # aliases them; the "le" test reports each such pair only once.
+                    if ($aliasesFromCharsetsFile{$canonicalOtherName}
+                        && $aliasesFromCharsetsFile{$canonicalName} eq $aliasesFromCharsetsFile{$canonicalOtherName}
+                        && $canonicalName le $canonicalOtherName) {
+                        error "$baseFilename lists both $name and $otherName under $PlatformName, but that aliasing is already specified in character-sets.txt";
+                    }
+                }
+            }
+
+            # write out
+            emit_line($firstName, $PlatformPrefix, $PlatformName, $flags);
+            for my $alias (sort keys %aliases) {
+                emit_line($alias, $PlatformPrefix, $PlatformName, $flags) if $alias ne $canonicalFirstName;
+            }
+        } elsif (/^([a-zA-Z0-9_]+)(, (.+))?$/) {
+            # A platform name with no charset names: nothing is emitted for it.
+            my $PlatformName = $1;
+
+            error "Platform encoding name $PlatformName is mentioned twice in $baseFilename" if $seenPlatformNames{$PlatformName};
+            $seenPlatformNames{$PlatformName} = 1;
+        } elsif (/./) {
+            error "syntax error in $baseFilename, line $.";
+        }
+    }
+
+    close PLATFORM_ENCODINGS;
+}
+
+# Record one group of names: the canonical name and all of its aliases are each
+# mapped, in %aliasesFromCharsetsFile, to the same sorted array holding every name
+# in the group. Does nothing when the canonical name is empty or undefined.
+sub process_iana_charset
+{
+    my ($primary, @others) = @_;
+
+    return unless $primary;
+
+    my @group = ($primary, @others);
+    @group = sort @group;
+
+    # Every member shares one array reference, so groups can be compared by reference.
+    for my $member (@group) {
+        $aliasesFromCharsetsFile{$member} = \@group;
+    }
+}
+
+# Read a character-sets.txt style file and record each charset's group of names.
+#
+#   $filename  path of the file to read
+#
+# A line containing "Name: X" starts a new charset, and each following "Alias: Y" line
+# adds an alias to it. Names and aliases are normalized by lowercasing them and deleting
+# every character that is not a lowercase letter or digit. Each finished group is handed
+# to process_iana_charset. Duplicates are reported through error(); failure to open the
+# file is fatal.
+sub process_iana_charsets
+{
+    my ($filename) = @_;
+
+    # Include the file name and the system error so a failed open can be diagnosed.
+    open(CHARSETS, $filename) or die "Could not open $filename: $!";
+
+    # Normalized name or alias => normalized canonical name of the charset it was seen under.
+    my %seen;
+
+    my $canonical_name;
+    my @aliases;
+
+    # Aliases that may appear under more than one charset without being reported.
+    my %exceptions = ( isoir91 => 1, isoir92 => 1 );
+
+    while (<CHARSETS>) {
+        chomp;
+        if ((my $new_canonical_name) = /Name: ([^ \t]*).*/) {
+            $new_canonical_name = lc $new_canonical_name;
+            $new_canonical_name =~ tr/a-z0-9//cd;
+
+            error "saw $new_canonical_name twice in character-sets.txt" if $seen{$new_canonical_name};
+            $seen{$new_canonical_name} = $new_canonical_name;
+
+            # A new charset begins, so flush the group collected for the previous one.
+            process_iana_charset($canonical_name, @aliases);
+
+            $canonical_name = $new_canonical_name;
+            @aliases = ();
+        } elsif ((my $new_alias) = /Alias: ([^ \t]*).*/) {
+            $new_alias = lc $new_alias;
+            $new_alias =~ tr/a-z0-9//cd;
+
+            # do this after normalizing the alias, sometimes character-sets.txt
+            # has weird escape characters, e.g. \b after None
+            next if $new_alias eq "none";
+
+            error "saw $new_alias twice in character-sets.txt $seen{$new_alias}, $canonical_name" if $seen{$new_alias} && $seen{$new_alias} ne $canonical_name && !$exceptions{$new_alias};
+            # Only names not seen before join the group (this also skips an alias that
+            # repeats the charset's own name).
+            push @aliases, $new_alias if !$seen{$new_alias};
+            $seen{$new_alias} = $canonical_name;
+        }
+    }
+
+    # Flush the last charset in the file.
+    process_iana_charset($canonical_name, @aliases);
+
+    close CHARSETS;
+}
+
+# Program body
+#
+# Arguments:
+#   1. the character-sets file (read for its "Name:" and "Alias:" lines)
+#   2. the platform encodings file
+#   3. the prefix placed in front of every platform encoding name in the table
+# The generated source is printed to standard output; if any error was reported,
+# nothing is printed and the exit status is 1.
+
+my ($charsetsFile, $platformEncodingsFile, $platformPrefix) = @ARGV;
+
+process_iana_charsets($charsetsFile);
+process_platform_encodings($platformEncodingsFile, $platformPrefix);
+
+if ($error) {
+    exit 1;
+}
+
+print <<"EOF";
+// File generated by make-charset-table.pl. Do not edit!
+
+#include "config.h"
+#include "CharsetData.h"
+
+namespace WebCore {
+
+ const CharsetEntry CharsetTable[] = {
+$output
+ { 0, 0 }
+ };
+
+}
+EOF