12 files changed, 3358 insertions, 0 deletions
diff --git a/WebCore/platform/text/mac/CharsetData.h b/WebCore/platform/text/mac/CharsetData.h
new file mode 100644
index 0000000..458cecb
--- /dev/null
+++ b/WebCore/platform/text/mac/CharsetData.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2003, 2006 Apple Computer, Inc.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+namespace WebCore {
+    // Alias for the CoreFoundation constant; note that a macro is not scoped by this namespace.
+    #define kTextEncodingISOLatinThai kCFStringEncodingISOLatinThai
+
+    // One row of CharsetTable: pairs a charset name with a TextEncoding value.
+    struct CharsetEntry {
+        const char* name;
+        ::TextEncoding encoding;
+    };
+    // Array of CharsetEntry rows; only declared here, the definition lives elsewhere.
+    extern const CharsetEntry CharsetTable[];
+
+}
diff --git a/WebCore/platform/text/mac/ShapeArabic.c b/WebCore/platform/text/mac/ShapeArabic.c
new file mode 100644
index 0000000..6dbc008
--- /dev/null
+++ b/WebCore/platform/text/mac/ShapeArabic.c
@@ -0,0 +1,555 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 2000-2004, International Business Machines
+*   Corporation and others. All Rights Reserved.
+*   Copyright (C) 2007 Apple Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a copy of this
+*   software and associated documentation files (the "Software"), to deal in the Software
+*   without restriction, including without limitation the rights to use, copy, modify,
+*   merge, publish, distribute, and/or sell copies of the Software, and to permit persons
+*   to whom the Software is furnished to do so, provided that the above copyright notice(s)
+*   and this permission notice appear in all copies of the Software and that both the above
+*   copyright notice(s) and this permission notice appear in supporting documentation.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+*   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
+*   PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER
+*   OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR
+*   CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+*   PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+*   OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*
+*   Except as contained in this notice, the name of a copyright holder shall not be used in
+*   advertising or otherwise to promote the sale, use or other dealings in this Software
+*   without prior written authorization of the copyright holder.
+*
+******************************************************************************
+*
+*   Arabic letter shaping implemented by Ayman Roshdy
+*/
+
+#include "config.h"
+
+#if USE(ATSUI)
+
+#include "ShapeArabic.h"
+
+#include <unicode/utypes.h>
+#include <unicode/uchar.h>
+#include <unicode/ustring.h>
+#include <unicode/ushape.h>
+#include <wtf/Assertions.h>
+
+/*
+ * ### TODO in general for letter shaping:
+ * - the letter shaping code is UTF-16-unaware; needs update
+ *   + especially invertBuffer()?!
+ * - needs to handle the "Arabic Tail" that is used in some legacy codepages
+ *   as a glyph fragment of wide-glyph letters
+ *   + IBM Unicode conversion tables map it to U+200B (ZWSP)
+ *   + IBM Egypt has proposed to encode the tail in Unicode among Arabic Presentation Forms
+ */
+
+/* definitions for Arabic letter shaping ------------------------------------ */
+
+#define IRRELEVANT 4     /* link flag: shapeUnicode skips this character when looking for neighbouring letters */
+#define LAMTYPE    16    /* link flag: tested on the previous letter when detecting a LamAlef pair */
+#define ALEFTYPE   32    /* link flag: tested on the current letter when detecting a LamAlef pair */
+#define LINKR      1     /* link bit 0; (LINKR + LINKL) masks the value used to index shapeTable */
+#define LINKL      2     /* link bit 1; see LINKR */
+
+static const UChar IrrelevantPos[] = { /* indexed by (ch - 0x064B) in shapeUnicode; offset from 0xFE70 to which Shape is added */
+    0x0, 0x2, 0x4, 0x6,
+    0x8, 0xA, 0xC, 0xE,
+};
+
+static const UChar araLink[178]= /* link data for 0x0622-0x06D3, indexed by (ch - 0x0622) in getLink() */
+{ /* low byte: LINKR/LINKL/IRRELEVANT/LAMTYPE/ALEFTYPE flags; high byte: offset added to the base code point in shapeUnicode() */
+  1           + 32 + 256 * 0x11,/*0x0622*/
+  1           + 32 + 256 * 0x13,/*0x0623*/
+  1                + 256 * 0x15,/*0x0624*/
+  1           + 32 + 256 * 0x17,/*0x0625*/
+  1 + 2            + 256 * 0x19,/*0x0626*/
+  1           + 32 + 256 * 0x1D,/*0x0627*/
+  1 + 2            + 256 * 0x1F,/*0x0628*/
+  1                + 256 * 0x23,/*0x0629*/
+  1 + 2            + 256 * 0x25,/*0x062A*/
+  1 + 2            + 256 * 0x29,/*0x062B*/
+  1 + 2            + 256 * 0x2D,/*0x062C*/
+  1 + 2            + 256 * 0x31,/*0x062D*/
+  1 + 2            + 256 * 0x35,/*0x062E*/
+  1                + 256 * 0x39,/*0x062F*/
+  1                + 256 * 0x3B,/*0x0630*/
+  1                + 256 * 0x3D,/*0x0631*/
+  1                + 256 * 0x3F,/*0x0632*/
+  1 + 2            + 256 * 0x41,/*0x0633*/
+  1 + 2            + 256 * 0x45,/*0x0634*/
+  1 + 2            + 256 * 0x49,/*0x0635*/
+  1 + 2            + 256 * 0x4D,/*0x0636*/
+  1 + 2            + 256 * 0x51,/*0x0637*/
+  1 + 2            + 256 * 0x55,/*0x0638*/
+  1 + 2            + 256 * 0x59,/*0x0639*/
+  1 + 2            + 256 * 0x5D,/*0x063A*/
+  0, 0, 0, 0, 0,                /*0x063B-0x063F*/
+  1 + 2,                        /*0x0640*/
+  1 + 2            + 256 * 0x61,/*0x0641*/
+  1 + 2            + 256 * 0x65,/*0x0642*/
+  1 + 2            + 256 * 0x69,/*0x0643*/
+  1 + 2       + 16 + 256 * 0x6D,/*0x0644*/
+  1 + 2            + 256 * 0x71,/*0x0645*/
+  1 + 2            + 256 * 0x75,/*0x0646*/
+  1 + 2            + 256 * 0x79,/*0x0647*/
+  1                + 256 * 0x7D,/*0x0648*/
+  1                + 256 * 0x7F,/*0x0649*/
+  1 + 2            + 256 * 0x81,/*0x064A*/
+  4, 4, 4, 4,                   /*0x064B-0x064E*/
+  4, 4, 4, 4,                   /*0x064F-0x0652*/
+  4, 4, 4, 0, 0,                /*0x0653-0x0657*/
+  0, 0, 0, 0,                   /*0x0658-0x065B*/
+  1                + 256 * 0x85,/*0x065C*/
+  1                + 256 * 0x87,/*0x065D*/
+  1                + 256 * 0x89,/*0x065E*/
+  1                + 256 * 0x8B,/*0x065F*/
+  0, 0, 0, 0, 0,                /*0x0660-0x0664*/
+  0, 0, 0, 0, 0,                /*0x0665-0x0669*/
+  0, 0, 0, 0, 0, 0,             /*0x066A-0x066F*/
+  4,                            /*0x0670*/
+  0,                            /*0x0671*/
+  1           + 32,             /*0x0672*/
+  1           + 32,             /*0x0673*/
+  0,                            /*0x0674*/
+  1           + 32,             /*0x0675*/
+  1, 1,                         /*0x0676-0x0677*/
+  1+2,                          /*0x0678*/
+  1+2              + 256 * 0x16,/*0x0679*/
+  1+2              + 256 * 0x0E,/*0x067A*/
+  1+2              + 256 * 0x02,/*0x067B*/
+  1+2, 1+2,                     /*0x067C-0x067D*/
+  1+2              + 256 * 0x06,/*0x067E*/
+  1+2              + 256 * 0x12,/*0x067F*/
+  1+2              + 256 * 0x0A,/*0x0680*/
+  1+2, 1+2,                     /*0x0681-0x0682*/
+  1+2              + 256 * 0x26,/*0x0683*/
+  1+2              + 256 * 0x22,/*0x0684*/
+  1+2,                          /*0x0685*/
+  1+2              + 256 * 0x2A,/*0x0686*/
+  1+2              + 256 * 0x2E,/*0x0687*/
+  1                + 256 * 0x38,/*0x0688*/
+  1, 1, 1,                      /*0x0689-0x068B*/
+  1                + 256 * 0x34,/*0x068C*/
+  1                + 256 * 0x32,/*0x068D*/
+  1                + 256 * 0x36,/*0x068E*/
+  1, 1,                         /*0x068F-0x0690*/
+  1                + 256 * 0x3C,/*0x0691*/
+  1, 1, 1, 1, 1, 1,             /*0x0692-0x0697*/
+  1                + 256 * 0x3A,/*0x0698*/
+  1,                            /*0x0699*/
+  1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x069A-0x069F*/
+  1+2, 1+2, 1+2, 1+2,           /*0x06A0-0x06A3*/
+  1+2              + 256 * 0x2E,/*0x06A4*/
+  1+2,                          /*0x06A5*/
+  1+2              + 256 * 0x1E,/*0x06A6*/
+  1+2, 1+2,                     /*0x06A7-0x06A8*/
+  1+2              + 256 * 0x3E,/*0x06A9*/
+  1+2, 1+2, 1+2,                /*0x06AA-0x06AC*/
+  1+2              + 256 * 0x83,/*0x06AD*/
+  1+2,                          /*0x06AE*/
+  1+2              + 256 * 0x42,/*0x06AF*/
+  1+2,                          /*0x06B0*/
+  1+2              + 256 * 0x4A,/*0x06B1*/
+  1+2,                          /*0x06B2*/
+  1+2              + 256 * 0x46,/*0x06B3*/
+  1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x06B4-0x06B9*/
+  1+2,                          /*0x06BA*/          // FIXME: Seems to have a final form
+  1+2              + 256 * 0x50,/*0x06BB*/
+  1+2, 1+2,                     /*0x06BC-0x06BD*/
+  1+2              + 256 * 0x5A,/*0x06BE*/
+  1+2,                          /*0x06BF*/
+  1,                            /*0x06C0*/
+  1+2              + 256 * 0x56,/*0x06C1*/
+  1+2,                          /*0x06C2*/
+  1, 1,                         /*0x06C3-0x06C4*/
+  1                + 256 * 0x90,/*0x06C5*/
+  1                + 256 * 0x89,/*0x06C6*/
+  1                + 256 * 0x87,/*0x06C7*/
+  1                + 256 * 0x8B,/*0x06C8*/
+  1                + 256 * 0x92,/*0x06C9*/
+  1,                            /*0x06CA*/
+  1                + 256 * 0x8E,/*0x06CB*/
+  1+2              + 256 * 0xAC,/*0x06CC*/
+  1,                            /*0x06CD*/
+  1+2,                          /*0x06CE*/
+  1,                            /*0x06CF*/
+  1+2              + 256 * 0x94,/*0x06D0*/
+  1+2,                          /*0x06D1*/
+  1                + 256 * 0x5E,/*0x06D2*/
+  1                + 256 * 0x60 /*0x06D3*/
+};
+
+static const UChar presLink[141]= /* link flags for 0xFE70-0xFEFC, indexed by (ch - 0xFE70) in getLink() */
+{
+  1 + 2,                        /*0xFE70*/
+  1 + 2,                        /*0xFE71*/
+  1 + 2, 0, 1+ 2, 0, 1+ 2,      /*0xFE72-0xFE76*/
+  1 + 2,                        /*0xFE77*/
+  1+ 2, 1 + 2, 1+2, 1 + 2,      /*0xFE78-0xFE7B*/
+  1+ 2, 1 + 2, 1+2, 1 + 2,      /*0xFE7C-0xFE7F*/
+  0, 0 + 32, 1 + 32, 0 + 32,    /*0xFE80-0xFE83*/
+  1 + 32, 0, 1,  0 + 32,        /*0xFE84-0xFE87*/
+  1 + 32, 0, 2,  1 + 2,         /*0xFE88-0xFE8B*/
+  1, 0 + 32, 1 + 32, 0,         /*0xFE8C-0xFE8F*/
+  2, 1 + 2, 1, 0,               /*0xFE90-0xFE93*/
+  1, 0, 2, 1 + 2,               /*0xFE94-0xFE97*/
+  1, 0, 2, 1 + 2,               /*0xFE98-0xFE9B*/
+  1, 0, 2, 1 + 2,               /*0xFE9C-0xFE9F*/
+  1, 0, 2, 1 + 2,               /*0xFEA0-0xFEA3*/
+  1, 0, 2, 1 + 2,               /*0xFEA4-0xFEA7*/
+  1, 0, 1, 0,                   /*0xFEA8-0xFEAB*/
+  1, 0, 1, 0,                   /*0xFEAC-0xFEAF*/
+  1, 0, 2, 1+2,                 /*0xFEB0-0xFEB3*/
+  1, 0, 2, 1+2,                 /*0xFEB4-0xFEB7*/
+  1, 0, 2, 1+2,                 /*0xFEB8-0xFEBB*/
+  1, 0, 2, 1+2,                 /*0xFEBC-0xFEBF*/
+  1, 0, 2, 1+2,                 /*0xFEC0-0xFEC3*/
+  1, 0, 2, 1+2,                 /*0xFEC4-0xFEC7*/
+  1, 0, 2, 1+2,                 /*0xFEC8-0xFECB*/
+  1, 0, 2, 1+2,                 /*0xFECC-0xFECF*/
+  1, 0, 2, 1+2,                 /*0xFED0-0xFED3*/
+  1, 0, 2, 1+2,                 /*0xFED4-0xFED7*/
+  1, 0, 2, 1+2,                 /*0xFED8-0xFEDB*/
+  1, 0 + 16, 2 + 16, 1 + 2 +16, /*0xFEDC-0xFEDF*/
+  1 + 16, 0, 2, 1+2,            /*0xFEE0-0xFEE3*/
+  1, 0, 2, 1+2,                 /*0xFEE4-0xFEE7*/
+  1, 0, 2, 1+2,                 /*0xFEE8-0xFEEB*/
+  1, 0, 1, 0,                   /*0xFEEC-0xFEEF*/
+  1, 0, 2, 1+2,                 /*0xFEF0-0xFEF3*/
+  1, 0, 1, 0,                   /*0xFEF4-0xFEF7*/
+  1, 0, 1, 0,                   /*0xFEF8-0xFEFB*/
+  1                             /*0xFEFC*/
+};
+
+static const UChar convertFEto06[] = /* maps 0xFE70-0xFEFC to a 06xx character; indexed by (ch - 0xFE70) in shapeUnicode() */
+{
+/***********0******1******2******3******4******5******6******7******8******9******A******B******C******D******E******F***/
+/*FE7*/   0x64B, 0x64B, 0x64C, 0x64C, 0x64D, 0x64D, 0x64E, 0x64E, 0x64F, 0x64F, 0x650, 0x650, 0x651, 0x651, 0x652, 0x652,
+/*FE8*/   0x621, 0x622, 0x622, 0x623, 0x623, 0x624, 0x624, 0x625, 0x625, 0x626, 0x626, 0x626, 0x626, 0x627, 0x627, 0x628,
+/*FE9*/   0x628, 0x628, 0x628, 0x629, 0x629, 0x62A, 0x62A, 0x62A, 0x62A, 0x62B, 0x62B, 0x62B, 0x62B, 0x62C, 0x62C, 0x62C,
+/*FEA*/   0x62C, 0x62D, 0x62D, 0x62D, 0x62D, 0x62E, 0x62E, 0x62E, 0x62E, 0x62F, 0x62F, 0x630, 0x630, 0x631, 0x631, 0x632,
+/*FEB*/   0x632, 0x633, 0x633, 0x633, 0x633, 0x634, 0x634, 0x634, 0x634, 0x635, 0x635, 0x635, 0x635, 0x636, 0x636, 0x636,
+/*FEC*/   0x636, 0x637, 0x637, 0x637, 0x637, 0x638, 0x638, 0x638, 0x638, 0x639, 0x639, 0x639, 0x639, 0x63A, 0x63A, 0x63A,
+/*FED*/   0x63A, 0x641, 0x641, 0x641, 0x641, 0x642, 0x642, 0x642, 0x642, 0x643, 0x643, 0x643, 0x643, 0x644, 0x644, 0x644,
+/*FEE*/   0x644, 0x645, 0x645, 0x645, 0x645, 0x646, 0x646, 0x646, 0x646, 0x647, 0x647, 0x647, 0x647, 0x648, 0x648, 0x649,
+/*FEF*/   0x649, 0x64A, 0x64A, 0x64A, 0x64A, 0x65C, 0x65C, 0x65D, 0x65D, 0x65E, 0x65E, 0x65F, 0x65F
+};
+
+static const UChar shapeTable[4][4][4]= /* [nextLink & 3][lastLink & 3][currLink & 3] -> form index (0-3) added to the base code point */
+{
+  { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,1} },
+  { {0,0,2,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} },
+  { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,3} },
+  { {0,0,1,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} }
+};
+
+/*
+ *Name     : changeLamAlef
+ *Function : Maps an Alef (0x0622, 0x0623, 0x0625 or 0x0627) to the
+ *           matching intermediate LamAlef slot 0x065C-0x065F in the
+ *           0x06xx range; shapeUnicode later turns that slot into a
+ *           0xFExx LamAlef presentation form.
+ *Returns  : the intermediate LamAlef code, or 0 for any other character.
+ */
+static UChar
+changeLamAlef(UChar ch) {
+
+    switch(ch) {
+    case 0x0622 :
+        return(0x065C);
+        break;
+    case 0x0623 :
+        return(0x065D);
+        break;
+    case 0x0625 :
+        return(0x065E);
+        break;
+    case 0x0627 :
+        return(0x065F);
+        break;
+    default :
+        return(0);
+        break;
+    }
+}
+
+/*
+ *Name     : specialChar
+ *Function : Classifies ch for shapeUnicode: 1 for the letter ranges tested first,
+ *           2 for 0x064B-0x0652, 3 for 0x0653-0x0655, 0x0670 and 0xFE70-0xFE7F, else 0
+ */
+static int32_t
+specialChar(UChar ch) {
+    /* shapeUnicode limits class 1 to shapes 0 and 1 and shapes class 2 through IrrelevantPos; class 3 gets no special path there */
+    if( (ch>0x0621 && ch<0x0626)||(ch==0x0627)||(ch>0x062e && ch<0x0633)||
+        (ch>0x0647 && ch<0x064a)||(ch==0x0629) ) {
+        return (1);
+    }
+    else
+    if( ch>=0x064B && ch<= 0x0652 )
+        return (2);
+    else
+    if( (ch>=0x0653 && ch<= 0x0655) || ch == 0x0670 ||
+        (ch>=0xFE70 && ch<= 0xFE7F) )
+        return (3);
+    else
+        return (0);
+}
+
+/*
+ *Name     : getLink
+ *Function : Returns the link value of ch: the araLink entry for 0x0622-0x06D3,
+ *           the presLink entry for 0xFE70-0xFEFC, 3 for 0x200D, 4 for
+ *           0x206D-0x206F and 0 for every other character.
+ */
+static UChar
+getLink(UChar ch) {
+    /* the literals below equal LINKR + LINKL (3) and IRRELEVANT (4) */
+    if(ch >= 0x0622 && ch <= 0x06D3) {
+        return(araLink[ch-0x0622]);
+    } else if(ch == 0x200D) {
+        return(3);
+    } else if(ch >= 0x206D && ch <= 0x206F) {
+        return(4);
+    } else if(ch >= 0xFE70 && ch <= 0xFEFC) {
+        return(presLink[ch-0xFE70]);
+    } else {
+        return(0);
+    }
+}
+
+/*
+ *Name     : isTashkeelChar
+ *Function : Returns 1 if ch is in 0x064B-0x0652 (the Tashkeel range), else 0
+ */
+static int32_t
+isTashkeelChar(UChar ch) {
+
+    if( ch>=0x064B && ch<= 0x0652 )
+        return (1);
+    else
+        return (0);
+}
+
+/*
+ *Name     : shapeUnicode
+ *Function : Shapes the first sourceLength UChars of dest in place: FExx input is first
+ *           mapped back to the 06xx range, then letters are replaced by presentation forms.
+ *           Returns sourceLength; the incoming destSize, options and pErrorCode are not used.
+ */
+static int32_t
+shapeUnicode(UChar *dest, int32_t sourceLength,
+             int32_t destSize,uint32_t options,
+             UErrorCode *pErrorCode,
+             int tashkeelFlag) { /* tashkeelFlag == 1 allows a tashkeel to take shape 1 between two linking letters */
+
+    int32_t          i, iend;
+    int32_t          prevPos, lastPos,Nx, Nw;
+    unsigned int     Shape;
+    int32_t          flag;
+    int32_t          lamalef_found = 0; /* set below but never read */
+    UChar            prevLink = 0, lastLink = 0, currLink, nextLink = 0;
+    UChar            wLamalef;
+
+    /*
+     * Converts the input buffer from FExx Range into 06xx Range
+     * to make sure that all characters are in the 06xx range
+     * even the lamalef is converted to the special region in
+     * the 06xx range
+     */
+    for (i = 0; i < sourceLength; i++) {
+        UChar inputChar = dest[i];
+        if ( (inputChar >= 0xFE70) && (inputChar <= 0xFEFC)) {
+            dest[i] = convertFEto06 [ (inputChar - 0xFE70) ] ;
+        }
+    }
+
+    /* start at the first character; iend is one past the last character to process */
+    i = 0;
+    iend = sourceLength;
+
+    /*
+     * This function resolves the link between the characters .
+     * Arabic characters have four forms :
+     * Isolated Form, Initial Form, Middle Form and Final Form
+     */
+    currLink = getLink(dest[i]); /* reads dest[0]: the caller must pass sourceLength > 0 */
+
+    prevPos = i;
+    lastPos = i;
+    Nx = sourceLength + 2, Nw = 0; /* Nx >= sourceLength means "next relevant character not looked up yet" */
+
+    while (i != iend) {
+        /* If high byte of currLink > 0 then more than one shape */
+        if ((currLink & 0xFF00) > 0 || isTashkeelChar(dest[i])) {
+            Nw = i + 1;
+            while (Nx >= sourceLength) {         /* we need to know about next char */
+                if(Nw == iend) {
+                    nextLink = 0;
+                    Nx = -1;
+                } else {
+                    nextLink = getLink(dest[Nw]);
+                    if((nextLink & IRRELEVANT) == 0) {
+                        Nx = Nw;
+                    } else {
+                        Nw = Nw + 1;
+                    }
+                }
+            }
+
+            if ( ((currLink & ALEFTYPE) > 0)  &&  ((lastLink & LAMTYPE) > 0) ) {
+                lamalef_found = 1;
+                wLamalef = changeLamAlef(dest[i]); /*get from 0x065C-0x065f */
+                if ( wLamalef != 0) {
+                    dest[i] = ' ';               /* The default case is to drop the Alef and replace */
+                    dest[lastPos] =wLamalef;     /* it by a space.                                   */
+                    i=lastPos;
+                }
+                lastLink = prevLink;
+                currLink = getLink(wLamalef);
+            }
+            /*
+             * get the proper shape according to link ability of neighbors
+             * and of character; depends on the order of the shapes
+             * (isolated, initial, middle, final) in the compatibility area
+             */
+             flag  = specialChar(dest[i]);
+
+             Shape = shapeTable[nextLink & (LINKR + LINKL)]
+                               [lastLink & (LINKR + LINKL)]
+                               [currLink & (LINKR + LINKL)];
+
+             if (flag == 1) {
+                 Shape = (Shape == 1 || Shape == 3) ? 1 : 0;
+             }
+             else
+             if(flag == 2) {
+                 if( (lastLink & LINKL) && (nextLink & LINKR) && (tashkeelFlag == 1) &&
+                      dest[i] != 0x064C && dest[i] != 0x064D ) {
+                     Shape = 1;
+                     if( (nextLink&ALEFTYPE) == ALEFTYPE && (lastLink&LAMTYPE) == LAMTYPE )
+                         Shape = 0;
+                 }
+                 else {
+                     Shape = 0;
+                 }
+             }
+             /* result = base (0xFE70, or 0xFB50 for characters at or above 0x0670) + per-character offset + Shape */
+             if(flag == 2) {
+                 dest[i] =  0xFE70 + IrrelevantPos[(dest[i] - 0x064B)] + Shape;
+             }
+             else
+                 dest[i] = (UChar)((dest[i] < 0x0670 ? 0xFE70 : 0xFB50) + (currLink >> 8) + Shape);
+        }
+
+        /* move one notch forward */
+        if ((currLink & IRRELEVANT) == 0) {
+              prevLink = lastLink;
+              lastLink = currLink;
+              prevPos = lastPos; /* prevPos is written but never read */
+              lastPos = i;
+        }
+
+        i++;
+        if (i == Nx) {
+            currLink = nextLink;
+            Nx = sourceLength + 2;
+        }
+        else if(i != iend) {
+            currLink = getLink(dest[i]);
+        }
+    }
+
+    destSize = sourceLength;
+
+    return destSize;
+}
+
+int32_t shapeArabic(const UChar *source, int32_t sourceLength, UChar *dest, int32_t destCapacity, uint32_t options, UErrorCode *pErrorCode) {
+    int32_t destLength; /* assigned below but never read; on success the function returns sourceLength */
+    /* Copies source to dest and shapes the Arabic letters there; returns 0 on error or for empty input. */
+    /* usual error checking */
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
+    /* make sure that no reserved options values are used; allow dest==NULL only for preflighting */
+    if( source==NULL || sourceLength<-1 ||
+        (dest==NULL && destCapacity!=0) || destCapacity<0 ||
+        options>=U_SHAPE_DIGIT_TYPE_RESERVED ||
+        (options&U_SHAPE_DIGITS_MASK)>=U_SHAPE_DIGITS_RESERVED
+    ) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    /* determine the source length */
+    if(sourceLength==-1) {
+        sourceLength=u_strlen(source); /* -1 means source is NUL-terminated */
+    }
+    if(sourceLength==0) {
+        return 0;
+    }
+
+    /* check that source and destination do not overlap */
+    if( dest!=NULL &&
+        ((source<=dest && dest<source+sourceLength) ||
+         (dest<=source && source<dest+destCapacity))
+    ) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    if((options&U_SHAPE_LETTERS_MASK)!=U_SHAPE_LETTERS_NOOP) {
+        int32_t outputSize = sourceLength;
+
+        /* calculate destination size */
+        /* TODO: do we ever need to do this pure preflighting? */
+        ASSERT((options&U_SHAPE_LENGTH_MASK) != U_SHAPE_LENGTH_GROW_SHRINK);
+        /* too small a buffer (including dest==NULL with destCapacity 0) reports the required length */
+        if(outputSize>destCapacity) {
+            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+            return outputSize;
+        }
+
+        /* Start of Arabic letter shaping part */
+        memcpy(dest, source, sourceLength*U_SIZEOF_UCHAR);
+
+        ASSERT((options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL);
+
+        switch(options&U_SHAPE_LETTERS_MASK) {
+        case U_SHAPE_LETTERS_SHAPE :
+            /* Call the shaping function with tashkeel flag == 1 */
+            destLength = shapeUnicode(dest,sourceLength,destCapacity,options,pErrorCode,1);
+            break;
+        case U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED :
+            /* Call the shaping function with tashkeel flag == 0 */
+            destLength = shapeUnicode(dest,sourceLength,destCapacity,options,pErrorCode,0);
+            break;
+        case U_SHAPE_LETTERS_UNSHAPE :
+            ASSERT_NOT_REACHED(); /* unshaping is not implemented here; dest keeps the plain copy */
+            break;
+        default :
+            /* will never occur because of validity checks above */
+            destLength = 0;
+            break;
+        }
+
+        /* End of Arabic letter shaping part */
+    } else
+        ASSERT_NOT_REACHED(); /* U_SHAPE_LETTERS_NOOP is not expected; nothing is written to dest in that case */
+
+    ASSERT((options & U_SHAPE_DIGITS_MASK) == U_SHAPE_DIGITS_NOOP); /* digit shaping is not handled here */
+    /* the output always has the same length as the input */
+    return sourceLength;
+}
+
+#endif // USE(ATSUI)
diff --git a/WebCore/platform/text/mac/ShapeArabic.h b/WebCore/platform/text/mac/ShapeArabic.h
new file mode 100644
index 0000000..8aa577d
--- /dev/null
+++ b/WebCore/platform/text/mac/ShapeArabic.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2007 Apple Inc.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef ShapeArabic_h
+#define ShapeArabic_h
+
+#if USE(ATSUI)
+
+#include <unicode/ushape.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* Arabic letter shaping, declared with C linkage; the option bits and UErrorCode come from <unicode/ushape.h>. */
+int32_t shapeArabic(const UChar *source, int32_t sourceLength, UChar *dest, int32_t destCapacity, uint32_t options, UErrorCode *pErrorCode);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // USE(ATSUI)
+#endif // ShapeArabic_h
diff --git a/WebCore/platform/text/mac/StringImplMac.mm b/WebCore/platform/text/mac/StringImplMac.mm
new file mode 100644
index 0000000..2180b94
--- /dev/null
+++ b/WebCore/platform/text/mac/StringImplMac.mm
@@ -0,0 +1,31 @@
+/**
+ * Copyright (C) 2006 Apple Computer, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "config.h"
+#include "StringImpl.h"
+
+namespace WebCore {
+// Conversion to NSString: builds a new NSString from the m_length characters starting at m_data.
+StringImpl::operator NSString *()
+{
+    return [NSString stringWithCharacters:m_data length:m_length];
+}
+
+}
diff --git a/WebCore/platform/text/mac/StringMac.mm b/WebCore/platform/text/mac/StringMac.mm
new file mode 100644
index 0000000..77942ea
--- /dev/null
+++ b/WebCore/platform/text/mac/StringMac.mm
@@ -0,0 +1,41 @@
+/**
+ * Copyright (C) 2006 Apple Computer, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "config.h"
+#include "PlatformString.h"
+
+namespace WebCore {
+// Constructs a String from an NSString; a nil str returns early without assigning m_impl.
+String::String(NSString* str)
+{
+    if (!str)
+        return;
+    // The NSString is accessed through the CFString API (the two types are toll-free bridged).
+    CFIndex size = CFStringGetLength(reinterpret_cast<CFStringRef>(str));
+    if (size == 0)
+        m_impl = StringImpl::empty();
+    else {
+        Vector<UChar, 1024> buffer(size); // temporary holding all size characters
+        CFStringGetCharacters(reinterpret_cast<CFStringRef>(str), CFRangeMake(0, size), buffer.data());
+        m_impl = StringImpl::create(buffer.data(), size);
+    }
+}
+
+}
diff --git a/WebCore/platform/text/mac/TextBoundaries.mm b/WebCore/platform/text/mac/TextBoundaries.mm
new file mode 100644
index 0000000..ff1dfd2
--- /dev/null
+++ b/WebCore/platform/text/mac/TextBoundaries.mm
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2004, 2006 Apple Computer, Inc.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#import "config.h"
+#import "TextBoundaries.h"
+
+namespace WebCore {
+// Stores in *start / *end the range NSAttributedString's doubleClickAtIndex: reports for position.
+void findWordBoundary(const UChar* chars, int len, int position, int* start, int* end)
+{
+    NSString* string = [[NSString alloc] initWithCharactersNoCopy:const_cast<unichar*>(chars)
+        length:len freeWhenDone:NO]; // wraps chars without copying; the buffer is not freed by the string
+    NSAttributedString* attr = [[NSAttributedString alloc] initWithString:string];
+    NSRange range = [attr doubleClickAtIndex:(position >= len) ? len - 1 : position]; // NOTE(review): index is -1 when len == 0; negative position is not clamped
+    [attr release];
+    [string release];
+    *start = range.location;
+    *end = range.location + range.length;
+}
+// Returns the result of NSAttributedString's nextWordFromIndex:forward: for position; position is passed through unclamped.
+int findNextWordFromIndex(const UChar* chars, int len, int position, bool forward)
+{   
+    NSString* string = [[NSString alloc] initWithCharactersNoCopy:const_cast<unichar*>(chars)
+        length:len freeWhenDone:NO]; // wraps chars without copying; the buffer is not freed by the string
+    NSAttributedString* attr = [[NSAttributedString alloc] initWithString:string];
+    int result = [attr nextWordFromIndex:position forward:forward];
+    [attr release];
+    [string release];
+    return result;
+}
+
+}
diff --git a/WebCore/platform/text/mac/TextBreakIteratorInternalICUMac.mm b/WebCore/platform/text/mac/TextBreakIteratorInternalICUMac.mm
new file mode 100644
index 0000000..92983eb
--- /dev/null
+++ b/WebCore/platform/text/mac/TextBreakIteratorInternalICUMac.mm
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2007 Apple Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "config.h"
+#include "TextBreakIteratorInternalICU.h"
+
+namespace WebCore {
+
+static const int maxLocaleStringLength = 32; // size of the char buffer that receives the locale ID
+
+// This code was swiped from the CarbonCore UnicodeUtilities. One change from that is to use the empty
+// string instead of the "old locale model" as the ultimate fallback. This change is per the UnicodeUtilities
+// engineer.
+static void getTextBreakLocale(char localeStringBuffer[maxLocaleStringLength])
+{
+    // Writes the locale ID to use for text breaking into localeStringBuffer. Nothing is written if no pref is usable,
+    // so the caller's initial contents stand (an empty string means "root locale"). AppleTextBreakLocale is tried
+    // first; if that fails then look for the first language in the AppleLanguages pref.
+    CFStringRef prefLocaleStr = (CFStringRef)CFPreferencesCopyValue(CFSTR("AppleTextBreakLocale"),
+        kCFPreferencesAnyApplication, kCFPreferencesCurrentUser, kCFPreferencesAnyHost);
+    if (!prefLocaleStr) {
+        CFArrayRef appleLangArr = (CFArrayRef)CFPreferencesCopyValue(CFSTR("AppleLanguages"),
+            kCFPreferencesAnyApplication, kCFPreferencesCurrentUser, kCFPreferencesAnyHost);
+        if (appleLangArr) {
+            // Take the topmost language, if there is one. Retain so that we can blindly release later.
+            if (CFArrayGetCount(appleLangArr) > 0)
+                prefLocaleStr = (CFStringRef)CFArrayGetValueAtIndex(appleLangArr, 0);
+            if (prefLocaleStr)
+                CFRetain(prefLocaleStr);
+            CFRelease(appleLangArr);
+        }
+    }
+    if (prefLocaleStr) {
+        // Canonicalize pref string in case it is not in the canonical format.
+        CFStringRef canonLocaleCFStr = CFLocaleCreateCanonicalLanguageIdentifierFromString(kCFAllocatorDefault, prefLocaleStr);
+        if (canonLocaleCFStr) {
+            if (!CFStringGetCString(canonLocaleCFStr, localeStringBuffer, maxLocaleStringLength, kCFStringEncodingASCII))
+                localeStringBuffer[0] = '\0'; // conversion failed or did not fit: fall back to the root locale
+            CFRelease(canonLocaleCFStr);
+        }
+        CFRelease(prefLocaleStr);
+    }
+}
+
+const char* currentTextBreakLocaleID()
+{
+    static char cachedLocaleID[maxLocaleStringLength];
+    static bool haveCachedLocaleID = false;
+    if (haveCachedLocaleID) // getTextBreakLocale() runs on the first call only
+        return cachedLocaleID;
+    getTextBreakLocale(cachedLocaleID);
+    haveCachedLocaleID = true;
+    return cachedLocaleID;
+}
+
+}
diff --git a/WebCore/platform/text/mac/TextCodecMac.cpp b/WebCore/platform/text/mac/TextCodecMac.cpp
new file mode 100644
index 0000000..ac1f0fb
--- /dev/null
+++ b/WebCore/platform/text/mac/TextCodecMac.cpp
@@ -0,0 +1,321 @@
+/*
+ * Copyright (C) 2004, 2006, 2008 Apple Inc. All rights reserved.
+ * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "config.h"
+#include "TextCodecMac.h"
+
+#include "CString.h"
+#include "CharacterNames.h"
+#include "CharsetData.h"
+#include "PlatformString.h"
+#include <wtf/Assertions.h>
+
+using std::auto_ptr;
+using std::min;
+
+namespace WebCore {
+
+// We need to keep this because ICU doesn't support some of the encodings that we need:
+// <http://bugs.webkit.org/show_bug.cgi?id=4195>.
+
+const size_t ConversionBufferSize = 16384; // element count of the UniChar scratch buffer used while decoding
+
+static TECObjectRef cachedConverterTEC; // at most one converter, parked here by releaseTECConverter() for reuse
+static TECTextEncodingID cachedConverterEncoding = invalidEncoding; // encoding recorded when cachedConverterTEC was parked
+
+void TextCodecMac::registerEncodingNames(EncodingNameRegistrar registrar)
+{
+    // Each run of consecutive table entries sharing an encoding is registered under the first name of that run.
+    TECTextEncodingID runEncoding = invalidEncoding;
+    const char* runName = 0;
+    for (const CharsetEntry* entry = CharsetTable; entry->name; ++entry) {
+        if (entry->encoding != runEncoding) {
+            runEncoding = entry->encoding;
+            runName = entry->name;
+        }
+        registrar(entry->name, runName);
+    }
+}
+
+static auto_ptr<TextCodec> newTextCodecMac(const TextEncoding&, const void* additionalData)
+{
+    return auto_ptr<TextCodec>(new TextCodecMac(*static_cast<const TECTextEncodingID*>(additionalData))); // additionalData is read as a pointer to the TECTextEncodingID
+}
+
+void TextCodecMac::registerCodecs(TextCodecRegistrar registrar)
+{
+    TECTextEncodingID previousEncoding = invalidEncoding; // a factory is registered only where the encoding changes
+    for (const CharsetEntry* entry = CharsetTable; entry->name; ++entry) {
+        if (entry->encoding == previousEncoding)
+            continue;
+        registrar(entry->name, newTextCodecMac, &entry->encoding);
+        previousEncoding = entry->encoding;
+    }
+}
+
+TextCodecMac::TextCodecMac(TECTextEncodingID encoding)
+    : m_encoding(encoding), m_backslashAsCurrencySymbol('\\'), m_numBufferedBytes(0), m_converterTEC(0)
+{
+    // encode() reads m_backslashAsCurrencySymbol, which used to be left uninitialized. Starting it at '\\' makes that
+    // substitution a no-op. NOTE(review): supply the per-encoding currency symbol here if one is intended.
+}
+
+TextCodecMac::~TextCodecMac()
+{
+    releaseTECConverter(); // parks any live converter in the file-level cache instead of disposing of it here
+}
+
+void TextCodecMac::releaseTECConverter() const
+{
+    if (!m_converterTEC)
+        return;
+    if (cachedConverterTEC) // the cache holds a single converter, so the previous occupant is disposed of
+        TECDisposeConverter(cachedConverterTEC);
+    cachedConverterTEC = m_converterTEC;
+    cachedConverterEncoding = m_encoding;
+    m_converterTEC = 0;
+}
+
+OSStatus TextCodecMac::createTECConverter() const
+{
+    // Reuse the cached converter if it was parked for our encoding; the cached encoding is invalidated either way.
+    const bool canReuseCached = cachedConverterEncoding == m_encoding && cachedConverterTEC;
+    cachedConverterEncoding = invalidEncoding;
+    if (canReuseCached) {
+        m_converterTEC = cachedConverterTEC;
+        cachedConverterTEC = 0;
+        TECClearConverterContextInfo(m_converterTEC);
+        return noErr;
+    }
+
+    // Otherwise create a converter from m_encoding to 16-bit Unicode.
+    OSStatus status = TECCreateConverter(&m_converterTEC, m_encoding,
+        CreateTextEncoding(kTextEncodingUnicodeDefault, kTextEncodingDefaultVariant, kUnicode16BitFormat));
+    if (status)
+        return status;
+    TECSetBasicOptions(m_converterTEC, kUnicodeForceASCIIRangeMask);
+    return noErr;
+}
+
+OSStatus TextCodecMac::decode(const unsigned char* inputBuffer, int inputBufferLength, int& inputLength,
+    void *outputBuffer, int outputBufferLength, int& outputLength)
+{
+    OSStatus status; // outcome of the single TECConvertText pass below; returned to the caller
+    unsigned long bytesRead = 0; // input bytes consumed, reported through inputLength
+    unsigned long bytesWritten = 0; // output bytes produced, reported through outputLength
+
+    if (m_numBufferedBytes != 0) {
+        // Finish converting a partial character that's in our buffer.
+        
+        // First, fill the partial character buffer with as many bytes as are available.
+        ASSERT(m_numBufferedBytes < sizeof(m_bufferedBytes));
+        const int spaceInBuffer = sizeof(m_bufferedBytes) - m_numBufferedBytes;
+        const int bytesToPutInBuffer = MIN(spaceInBuffer, inputBufferLength);
+        ASSERT(bytesToPutInBuffer != 0);
+        memcpy(m_bufferedBytes + m_numBufferedBytes, inputBuffer, bytesToPutInBuffer);
+
+        // Now, do a conversion on the buffer.
+        status = TECConvertText(m_converterTEC, m_bufferedBytes, m_numBufferedBytes + bytesToPutInBuffer, &bytesRead,
+            reinterpret_cast<unsigned char*>(outputBuffer), outputBufferLength, &bytesWritten);
+        ASSERT(bytesRead <= m_numBufferedBytes + bytesToPutInBuffer);
+
+        if (status == kTECPartialCharErr && bytesRead == 0) {
+            // Handle the case where the partial character was not converted.
+            if (bytesToPutInBuffer >= spaceInBuffer) { // the buffer is already full, so waiting for more bytes cannot help
+                LOG_ERROR("TECConvertText gave a kTECPartialCharErr but read none of the %zu bytes in the buffer", sizeof(m_bufferedBytes));
+                m_numBufferedBytes = 0;
+                status = kTECUnmappableElementErr; // should never happen, but use this error code
+            } else {
+                // Tell the caller we read all the source bytes and keep them in the buffer.
+                m_numBufferedBytes += bytesToPutInBuffer;
+                bytesRead = bytesToPutInBuffer;
+                status = noErr;
+            }
+        } else {
+            // We are done with the partial character buffer.
+            // Also, we have read some of the bytes from the main buffer.
+            if (bytesRead > m_numBufferedBytes) {
+                bytesRead -= m_numBufferedBytes; // report only the bytes that came from inputBuffer
+            } else {
+                LOG_ERROR("TECConvertText accepted some bytes it previously rejected with kTECPartialCharErr");
+                bytesRead = 0;
+            }
+            m_numBufferedBytes = 0;
+            if (status == kTECPartialCharErr) {
+                // While there may be a partial character problem in the small buffer,
+                // we have to try again and not get confused and think there is a partial
+                // character problem in the large buffer.
+                status = noErr;
+            }
+        }
+    } else { // nothing buffered: convert straight from the caller's input
+        status = TECConvertText(m_converterTEC, inputBuffer, inputBufferLength, &bytesRead,
+            static_cast<unsigned char*>(outputBuffer), outputBufferLength, &bytesWritten);
+        ASSERT(static_cast<int>(bytesRead) <= inputBufferLength);
+    }
+
+    // Work around bug 3351093, where sometimes we get kTECBufferBelowMinimumSizeErr instead of kTECOutputBufferFullStatus.
+    if (status == kTECBufferBelowMinimumSizeErr && bytesWritten != 0)
+        status = kTECOutputBufferFullStatus;
+
+    inputLength = bytesRead;
+    outputLength = bytesWritten;
+    return status;
+}
+
+String TextCodecMac::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError)
+{
+    // Decodes `length` bytes with the TEC converter for m_encoding, creating the converter on first use. A partial
+    // character at the end of the input is kept in m_bufferedBytes for the next call. sawError is read before it is
+    // written, so callers must initialize it. Returns a null String if the converter cannot be created.
+    if (!m_converterTEC && createTECConverter() != noErr)
+        return String();
+    Vector<UChar> result;
+    const unsigned char* sourcePointer = reinterpret_cast<const unsigned char*>(bytes);
+    int sourceLength = length;
+    bool bufferWasFull = false;
+    UniChar buffer[ConversionBufferSize];
+
+    while ((sourceLength || bufferWasFull) && !sawError) {
+        int bytesRead = 0;
+        int bytesWritten = 0;
+        OSStatus status = decode(sourcePointer, sourceLength, bytesRead, buffer, sizeof(buffer), bytesWritten);
+        ASSERT(bytesRead <= sourceLength);
+        sourcePointer += bytesRead;
+        sourceLength -= bytesRead;
+
+        switch (status) {
+            case noErr:
+            case kTECOutputBufferFullStatus:
+                break;
+            case kTextMalformedInputErr:
+            case kTextUndefinedElementErr:
+                // FIXME: Put FFFD character into the output string in this case?
+                TECClearConverterContextInfo(m_converterTEC);
+                if (stopOnError) {
+                    sawError = true;
+                    break;
+                }
+                if (sourceLength) { // otherwise skip one input byte and keep going
+                    sourcePointer += 1;
+                    sourceLength -= 1;
+                }
+                break;
+            case kTECPartialCharErr: {
+                // Put the partial character into the buffer; it must leave room for the bytes that complete it.
+                ASSERT(m_numBufferedBytes == 0);
+                const int bufferSize = sizeof(m_bufferedBytes); // was sizeof(m_numBufferedBytes), the size of the counter
+                if (sourceLength < bufferSize) {
+                    memcpy(m_bufferedBytes, sourcePointer, sourceLength);
+                    m_numBufferedBytes = sourceLength;
+                } else {
+                    LOG_ERROR("TECConvertText gave a kTECPartialCharErr, but left %d bytes in the buffer", sourceLength);
+                }
+                sourceLength = 0;
+                break;
+            }
+            default: // any other status abandons the whole decode
+                sawError = true;
+                return String();
+        }
+
+        ASSERT(!(bytesWritten % sizeof(UChar)));
+        result.append(buffer, bytesWritten / sizeof(UChar));
+
+        bufferWasFull = status == kTECOutputBufferFullStatus;
+    }
+
+    if (flush) { // collect whatever TECFlushText still emits for this converter
+        unsigned long bytesWritten = 0;
+        TECFlushText(m_converterTEC, reinterpret_cast<unsigned char*>(buffer), sizeof(buffer), &bytesWritten);
+        ASSERT(!(bytesWritten % sizeof(UChar)));
+        result.append(buffer, bytesWritten / sizeof(UChar));
+    }
+
+    String resultString = String::adopt(result);
+
+    // <rdar://problem/3225472>
+    // Simplified Chinese pages use the code A3A0 to mean "full-width space".
+    // But GB18030 decodes it to U+E5E5, which is correct in theory but not in practice.
+    // To work around, just change all occurrences of U+E5E5 to U+3000 (ideographic space).
+    if (m_encoding == kCFStringEncodingGB_18030_2000)
+        resultString.replace(0xE5E5, ideographicSpace);
+
+    return resultString;
+}
+
+CString TextCodecMac::encode(const UChar* characters, size_t length, UnencodableHandling handling)
+{
+    // FIXME: We should really use TEC here instead of CFString for consistency with the other direction.
+
+    // FIXME: Since there's no "force ASCII range" mode in CFString, we change the backslash into a yen sign.
+    // Encoding will change the yen sign back into a backslash.
+    String text(characters, length);
+    text.replace('\\', m_backslashAsCurrencySymbol);
+    CFStringRef cfText = text.createCFString();
+
+    // Convert in runs: each pass encodes as many characters as CFStringGetBytes accepts, then the character that
+    // stopped it (if any) is replaced by whatever getUnencodableReplacement() supplies for the requested handling.
+    const UInt8 lossByte = handling == QuestionMarksForUnencodables ? '?' : 0;
+    CFIndex runStart = 0;
+    CFIndex remaining = CFStringGetLength(cfText);
+    Vector<char> encoded;
+    size_t encodedSize = 0;
+    while (remaining > 0) {
+        const CFRange run = CFRangeMake(runStart, remaining);
+        CFIndex byteCount; // the first call only measures the bytes needed; the second call fills them in
+        CFStringGetBytes(cfText, run, m_encoding, lossByte, false, NULL, 0x7FFFFFFF, &byteCount);
+        encoded.grow(encodedSize + byteCount);
+        unsigned char* destination = reinterpret_cast<unsigned char*>(encoded.data() + encodedSize);
+        CFIndex consumed = CFStringGetBytes(cfText, run, m_encoding, lossByte, false, destination, byteCount, &byteCount);
+        encodedSize += byteCount;
+
+        if (consumed != remaining) {
+            unsigned badChar = CFStringGetCharacterAtIndex(cfText, runStart + consumed);
+            ++consumed;
+            if ((badChar & 0xFC00) == 0xD800 && consumed != remaining) { // is high surrogate
+                const UniChar low = CFStringGetCharacterAtIndex(cfText, runStart + consumed);
+                if ((low & 0xFC00) == 0xDC00) { // is low surrogate
+                    // Combine the pair into a single code point and consume the low half as well.
+                    badChar = (badChar << 10) + low + (0x10000 - (0xD800 << 10) - 0xDC00);
+                    ++consumed;
+                }
+            }
+            UnencodableReplacementArray replacement;
+            const int replacementLength = getUnencodableReplacement(badChar, handling, replacement);
+            encoded.grow(encodedSize + replacementLength);
+            memcpy(encoded.data() + encodedSize, replacement, replacementLength);
+            encodedSize += replacementLength;
+        }
+
+        runStart += consumed;
+        remaining -= consumed;
+    }
+    CFRelease(cfText);
+    return CString(encoded.data(), encodedSize);
+}
+
+} // namespace WebCore
diff --git a/WebCore/platform/text/mac/TextCodecMac.h b/WebCore/platform/text/mac/TextCodecMac.h
new file mode 100644
index 0000000..aee4a97
--- /dev/null
+++ b/WebCore/platform/text/mac/TextCodecMac.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2004, 2006 Apple Computer, Inc.  All rights reserved.
+ * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef TextCodecMac_h
+#define TextCodecMac_h
+
+#include "TextCodec.h"
+#include <CoreServices/CoreServices.h>
+
+namespace WebCore {
+
+    typedef ::TextEncoding TECTextEncodingID; // alias for the global-namespace ::TextEncoding type
+    const TECTextEncodingID invalidEncoding = kCFStringEncodingInvalidId; // sentinel for "no encoding"
+
+    class TextCodecMac : public TextCodec { // TextCodec for the encodings listed in CharsetTable
+    public:
+        static void registerEncodingNames(EncodingNameRegistrar);
+        static void registerCodecs(TextCodecRegistrar);
+
+        explicit TextCodecMac(TECTextEncodingID);
+        virtual ~TextCodecMac();
+
+        virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
+        virtual CString encode(const UChar*, size_t length, UnencodableHandling);
+
+    private:
+        OSStatus decode(const unsigned char* inputBuffer, int inputBufferLength, int& inputLength,
+            void* outputBuffer, int outputBufferLength, int& outputLength); // one conversion pass; lengths are in bytes
+
+        OSStatus createTECConverter() const;
+        void releaseTECConverter() const;
+
+        TECTextEncodingID m_encoding;
+        UChar m_backslashAsCurrencySymbol; // encode() substitutes this for each backslash before converting
+        unsigned m_numBufferedBytes; // count of bytes of an incomplete character held in m_bufferedBytes
+        unsigned char m_bufferedBytes[16]; // bigger than any single multi-byte character
+        mutable TECObjectRef m_converterTEC; // 0 until decode() creates it; mutable so the const helpers can swap it
+    };
+
+} // namespace WebCore
+
+#endif // TextCodecMac_h
diff --git a/WebCore/platform/text/mac/character-sets.txt b/WebCore/platform/text/mac/character-sets.txt
new file mode 100644
index 0000000..475e78e
--- /dev/null
+++ b/WebCore/platform/text/mac/character-sets.txt
@@ -0,0 +1,1868 @@
+
+===================================================================
+CHARACTER SETS
+
+(last updated 28 January 2005)
+
+These are the official names for character sets that may be used in
+the Internet and may be referred to in Internet documentation.  These
+names are expressed in ANSI_X3.4-1968 which is commonly called
+US-ASCII or simply ASCII.  The character set most commonly used in the
+Internet and used especially in protocol standards is US-ASCII; this
+is strongly encouraged.  The use of the name US-ASCII is also
+encouraged.
+
+The character set names may be up to 40 characters taken from the
+printable characters of US-ASCII.  However, no distinction is made
+between use of upper and lower case letters.
+
+The MIBenum value is a unique value for use in MIBs to identify coded
+character sets.
+
+The value space for MIBenum values has been divided into three
+regions. The first region (3-999) consists of coded character sets
+that have been standardized by some standard setting organization.
+This region is intended for standards that do not have subset
+implementations. The second region (1000-1999) is for the Unicode and
+ISO/IEC 10646 coded character sets together with a specification of a
+(set of) sub-repertoires that may occur.  The third region (>1999) is
+intended for vendor specific coded character sets.
+
+	Assigned MIB enum Numbers
+	-------------------------
+	0-2		Reserved
+	3-999		Set By Standards Organizations
+	1000-1999	Unicode / 10646
+	2000-2999	Vendor
+
+The aliases that start with "cs" have been added for use with the
+IANA-CHARSET-MIB as originally defined in RFC3808, and as currently
+maintained by IANA at http://www.iana.org/assignments/ianacharset-mib.
+Note that the ianacharset-mib needs to be kept in sync with this
+registry.  These aliases that start with "cs" contain the standard 
+numbers along with suggestive names in order to facilitate applications 
+that want to display the names in user interfaces.  The "cs" stands 
+for character set and is provided for applications that need a lower 
+case first letter but want to use mixed case thereafter that cannot 
+contain any special characters, such as underbar ("_") and dash ("-").  
+
+If the character set is from an ISO standard, its cs alias is the ISO
+standard number or name.  If the character set is not from an ISO
+standard, but is registered with ISO (IPSJ/ITSCJ is the current ISO
+Registration Authority), the ISO Registry number is specified as
+ISOnnn followed by letters suggestive of the name or standards number
+of the code set.  When a national or international standard is
+revised, the year of revision is added to the cs alias of the new
+character set entry in the IANA Registry in order to distinguish the
+revised character set from the original character set.
+
+
+Character Set                                               Reference
+-------------                                               ---------
+
+Name: ANSI_X3.4-1968                                   [RFC1345,KXS2]
+MIBenum: 3
+Source: ECMA registry
+Alias: iso-ir-6
+Alias: ANSI_X3.4-1986
+Alias: ISO_646.irv:1991
+Alias: ASCII
+Alias: ISO646-US
+Alias: US-ASCII (preferred MIME name)
+Alias: us
+Alias: IBM367
+Alias: cp367
+Alias: csASCII
+
+Name: ISO-10646-UTF-1
+MIBenum: 27
+Source: Universal Transfer Format (1), this is the multibyte
+        encoding, that subsets ASCII-7. It does not have byte
+        ordering issues.
+Alias: csISO10646UTF1
+
+Name: ISO_646.basic:1983                                [RFC1345,KXS2]
+MIBenum: 28
+Source: ECMA registry
+Alias: ref
+Alias: csISO646basic1983
+
+Name: INVARIANT                                         [RFC1345,KXS2]
+MIBenum: 29
+Alias: csINVARIANT
+
+Name: ISO_646.irv:1983                                  [RFC1345,KXS2]
+MIBenum: 30
+Source: ECMA registry
+Alias: iso-ir-2
+Alias: irv
+Alias: csISO2IntlRefVersion
+
+Name: BS_4730                                           [RFC1345,KXS2]
+MIBenum: 20
+Source: ECMA registry
+Alias: iso-ir-4
+Alias: ISO646-GB
+Alias: gb
+Alias: uk
+Alias: csISO4UnitedKingdom
+
+Name: NATS-SEFI                                         [RFC1345,KXS2]
+MIBenum: 31
+Source: ECMA registry
+Alias: iso-ir-8-1
+Alias: csNATSSEFI
+
+Name: NATS-SEFI-ADD                                     [RFC1345,KXS2]
+MIBenum: 32
+Source: ECMA registry
+Alias: iso-ir-8-2
+Alias: csNATSSEFIADD
+
+Name: NATS-DANO                                         [RFC1345,KXS2]
+MIBenum: 33
+Source: ECMA registry
+Alias: iso-ir-9-1
+Alias: csNATSDANO
+
+Name: NATS-DANO-ADD                                     [RFC1345,KXS2]
+MIBenum: 34
+Source: ECMA registry
+Alias: iso-ir-9-2
+Alias: csNATSDANOADD
+
+Name: SEN_850200_B                                      [RFC1345,KXS2]
+MIBenum: 35
+Source: ECMA registry
+Alias: iso-ir-10
+Alias: FI
+Alias: ISO646-FI
+Alias: ISO646-SE
+Alias: se
+Alias: csISO10Swedish
+
+Name: SEN_850200_C                                      [RFC1345,KXS2]
+MIBenum: 21
+Source: ECMA registry
+Alias: iso-ir-11
+Alias: ISO646-SE2
+Alias: se2
+Alias: csISO11SwedishForNames
+
+Name: KS_C_5601-1987                                    [RFC1345,KXS2]
+MIBenum: 36
+Source: ECMA registry
+Alias: iso-ir-149
+Alias: KS_C_5601-1989
+Alias: KSC_5601
+Alias: korean
+Alias: csKSC56011987
+
+Name: ISO-2022-KR  (preferred MIME name)                [RFC1557,Choi]
+MIBenum: 37
+Source: RFC-1557 (see also KS_C_5601-1987)
+Alias: csISO2022KR
+
+Name: EUC-KR  (preferred MIME name)                     [RFC1557,Choi]
+MIBenum: 38
+Source: RFC-1557 (see also KS_C_5861-1992)
+Alias: csEUCKR
+
+Name: ISO-2022-JP  (preferred MIME name)               [RFC1468,Murai]
+MIBenum: 39
+Source: RFC-1468 (see also RFC-2237)
+Alias: csISO2022JP
+
+Name: ISO-2022-JP-2  (preferred MIME name)              [RFC1554,Ohta]
+MIBenum: 40
+Source: RFC-1554
+Alias: csISO2022JP2
+
+Name: ISO-2022-CN                                            [RFC1922]
+MIBenum: 104
+Source: RFC-1922
+
+Name: ISO-2022-CN-EXT                                        [RFC1922]
+MIBenum: 105
+Source: RFC-1922
+
+Name: JIS_C6220-1969-jp                                 [RFC1345,KXS2]
+MIBenum: 41
+Source: ECMA registry
+Alias: JIS_C6220-1969
+Alias: iso-ir-13
+Alias: katakana
+Alias: x0201-7
+Alias: csISO13JISC6220jp
+
+Name: JIS_C6220-1969-ro                                 [RFC1345,KXS2]
+MIBenum: 42
+Source: ECMA registry
+Alias: iso-ir-14
+Alias: jp
+Alias: ISO646-JP
+Alias: csISO14JISC6220ro
+
+Name: IT                                                [RFC1345,KXS2]
+MIBenum: 22
+Source: ECMA registry
+Alias: iso-ir-15
+Alias: ISO646-IT
+Alias: csISO15Italian
+
+Name: PT                                                [RFC1345,KXS2]
+MIBenum: 43
+Source: ECMA registry
+Alias: iso-ir-16
+Alias: ISO646-PT
+Alias: csISO16Portuguese
+
+Name: ES                                                [RFC1345,KXS2]
+MIBenum: 23
+Source: ECMA registry
+Alias: iso-ir-17
+Alias: ISO646-ES
+Alias: csISO17Spanish
+
+Name: greek7-old                                        [RFC1345,KXS2]
+MIBenum: 44
+Source: ECMA registry
+Alias: iso-ir-18
+Alias: csISO18Greek7Old
+
+Name: latin-greek                                       [RFC1345,KXS2]
+MIBenum: 45
+Source: ECMA registry
+Alias: iso-ir-19
+Alias: csISO19LatinGreek
+
+Name: DIN_66003                                         [RFC1345,KXS2]
+MIBenum: 24
+Source: ECMA registry
+Alias: iso-ir-21
+Alias: de
+Alias: ISO646-DE
+Alias: csISO21German
+
+Name: NF_Z_62-010_(1973)                                [RFC1345,KXS2]
+MIBenum: 46
+Source: ECMA registry
+Alias: iso-ir-25
+Alias: ISO646-FR1
+Alias: csISO25French
+
+Name: Latin-greek-1                                     [RFC1345,KXS2]
+MIBenum: 47
+Source: ECMA registry
+Alias: iso-ir-27
+Alias: csISO27LatinGreek1
+
+Name: ISO_5427                                          [RFC1345,KXS2]
+MIBenum: 48
+Source: ECMA registry
+Alias: iso-ir-37
+Alias: csISO5427Cyrillic
+
+Name: JIS_C6226-1978                                    [RFC1345,KXS2]
+MIBenum: 49
+Source: ECMA registry
+Alias: iso-ir-42
+Alias: csISO42JISC62261978
+
+Name: BS_viewdata                                       [RFC1345,KXS2]
+MIBenum: 50
+Source: ECMA registry
+Alias: iso-ir-47
+Alias: csISO47BSViewdata
+
+Name: INIS                                              [RFC1345,KXS2]
+MIBenum: 51
+Source: ECMA registry
+Alias: iso-ir-49
+Alias: csISO49INIS
+
+Name: INIS-8                                            [RFC1345,KXS2]
+MIBenum: 52
+Source: ECMA registry
+Alias: iso-ir-50
+Alias: csISO50INIS8
+
+Name: INIS-cyrillic                                     [RFC1345,KXS2]
+MIBenum: 53
+Source: ECMA registry
+Alias: iso-ir-51
+Alias: csISO51INISCyrillic
+
+Name: ISO_5427:1981                                     [RFC1345,KXS2]
+MIBenum: 54
+Source: ECMA registry
+Alias: iso-ir-54
+Alias: ISO5427Cyrillic1981
+
+Name: ISO_5428:1980                                     [RFC1345,KXS2]
+MIBenum: 55
+Source: ECMA registry
+Alias: iso-ir-55
+Alias: csISO5428Greek
+
+Name: GB_1988-80                                        [RFC1345,KXS2]
+MIBenum: 56
+Source: ECMA registry
+Alias: iso-ir-57
+Alias: cn
+Alias: ISO646-CN
+Alias: csISO57GB1988
+
+Name: GB_2312-80                                        [RFC1345,KXS2]
+MIBenum: 57
+Source: ECMA registry
+Alias: iso-ir-58
+Alias: chinese
+Alias: csISO58GB231280
+
+Name: NS_4551-1                                         [RFC1345,KXS2]
+MIBenum: 25
+Source: ECMA registry
+Alias: iso-ir-60
+Alias: ISO646-NO
+Alias: no
+Alias: csISO60DanishNorwegian
+Alias: csISO60Norwegian1
+
+Name: NS_4551-2                                          [RFC1345,KXS2]
+MIBenum: 58
+Source: ECMA registry
+Alias: ISO646-NO2
+Alias: iso-ir-61
+Alias: no2
+Alias: csISO61Norwegian2
+
+Name: NF_Z_62-010                                        [RFC1345,KXS2]
+MIBenum: 26
+Source: ECMA registry
+Alias: iso-ir-69
+Alias: ISO646-FR
+Alias: fr
+Alias: csISO69French
+
+Name: videotex-suppl                                     [RFC1345,KXS2]
+MIBenum: 59
+Source: ECMA registry
+Alias: iso-ir-70
+Alias: csISO70VideotexSupp1
+
+Name: PT2                                                [RFC1345,KXS2]
+MIBenum: 60
+Source: ECMA registry
+Alias: iso-ir-84
+Alias: ISO646-PT2
+Alias: csISO84Portuguese2
+
+Name: ES2                                                [RFC1345,KXS2]
+MIBenum: 61
+Source: ECMA registry
+Alias: iso-ir-85
+Alias: ISO646-ES2
+Alias: csISO85Spanish2
+
+Name: MSZ_7795.3                                         [RFC1345,KXS2]
+MIBenum: 62
+Source: ECMA registry
+Alias: iso-ir-86
+Alias: ISO646-HU
+Alias: hu
+Alias: csISO86Hungarian
+
+Name: JIS_C6226-1983                                     [RFC1345,KXS2]
+MIBenum: 63
+Source: ECMA registry
+Alias: iso-ir-87
+Alias: x0208
+Alias: JIS_X0208-1983
+Alias: csISO87JISX0208
+
+Name: greek7                                             [RFC1345,KXS2]
+MIBenum: 64
+Source: ECMA registry
+Alias: iso-ir-88
+Alias: csISO88Greek7
+
+Name: ASMO_449                                           [RFC1345,KXS2]
+MIBenum: 65
+Source: ECMA registry
+Alias: ISO_9036
+Alias: arabic7
+Alias: iso-ir-89
+Alias: csISO89ASMO449
+
+Name: iso-ir-90                                          [RFC1345,KXS2]
+MIBenum: 66
+Source: ECMA registry
+Alias: csISO90
+
+Name: JIS_C6229-1984-a                                   [RFC1345,KXS2]
+MIBenum: 67
+Source: ECMA registry
+Alias: iso-ir-91
+Alias: jp-ocr-a
+Alias: csISO91JISC62291984a
+
+Name: JIS_C6229-1984-b                                   [RFC1345,KXS2]
+MIBenum: 68
+Source: ECMA registry
+Alias: iso-ir-92
+Alias: ISO646-JP-OCR-B
+Alias: jp-ocr-b
+Alias: csISO92JISC62991984b
+
+Name: JIS_C6229-1984-b-add                               [RFC1345,KXS2]
+MIBenum: 69
+Source: ECMA registry
+Alias: iso-ir-93
+Alias: jp-ocr-b-add
+Alias: csISO93JIS62291984badd
+
+Name: JIS_C6229-1984-hand                                [RFC1345,KXS2]
+MIBenum: 70
+Source: ECMA registry
+Alias: iso-ir-94
+Alias: jp-ocr-hand
+Alias: csISO94JIS62291984hand
+
+Name: JIS_C6229-1984-hand-add                            [RFC1345,KXS2]
+MIBenum: 71
+Source: ECMA registry
+Alias: iso-ir-95
+Alias: jp-ocr-hand-add
+Alias: csISO95JIS62291984handadd
+
+Name: JIS_C6229-1984-kana                                [RFC1345,KXS2]
+MIBenum: 72
+Source: ECMA registry
+Alias: iso-ir-96
+Alias: csISO96JISC62291984kana
+
+Name: ISO_2033-1983                                      [RFC1345,KXS2]
+MIBenum: 73
+Source: ECMA registry
+Alias: iso-ir-98
+Alias: e13b
+Alias: csISO2033
+
+Name: ANSI_X3.110-1983                                   [RFC1345,KXS2]
+MIBenum: 74
+Source: ECMA registry
+Alias: iso-ir-99
+Alias: CSA_T500-1983
+Alias: NAPLPS
+Alias: csISO99NAPLPS
+
+Name: ISO_8859-1:1987                                    [RFC1345,KXS2]
+MIBenum: 4
+Source: ECMA registry
+Alias: iso-ir-100
+Alias: ISO_8859-1
+Alias: ISO-8859-1 (preferred MIME name)
+Alias: latin1
+Alias: l1
+Alias: IBM819
+Alias: CP819
+Alias: csISOLatin1
+
+Name: ISO_8859-2:1987                                    [RFC1345,KXS2]
+MIBenum: 5
+Source: ECMA registry
+Alias: iso-ir-101
+Alias: ISO_8859-2
+Alias: ISO-8859-2 (preferred MIME name)
+Alias: latin2
+Alias: l2
+Alias: csISOLatin2
+
+Name: T.61-7bit                                          [RFC1345,KXS2]
+MIBenum: 75
+Source: ECMA registry
+Alias: iso-ir-102
+Alias: csISO102T617bit
+
+Name: T.61-8bit                                          [RFC1345,KXS2]
+MIBenum: 76
+Alias: T.61
+Source: ECMA registry
+Alias: iso-ir-103
+Alias: csISO103T618bit
+
+Name: ISO_8859-3:1988                                    [RFC1345,KXS2]
+MIBenum: 6
+Source: ECMA registry
+Alias: iso-ir-109
+Alias: ISO_8859-3
+Alias: ISO-8859-3 (preferred MIME name)
+Alias: latin3
+Alias: l3
+Alias: csISOLatin3
+
+Name: ISO_8859-4:1988                                    [RFC1345,KXS2]
+MIBenum: 7
+Source: ECMA registry
+Alias: iso-ir-110
+Alias: ISO_8859-4
+Alias: ISO-8859-4 (preferred MIME name)
+Alias: latin4
+Alias: l4
+Alias: csISOLatin4
+
+Name: ECMA-cyrillic                                     
+MIBenum: 77
+Source: ISO registry (formerly ECMA registry)
+         http://www.itscj.ipsj.jp/ISO-IR/111.pdf
+Alias: iso-ir-111
+Alias: KOI8-E
+Alias: csISO111ECMACyrillic
+
+Name: CSA_Z243.4-1985-1                                  [RFC1345,KXS2]
+MIBenum: 78
+Source: ECMA registry
+Alias: iso-ir-121
+Alias: ISO646-CA
+Alias: csa7-1
+Alias: ca
+Alias: csISO121Canadian1
+
+Name: CSA_Z243.4-1985-2                                  [RFC1345,KXS2]
+MIBenum: 79
+Source: ECMA registry
+Alias: iso-ir-122
+Alias: ISO646-CA2
+Alias: csa7-2
+Alias: csISO122Canadian2
+
+Name: CSA_Z243.4-1985-gr                                 [RFC1345,KXS2]
+MIBenum: 80
+Source: ECMA registry
+Alias: iso-ir-123
+Alias: csISO123CSAZ24341985gr
+
+Name: ISO_8859-6:1987                                    [RFC1345,KXS2]
+MIBenum: 9
+Source: ECMA registry
+Alias: iso-ir-127
+Alias: ISO_8859-6
+Alias: ISO-8859-6 (preferred MIME name)
+Alias: ECMA-114
+Alias: ASMO-708
+Alias: arabic
+Alias: csISOLatinArabic
+
+Name: ISO_8859-6-E                                       [RFC1556,IANA]
+MIBenum: 81
+Source: RFC1556
+Alias: csISO88596E
+Alias: ISO-8859-6-E (preferred MIME name)
+
+Name: ISO_8859-6-I                                       [RFC1556,IANA]
+MIBenum: 82
+Source: RFC1556
+Alias: csISO88596I
+Alias: ISO-8859-6-I (preferred MIME name)
+
+Name: ISO_8859-7:1987                            [RFC1947,RFC1345,KXS2]
+MIBenum: 10
+Source: ECMA registry
+Alias: iso-ir-126
+Alias: ISO_8859-7
+Alias: ISO-8859-7 (preferred MIME name)
+Alias: ELOT_928
+Alias: ECMA-118
+Alias: greek
+Alias: greek8
+Alias: csISOLatinGreek
+
+Name: T.101-G2                                            [RFC1345,KXS2]
+MIBenum: 83
+Source: ECMA registry
+Alias: iso-ir-128
+Alias: csISO128T101G2
+
+Name: ISO_8859-8:1988                                     [RFC1345,KXS2]
+MIBenum: 11
+Source: ECMA registry
+Alias: iso-ir-138
+Alias: ISO_8859-8
+Alias: ISO-8859-8 (preferred MIME name)
+Alias: hebrew
+Alias: csISOLatinHebrew
+
+Name: ISO_8859-8-E                                  [RFC1556,Nussbacher]
+MIBenum: 84
+Source: RFC1556
+Alias: csISO88598E
+Alias: ISO-8859-8-E (preferred MIME name)
+
+Name: ISO_8859-8-I                                  [RFC1556,Nussbacher]
+MIBenum: 85
+Source: RFC1556
+Alias: csISO88598I
+Alias: ISO-8859-8-I (preferred MIME name)
+
+Name: CSN_369103                                          [RFC1345,KXS2]
+MIBenum: 86
+Source: ECMA registry
+Alias: iso-ir-139
+Alias: csISO139CSN369103
+
+Name: JUS_I.B1.002                                        [RFC1345,KXS2]
+MIBenum: 87
+Source: ECMA registry
+Alias: iso-ir-141
+Alias: ISO646-YU
+Alias: js
+Alias: yu
+Alias: csISO141JUSIB1002
+
+Name: ISO_6937-2-add                                      [RFC1345,KXS2]
+MIBenum: 14
+Source: ECMA registry and ISO 6937-2:1983
+Alias: iso-ir-142
+Alias: csISOTextComm
+
+Name: IEC_P27-1                                           [RFC1345,KXS2]
+MIBenum: 88
+Source: ECMA registry
+Alias: iso-ir-143
+Alias: csISO143IECP271
+
+Name: ISO_8859-5:1988                                     [RFC1345,KXS2]
+MIBenum: 8
+Source: ECMA registry
+Alias: iso-ir-144
+Alias: ISO_8859-5
+Alias: ISO-8859-5 (preferred MIME name)
+Alias: cyrillic
+Alias: csISOLatinCyrillic
+
+Name: JUS_I.B1.003-serb                                   [RFC1345,KXS2]
+MIBenum: 89
+Source: ECMA registry
+Alias: iso-ir-146
+Alias: serbian
+Alias: csISO146Serbian
+
+Name: JUS_I.B1.003-mac                                    [RFC1345,KXS2]
+MIBenum: 90
+Source: ECMA registry
+Alias: macedonian
+Alias: iso-ir-147
+Alias: csISO147Macedonian
+
+Name: ISO_8859-9:1989                                     [RFC1345,KXS2]
+MIBenum: 12
+Source: ECMA registry
+Alias: iso-ir-148
+Alias: ISO_8859-9
+Alias: ISO-8859-9 (preferred MIME name)
+Alias: latin5
+Alias: l5
+Alias: csISOLatin5
+
+Name: greek-ccitt                                         [RFC1345,KXS2]
+MIBenum: 91
+Source: ECMA registry
+Alias: iso-ir-150
+Alias: csISO150
+Alias: csISO150GreekCCITT
+
+Name: NC_NC00-10:81                                       [RFC1345,KXS2]
+MIBenum: 92
+Source: ECMA registry
+Alias: cuba
+Alias: iso-ir-151
+Alias: ISO646-CU
+Alias: csISO151Cuba
+
+Name: ISO_6937-2-25                                       [RFC1345,KXS2]
+MIBenum: 93
+Source: ECMA registry
+Alias: iso-ir-152
+Alias: csISO6937Add
+
+Name: GOST_19768-74                                       [RFC1345,KXS2]
+MIBenum: 94
+Source: ECMA registry
+Alias: ST_SEV_358-88
+Alias: iso-ir-153
+Alias: csISO153GOST1976874
+
+Name: ISO_8859-supp                                       [RFC1345,KXS2]
+MIBenum: 95
+Source: ECMA registry
+Alias: iso-ir-154
+Alias: latin1-2-5
+Alias: csISO8859Supp
+
+Name: ISO_10367-box                                       [RFC1345,KXS2]
+MIBenum: 96
+Source: ECMA registry
+Alias: iso-ir-155
+Alias: csISO10367Box
+
+Name: ISO-8859-10 (preferred MIME name)			  [RFC1345,KXS2]
+MIBenum: 13
+Source: ECMA registry
+Alias: iso-ir-157
+Alias: l6
+Alias: ISO_8859-10:1992
+Alias: csISOLatin6
+Alias: latin6
+
+Name: latin-lap                                           [RFC1345,KXS2]
+MIBenum: 97
+Source: ECMA registry
+Alias: lap
+Alias: iso-ir-158
+Alias: csISO158Lap
+
+Name: JIS_X0212-1990                                      [RFC1345,KXS2]
+MIBenum: 98
+Source: ECMA registry
+Alias: x0212
+Alias: iso-ir-159
+Alias: csISO159JISX02121990
+
+Name: DS_2089                                             [RFC1345,KXS2]
+MIBenum: 99
+Source: Danish Standard, DS 2089, February 1974
+Alias: DS2089
+Alias: ISO646-DK
+Alias: dk
+Alias: csISO646Danish
+
+Name: us-dk                                               [RFC1345,KXS2]
+MIBenum: 100
+Alias: csUSDK
+
+Name: dk-us                                               [RFC1345,KXS2]
+MIBenum: 101
+Alias: csDKUS
+
+Name: JIS_X0201                                           [RFC1345,KXS2]
+MIBenum: 15
+Source: JIS X 0201-1976.   One byte only, this is equivalent to 
+        JIS/Roman (similar to ASCII) plus eight-bit half-width 
+        Katakana
+Alias: X0201
+Alias: csHalfWidthKatakana
+
+Name: KSC5636                                             [RFC1345,KXS2]
+MIBenum: 102
+Alias: ISO646-KR
+Alias: csKSC5636
+
+Name: ISO-10646-UCS-2
+MIBenum: 1000
+Source: the 2-octet Basic Multilingual Plane, aka Unicode
+        this needs to specify network byte order: the standard
+        does not specify (it is a 16-bit integer space)
+Alias: csUnicode
+
+Name: ISO-10646-UCS-4
+MIBenum: 1001
+Source: the full code space. (same comment about byte order,
+        these are 31-bit numbers.)
+Alias: csUCS4
+
+Name: DEC-MCS                                             [RFC1345,KXS2]
+MIBenum: 2008
+Source: VAX/VMS User's Manual, 
+        Order Number: AI-Y517A-TE, April 1986.
+Alias: dec
+Alias: csDECMCS
+
+Name: hp-roman8                                  [HP-PCL5,RFC1345,KXS2]
+MIBenum: 2004
+Source: LaserJet IIP Printer User's Manual, 
+        HP part no 33471-90901, Hewlett-Packard, June 1989.
+Alias: roman8
+Alias: r8
+Alias: csHPRoman8
+
+Name: macintosh                                           [RFC1345,KXS2]
+MIBenum: 2027
+Source: The Unicode Standard ver1.0, ISBN 0-201-56788-1, Oct 1991
+Alias: mac
+Alias: csMacintosh
+
+Name: IBM037                                              [RFC1345,KXS2]
+MIBenum: 2028
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp037
+Alias: ebcdic-cp-us
+Alias: ebcdic-cp-ca
+Alias: ebcdic-cp-wt
+Alias: ebcdic-cp-nl
+Alias: csIBM037
+
+Name: IBM038                                              [RFC1345,KXS2]
+MIBenum: 2029
+Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990
+Alias: EBCDIC-INT
+Alias: cp038
+Alias: csIBM038
+
+Name: IBM273                                              [RFC1345,KXS2]
+MIBenum: 2030
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP273
+Alias: csIBM273
+
+Name: IBM274                                              [RFC1345,KXS2]
+MIBenum: 2031
+Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990
+Alias: EBCDIC-BE
+Alias: CP274
+Alias: csIBM274
+
+Name: IBM275                                              [RFC1345,KXS2]
+MIBenum: 2032
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: EBCDIC-BR
+Alias: cp275
+Alias: csIBM275
+
+Name: IBM277                                              [RFC1345,KXS2]
+MIBenum: 2033
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: EBCDIC-CP-DK
+Alias: EBCDIC-CP-NO
+Alias: csIBM277
+
+Name: IBM278                                              [RFC1345,KXS2]
+MIBenum: 2034
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP278
+Alias: ebcdic-cp-fi
+Alias: ebcdic-cp-se
+Alias: csIBM278
+
+Name: IBM280                                              [RFC1345,KXS2]
+MIBenum: 2035
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP280
+Alias: ebcdic-cp-it
+Alias: csIBM280
+
+Name: IBM281                                              [RFC1345,KXS2]
+MIBenum: 2036
+Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990
+Alias: EBCDIC-JP-E
+Alias: cp281
+Alias: csIBM281
+
+Name: IBM284                                              [RFC1345,KXS2]
+MIBenum: 2037
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP284
+Alias: ebcdic-cp-es
+Alias: csIBM284
+
+Name: IBM285                                              [RFC1345,KXS2]
+MIBenum: 2038
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP285
+Alias: ebcdic-cp-gb
+Alias: csIBM285
+
+Name: IBM290                                              [RFC1345,KXS2]
+MIBenum: 2039
+Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990
+Alias: cp290
+Alias: EBCDIC-JP-kana
+Alias: csIBM290
+
+Name: IBM297                                              [RFC1345,KXS2]
+MIBenum: 2040
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp297
+Alias: ebcdic-cp-fr
+Alias: csIBM297
+
+Name: IBM420                                              [RFC1345,KXS2]
+MIBenum: 2041
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990, 
+        IBM NLS RM p 11-11
+Alias: cp420
+Alias: ebcdic-cp-ar1
+Alias: csIBM420
+
+Name: IBM423                                              [RFC1345,KXS2]
+MIBenum: 2042
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp423
+Alias: ebcdic-cp-gr
+Alias: csIBM423
+
+Name: IBM424                                              [RFC1345,KXS2]
+MIBenum: 2043
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp424
+Alias: ebcdic-cp-he
+Alias: csIBM424
+
+Name: IBM437                                              [RFC1345,KXS2]
+MIBenum: 2011
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp437
+Alias: 437
+Alias: csPC8CodePage437
+
+Name: IBM500                                              [RFC1345,KXS2]
+MIBenum: 2044
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP500
+Alias: ebcdic-cp-be
+Alias: ebcdic-cp-ch
+Alias: csIBM500
+
+Name: IBM775                                                   [HP-PCL5]
+MIBenum: 2087
+Source: HP PCL 5 Comparison Guide (P/N 5021-0329) pp B-13, 1996
+Alias: cp775
+Alias: csPC775Baltic
+
+Name: IBM850                                              [RFC1345,KXS2]
+MIBenum: 2009
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp850
+Alias: 850
+Alias: csPC850Multilingual
+
+Name: IBM851                                              [RFC1345,KXS2]
+MIBenum: 2045
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp851
+Alias: 851
+Alias: csIBM851
+
+Name: IBM852                                              [RFC1345,KXS2]
+MIBenum: 2010
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp852
+Alias: 852
+Alias: csPCp852
+
+Name: IBM855                                              [RFC1345,KXS2]
+MIBenum: 2046
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp855
+Alias: 855
+Alias: csIBM855
+
+Name: IBM857                                              [RFC1345,KXS2]
+MIBenum: 2047
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp857
+Alias: 857
+Alias: csIBM857
+
+Name: IBM860                                              [RFC1345,KXS2]
+MIBenum: 2048
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp860
+Alias: 860
+Alias: csIBM860
+
+Name: IBM861                                              [RFC1345,KXS2]
+MIBenum: 2049
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp861
+Alias: 861
+Alias: cp-is
+Alias: csIBM861
+
+Name: IBM862                                              [RFC1345,KXS2]
+MIBenum: 2013
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp862
+Alias: 862
+Alias: csPC862LatinHebrew
+
+Name: IBM863                                              [RFC1345,KXS2]
+MIBenum: 2050
+Source: IBM Keyboard layouts and code pages, PN 07G4586 June 1991
+Alias: cp863
+Alias: 863
+Alias: csIBM863
+
+Name: IBM864                                              [RFC1345,KXS2]
+MIBenum: 2051
+Source: IBM Keyboard layouts and code pages, PN 07G4586 June 1991
+Alias: cp864
+Alias: csIBM864
+
+Name: IBM865                                              [RFC1345,KXS2]
+MIBenum: 2052
+Source: IBM DOS 3.3 Ref (Abridged), 94X9575 (Feb 1987)
+Alias: cp865
+Alias: 865
+Alias: csIBM865
+
+Name: IBM866                                                     [Pond]
+MIBenum: 2086
+Source: IBM NLDG Volume 2 (SE09-8002-03) August 1994
+Alias: cp866
+Alias: 866
+Alias: csIBM866
+
+Name: IBM868                                              [RFC1345,KXS2]
+MIBenum: 2053
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP868
+Alias: cp-ar
+Alias: csIBM868
+
+Name: IBM869                                              [RFC1345,KXS2]
+MIBenum: 2054
+Source: IBM Keyboard layouts and code pages, PN 07G4586 June 1991
+Alias: cp869
+Alias: 869
+Alias: cp-gr
+Alias: csIBM869
+
+Name: IBM870                                              [RFC1345,KXS2]
+MIBenum: 2055
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP870
+Alias: ebcdic-cp-roece
+Alias: ebcdic-cp-yu
+Alias: csIBM870
+
+Name: IBM871                                              [RFC1345,KXS2]
+MIBenum: 2056
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP871
+Alias: ebcdic-cp-is
+Alias: csIBM871
+
+Name: IBM880                                              [RFC1345,KXS2]
+MIBenum: 2057
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp880
+Alias: EBCDIC-Cyrillic
+Alias: csIBM880
+
+Name: IBM891                                              [RFC1345,KXS2]
+MIBenum: 2058
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp891
+Alias: csIBM891
+
+Name: IBM903                                              [RFC1345,KXS2]
+MIBenum: 2059
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp903
+Alias: csIBM903
+
+Name: IBM904                                              [RFC1345,KXS2]
+MIBenum: 2060
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp904
+Alias: 904
+Alias: csIBBM904
+
+Name: IBM905                                              [RFC1345,KXS2]
+MIBenum: 2061
+Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990
+Alias: CP905
+Alias: ebcdic-cp-tr
+Alias: csIBM905
+
+Name: IBM918                                              [RFC1345,KXS2]
+MIBenum: 2062
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP918
+Alias: ebcdic-cp-ar2
+Alias: csIBM918
+
+Name: IBM1026                                             [RFC1345,KXS2]
+MIBenum: 2063
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP1026
+Alias: csIBM1026
+
+Name: EBCDIC-AT-DE                                        [RFC1345,KXS2]
+MIBenum: 2064
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csIBMEBCDICATDE
+
+Name: EBCDIC-AT-DE-A                                      [RFC1345,KXS2]
+MIBenum: 2065 
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 
+Alias: csEBCDICATDEA
+
+Name: EBCDIC-CA-FR                                        [RFC1345,KXS2]
+MIBenum: 2066
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICCAFR
+
+Name: EBCDIC-DK-NO                                        [RFC1345,KXS2]
+MIBenum: 2067
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICDKNO
+
+Name: EBCDIC-DK-NO-A                                      [RFC1345,KXS2]
+MIBenum: 2068
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICDKNOA
+
+Name: EBCDIC-FI-SE                                        [RFC1345,KXS2]
+MIBenum: 2069
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICFISE
+
+Name: EBCDIC-FI-SE-A                                      [RFC1345,KXS2]
+MIBenum: 2070
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICFISEA
+
+Name: EBCDIC-FR                                           [RFC1345,KXS2]
+MIBenum: 2071
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICFR
+
+Name: EBCDIC-IT                                           [RFC1345,KXS2]
+MIBenum: 2072
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICIT
+
+Name: EBCDIC-PT                                           [RFC1345,KXS2]
+MIBenum: 2073
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICPT
+
+Name: EBCDIC-ES                                           [RFC1345,KXS2]
+MIBenum: 2074
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICES
+
+Name: EBCDIC-ES-A                                         [RFC1345,KXS2]
+MIBenum: 2075
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICESA
+
+Name: EBCDIC-ES-S                                         [RFC1345,KXS2]
+MIBenum: 2076
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICESS
+
+Name: EBCDIC-UK                                           [RFC1345,KXS2]
+MIBenum: 2077
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICUK
+
+Name: EBCDIC-US                                           [RFC1345,KXS2]
+MIBenum: 2078
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICUS
+
+Name: UNKNOWN-8BIT                                             [RFC1428]
+MIBenum: 2079
+Alias: csUnknown8BiT
+
+Name: MNEMONIC                                            [RFC1345,KXS2]
+MIBenum: 2080
+Source: RFC 1345, also known as "mnemonic+ascii+38"
+Alias: csMnemonic
+
+Name: MNEM                                                [RFC1345,KXS2]
+MIBenum: 2081
+Source: RFC 1345, also known as "mnemonic+ascii+8200"
+Alias: csMnem
+
+Name: VISCII                                                   [RFC1456]
+MIBenum: 2082
+Source: RFC 1456
+Alias: csVISCII
+
+Name: VIQR                                                     [RFC1456]
+MIBenum: 2083
+Source: RFC 1456
+Alias: csVIQR
+
+Name: KOI8-R  (preferred MIME name)                            [RFC1489]
+MIBenum: 2084
+Source: RFC 1489, based on GOST-19768-74, ISO-6937/8, 
+        INIS-Cyrillic, ISO-5427.
+Alias: csKOI8R
+
+Name: KOI8-U                                                   [RFC2319]
+MIBenum: 2088
+Source: RFC 2319
+
+Name: IBM00858
+MIBenum: 2089
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM00858)    [Mahdi]
+Alias: CCSID00858
+Alias: CP00858
+Alias: PC-Multilingual-850+euro
+
+Name: IBM00924
+MIBenum: 2090
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM00924)    [Mahdi]
+Alias: CCSID00924
+Alias: CP00924
+Alias: ebcdic-Latin9--euro
+
+Name: IBM01140
+MIBenum: 2091
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01140)    [Mahdi]
+Alias: CCSID01140
+Alias: CP01140
+Alias: ebcdic-us-37+euro
+
+Name: IBM01141
+MIBenum: 2092
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01141)    [Mahdi]
+Alias: CCSID01141
+Alias: CP01141
+Alias: ebcdic-de-273+euro
+
+Name: IBM01142
+MIBenum: 2093
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01142)    [Mahdi]
+Alias: CCSID01142
+Alias: CP01142
+Alias: ebcdic-dk-277+euro
+Alias: ebcdic-no-277+euro
+
+Name: IBM01143
+MIBenum: 2094
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01143)    [Mahdi]
+Alias: CCSID01143
+Alias: CP01143
+Alias: ebcdic-fi-278+euro
+Alias: ebcdic-se-278+euro
+
+Name: IBM01144
+MIBenum: 2095
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01144)    [Mahdi]
+Alias: CCSID01144
+Alias: CP01144
+Alias: ebcdic-it-280+euro
+
+Name: IBM01145
+MIBenum: 2096
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01145)    [Mahdi]
+Alias: CCSID01145
+Alias: CP01145
+Alias: ebcdic-es-284+euro
+
+Name: IBM01146
+MIBenum: 2097
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01146)    [Mahdi]
+Alias: CCSID01146
+Alias: CP01146
+Alias: ebcdic-gb-285+euro
+
+Name: IBM01147
+MIBenum: 2098
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01147)    [Mahdi]
+Alias: CCSID01147
+Alias: CP01147
+Alias: ebcdic-fr-297+euro
+
+Name: IBM01148
+MIBenum: 2099
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01148)    [Mahdi]
+Alias: CCSID01148
+Alias: CP01148
+Alias: ebcdic-international-500+euro
+
+Name: IBM01149
+MIBenum: 2100
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01149)    [Mahdi]
+Alias: CCSID01149
+Alias: CP01149
+Alias: ebcdic-is-871+euro
+
+Name: Big5-HKSCS                                                  [Yick]
+MIBenum: 2101
+Source:   See (http://www.iana.org/assignments/charset-reg/Big5-HKSCS) 
+Alias: None
+
+Name: IBM1047                                                [Robrigado]
+MIBenum: 2102
+Source: IBM1047 (EBCDIC Latin 1/Open Systems)
+        http://www-1.ibm.com/servers/eserver/iseries/software/globalization/pdf/cp01047z.pdf
+Alias: IBM-1047
+
+Name: PTCP154                                                    [Uskov]
+MIBenum: 2103
+Source: See (http://www.iana.org/assignments/charset-reg/PTCP154)
+Alias: csPTCP154
+Alias: PT154
+Alias: CP154
+Alias: Cyrillic-Asian
+
+Name:  Amiga-1251
+MIBenum:  2104
+Source:  See (http://www.amiga.ultranet.ru/Amiga-1251.html)
+Alias:  Ami1251
+Alias:  Amiga1251
+Alias:  Ami-1251
+(Aliases are provided for historical reasons and should not be used)
+                                                              [Malyshev]
+															  
+Name:  KOI7-switched
+MIBenum:  2105
+Source:  See <http://www.iana.org/assignments/charset-reg/KOI7-switched>
+Aliases:  None
+
+Name: UNICODE-1-1                                              [RFC1641]
+MIBenum: 1010
+Source: RFC 1641
+Alias: csUnicode11
+
+Name: SCSU
+MIBenum: 1011
+Source: SCSU See (http://www.iana.org/assignments/charset-reg/SCSU)     [Scherer]
+Alias: None 
+
+Name: UTF-7                                                    [RFC2152]
+MIBenum: 1012
+Source: RFC 2152
+Alias: None
+
+Name: UTF-16BE                                                 [RFC2781]
+MIBenum: 1013
+Source: RFC 2781
+Alias: None
+
+Name: UTF-16LE                                                 [RFC2781]
+MIBenum: 1014
+Source: RFC 2781
+Alias: None
+
+Name: UTF-16                                                   [RFC2781]
+MIBenum: 1015
+Source: RFC 2781
+Alias: None
+
+Name: CESU-8                                                    [Phipps]
+MIBenum: 1016
+Source: <http://www.unicode.org/unicode/reports/tr26>
+Alias: csCESU-8
+
+Name: UTF-32                                                     [Davis] 
+MIBenum: 1017
+Source: <http://www.unicode.org/unicode/reports/tr19/>
+Alias: None
+
+Name: UTF-32BE                                                   [Davis]
+MIBenum: 1018
+Source: <http://www.unicode.org/unicode/reports/tr19/>
+Alias: None
+
+Name: UTF-32LE                                                   [Davis]
+MIBenum: 1019
+Source: <http://www.unicode.org/unicode/reports/tr19/>
+Alias: None
+
+Name: BOCU-1                                                   [Scherer]
+MIBenum: 1020
+Source: http://www.unicode.org/notes/tn6/
+Alias: csBOCU-1
+
+Name: UNICODE-1-1-UTF-7                                        [RFC1642]
+MIBenum: 103
+Source: RFC 1642
+Alias: csUnicode11UTF7
+
+Name: UTF-8                                                    [RFC3629]
+MIBenum: 106
+Source: RFC 3629
+Alias: None 
+
+Name: ISO-8859-13
+MIBenum: 109
+Source: ISO See (http://www.iana.org/assignments/charset-reg/iso-8859-13)[Tumasonis] 
+Alias: None
+
+Name: ISO-8859-14
+MIBenum: 110
+Source: ISO See (http://www.iana.org/assignments/charset-reg/iso-8859-14) [Simonsen]
+Alias: iso-ir-199
+Alias: ISO_8859-14:1998
+Alias: ISO_8859-14
+Alias: latin8
+Alias: iso-celtic
+Alias: l8
+
+Name: ISO-8859-15
+MIBenum: 111
+Source: ISO 
+        Please see: <http://www.iana.org/assignments/charset-reg/ISO-8859-15>
+Alias: ISO_8859-15
+Alias: Latin-9
+
+Name: ISO-8859-16
+MIBenum: 112
+Source: ISO
+Alias: iso-ir-226
+Alias: ISO_8859-16:2001
+Alias: ISO_8859-16
+Alias: latin10
+Alias: l10 
+
+Name: GBK                                                 
+MIBenum: 113
+Source: Chinese IT Standardization Technical Committee  
+        Please see: <http://www.iana.org/assignments/charset-reg/GBK>
+Alias: CP936
+Alias: MS936
+Alias: windows-936
+
+Name: GB18030
+MIBenum: 114
+Source: Chinese IT Standardization Technical Committee
+        Please see: <http://www.iana.org/assignments/charset-reg/GB18030>
+Alias: None
+
+Name:  OSD_EBCDIC_DF04_15
+MIBenum:  115
+Source:  Fujitsu-Siemens standard mainframe EBCDIC encoding
+         Please see: <http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-15>
+Alias:   None
+
+Name:  OSD_EBCDIC_DF03_IRV
+MIBenum:  116
+Source:  Fujitsu-Siemens standard mainframe EBCDIC encoding
+         Please see: <http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF03-IRV>
+Alias:  None
+
+Name:  OSD_EBCDIC_DF04_1
+MIBenum:  117
+Source:  Fujitsu-Siemens standard mainframe EBCDIC encoding
+         Please see: <http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-1>
+Alias:  None   
+
+Name: JIS_Encoding
+MIBenum: 16
+Source: JIS X 0202-1991.  Uses ISO 2022 escape sequences to
+        shift code sets as documented in JIS X 0202-1991.
+Alias: csJISEncoding
+
+Name: Shift_JIS  (preferred MIME name)
+MIBenum: 17
+Source: This charset is an extension of csHalfWidthKatakana by
+        adding graphic characters in JIS X 0208.  The CCS's are
+        JIS X0201:1997 and JIS X0208:1997.  The
+        complete definition is shown in Appendix 1 of JIS
+        X0208:1997.
+        This charset can be used for the top-level media type "text".
+Alias: MS_Kanji 
+Alias: csShiftJIS
+
+Name: Extended_UNIX_Code_Packed_Format_for_Japanese
+MIBenum: 18
+Source: Standardized by OSF, UNIX International, and UNIX Systems
+        Laboratories Pacific.  Uses ISO 2022 rules to select
+               code set 0: US-ASCII (a single 7-bit byte set)
+               code set 1: JIS X0208-1990 (a double 8-bit byte set)
+                           restricted to A0-FF in both bytes
+               code set 2: Half Width Katakana (a single 7-bit byte set)
+                           requiring SS2 as the character prefix
+               code set 3: JIS X0212-1990 (a double 7-bit byte set)
+                           restricted to A0-FF in both bytes
+                           requiring SS3 as the character prefix
+Alias: csEUCPkdFmtJapanese
+Alias: EUC-JP  (preferred MIME name)
+
+Name: Extended_UNIX_Code_Fixed_Width_for_Japanese
+MIBenum: 19
+Source: Used in Japan.  Each character is 2 octets.
+                code set 0: US-ASCII (a single 7-bit byte set)
+                              1st byte = 00
+                              2nd byte = 20-7E
+                code set 1: JIS X0208-1990 (a double 7-bit byte set)
+                            restricted  to A0-FF in both bytes 
+                code set 2: Half Width Katakana (a single 7-bit byte set)
+                              1st byte = 00
+                              2nd byte = A0-FF
+                code set 3: JIS X0212-1990 (a double 7-bit byte set)
+                            restricted to A0-FF in 
+                            the first byte
+                and 21-7E in the second byte
+Alias: csEUCFixWidJapanese
+
+Name: ISO-10646-UCS-Basic
+MIBenum: 1002
+Source: ASCII subset of Unicode.  Basic Latin = collection 1
+        See ISO 10646, Appendix A
+Alias: csUnicodeASCII
+
+Name: ISO-10646-Unicode-Latin1
+MIBenum: 1003
+Source: ISO Latin-1 subset of Unicode. Basic Latin and Latin-1 
+         Supplement  = collections 1 and 2.  See ISO 10646, 
+         Appendix A.  See RFC 1815.
+Alias: csUnicodeLatin1
+Alias: ISO-10646
+
+Name: ISO-10646-J-1
+Source: ISO 10646 Japanese, see RFC 1815.
+
+Name: ISO-Unicode-IBM-1261
+MIBenum: 1005
+Source: IBM Latin-2, -3, -5, Extended Presentation Set, GCSGID: 1261
+Alias: csUnicodeIBM1261
+
+Name: ISO-Unicode-IBM-1268
+MIBenum: 1006
+Source: IBM Latin-4 Extended Presentation Set, GCSGID: 1268
+Alias: csUnicodeIBM1268
+
+Name: ISO-Unicode-IBM-1276
+MIBenum: 1007
+Source: IBM Cyrillic Greek Extended Presentation Set, GCSGID: 1276
+Alias: csUnicodeIBM1276
+
+Name: ISO-Unicode-IBM-1264
+MIBenum: 1008
+Source: IBM Arabic Presentation Set, GCSGID: 1264
+Alias: csUnicodeIBM1264
+
+Name: ISO-Unicode-IBM-1265
+MIBenum: 1009
+Source: IBM Hebrew Presentation Set, GCSGID: 1265
+Alias: csUnicodeIBM1265
+
+Name: ISO-8859-1-Windows-3.0-Latin-1                           [HP-PCL5] 
+MIBenum: 2000
+Source: Extended ISO 8859-1 Latin-1 for Windows 3.0.  
+        PCL Symbol Set id: 9U
+Alias: csWindows30Latin1
+
+Name: ISO-8859-1-Windows-3.1-Latin-1                           [HP-PCL5] 
+MIBenum: 2001
+Source: Extended ISO 8859-1 Latin-1 for Windows 3.1.  
+        PCL Symbol Set id: 19U
+Alias: csWindows31Latin1
+
+Name: ISO-8859-2-Windows-Latin-2                               [HP-PCL5] 
+MIBenum: 2002
+Source: Extended ISO 8859-2.  Latin-2 for Windows 3.1.
+        PCL Symbol Set id: 9E
+Alias: csWindows31Latin2
+
+Name: ISO-8859-9-Windows-Latin-5                               [HP-PCL5] 
+MIBenum: 2003
+Source: Extended ISO 8859-9.  Latin-5 for Windows 3.1
+        PCL Symbol Set id: 5T
+Alias: csWindows31Latin5
+
+Name: Adobe-Standard-Encoding                                    [Adobe]
+MIBenum: 2005
+Source: PostScript Language Reference Manual
+        PCL Symbol Set id: 10J
+Alias: csAdobeStandardEncoding
+
+Name: Ventura-US                                               [HP-PCL5]
+MIBenum: 2006
+Source: Ventura US.  ASCII plus characters typically used in 
+        publishing, like pilcrow, copyright, registered, trade mark, 
+        section, dagger, and double dagger in the range A0 (hex) 
+        to FF (hex).  
+        PCL Symbol Set id: 14J
+Alias: csVenturaUS  
+
+Name: Ventura-International                                    [HP-PCL5]
+MIBenum: 2007
+Source: Ventura International.  ASCII plus coded characters similar 
+        to Roman8.
+        PCL Symbol Set id: 13J
+Alias: csVenturaInternational
+
+Name: PC8-Danish-Norwegian                                     [HP-PCL5]
+MIBenum: 2012
+Source: PC Danish Norwegian
+        8-bit PC set for Danish Norwegian
+        PCL Symbol Set id: 11U
+Alias: csPC8DanishNorwegian
+
+Name: PC8-Turkish                                              [HP-PCL5]
+MIBenum: 2014
+Source: PC Latin Turkish.  PCL Symbol Set id: 9T
+Alias: csPC8Turkish
+
+Name: IBM-Symbols                                             [IBM-CIDT] 
+MIBenum: 2015
+Source: Presentation Set, CPGID: 259
+Alias: csIBMSymbols
+
+Name: IBM-Thai                                                [IBM-CIDT] 
+MIBenum: 2016
+Source: Presentation Set, CPGID: 838
+Alias: csIBMThai
+
+Name: HP-Legal                                                 [HP-PCL5]
+MIBenum: 2017
+Source: PCL 5 Comparison Guide, Hewlett-Packard,
+        HP part number 5961-0510, October 1992
+        PCL Symbol Set id: 1U
+Alias: csHPLegal
+
+Name: HP-Pi-font                                               [HP-PCL5]
+MIBenum: 2018
+Source: PCL 5 Comparison Guide, Hewlett-Packard,
+        HP part number 5961-0510, October 1992
+        PCL Symbol Set id: 15U
+Alias: csHPPiFont
+
+Name: HP-Math8                                                 [HP-PCL5]
+MIBenum: 2019
+Source: PCL 5 Comparison Guide, Hewlett-Packard,
+        HP part number 5961-0510, October 1992
+        PCL Symbol Set id: 8M
+Alias: csHPMath8
+
+Name: Adobe-Symbol-Encoding                                      [Adobe]
+MIBenum: 2020
+Source: PostScript Language Reference Manual
+        PCL Symbol Set id: 5M
+Alias: csHPPSMath
+
+Name: HP-DeskTop                                               [HP-PCL5]
+MIBenum: 2021
+Source: PCL 5 Comparison Guide, Hewlett-Packard,
+        HP part number 5961-0510, October 1992
+        PCL Symbol Set id: 7J
+Alias: csHPDesktop
+
+Name: Ventura-Math                                             [HP-PCL5]
+MIBenum: 2022
+Source: PCL 5 Comparison Guide, Hewlett-Packard,
+        HP part number 5961-0510, October 1992
+        PCL Symbol Set id: 6M
+Alias: csVenturaMath
+
+Name: Microsoft-Publishing                                     [HP-PCL5]
+MIBenum: 2023
+Source: PCL 5 Comparison Guide, Hewlett-Packard,
+        HP part number 5961-0510, October 1992
+        PCL Symbol Set id: 6J
+Alias: csMicrosoftPublishing
+
+Name: Windows-31J
+MIBenum: 2024
+Source: Windows Japanese.  A further extension of Shift_JIS
+        to include NEC special characters (Row 13), NEC
+        selection of IBM extensions (Rows 89 to 92), and IBM
+        extensions (Rows 115 to 119).  The CCS's are
+        JIS X0201:1997, JIS X0208:1997, and these extensions.
+        This charset can be used for the top-level media type "text",
+        but it is of limited or specialized use (see RFC2278).
+        PCL Symbol Set id: 19K
+Alias: csWindows31J
+
+Name: GB2312  (preferred MIME name)
+MIBenum: 2025
+Source: Chinese for People's Republic of China (PRC) mixed one byte, 
+        two byte set: 
+          20-7E = one byte ASCII 
+          A1-FE = two byte PRC Kanji 
+        See GB 2312-80 
+        PCL Symbol Set Id: 18C
+Alias: csGB2312
+
+Name: Big5  (preferred MIME name)
+MIBenum: 2026
+Source: Chinese for Taiwan Multi-byte set.
+        PCL Symbol Set Id: 18T
+Alias: csBig5
+
+Name: windows-1250
+MIBenum: 2250
+Source: Microsoft  (http://www.iana.org/assignments/charset-reg/windows-1250) [Lazhintseva]
+Alias: None
+
+Name: windows-1251
+MIBenum: 2251
+Source: Microsoft  (http://www.iana.org/assignments/charset-reg/windows-1251) [Lazhintseva]
+Alias: None
+
+Name: windows-1252
+MIBenum: 2252
+Source: Microsoft  (http://www.iana.org/assignments/charset-reg/windows-1252)       [Wendt]
+Alias: None
+
+Name: windows-1253
+MIBenum: 2253
+Source: Microsoft  (http://www.iana.org/assignments/charset-reg/windows-1253) [Lazhintseva]
+Alias: None
+
+Name: windows-1254
+MIBenum: 2254
+Source: Microsoft  (http://www.iana.org/assignments/charset-reg/windows-1254) [Lazhintseva]
+Alias: None
+
+Name: windows-1255
+MIBenum: 2255
+Source: Microsoft  (http://www.iana.org/assignments/charset-reg/windows-1255) [Lazhintseva]
+Alias: None
+
+Name: windows-1256
+MIBenum: 2256
+Source: Microsoft  (http://www.iana.org/assignments/charset-reg/windows-1256) [Lazhintseva]
+Alias: None 
+
+Name: windows-1257
+MIBenum: 2257
+Source: Microsoft  (http://www.iana.org/assignments/charset-reg/windows-1257) [Lazhintseva]
+Alias: None
+
+Name: windows-1258
+MIBenum: 2258
+Source: Microsoft  (http://www.iana.org/assignments/charset-reg/windows-1258) [Lazhintseva]
+Alias: None
+
+Name: TIS-620
+MIBenum: 2259
+Source: Thai Industrial Standards Institute (TISI)	     [Tantsetthi]
+
+Name: HZ-GB-2312
+MIBenum: 2085
+Source: RFC 1842, RFC 1843                              [RFC1842, RFC1843]
+
+
+REFERENCES
+----------
+
+[RFC1345]  Simonsen, K., "Character Mnemonics & Character Sets",
+           RFC 1345, Rationel Almen Planlaegning, Rationel Almen
+           Planlaegning, June 1992.
+
+[RFC1428]  Vaudreuil, G., "Transition of Internet Mail from
+           Just-Send-8 to 8bit-SMTP/MIME", RFC1428, CNRI, February
+           1993.
+
+[RFC1456]  Vietnamese Standardization Working Group, "Conventions for
+           Encoding the Vietnamese Language VISCII: VIetnamese 
+           Standard Code for Information Interchange VIQR: VIetnamese 
+           Quoted-Readable Specification Revision 1.1", RFC 1456, May
+           1993.
+
+[RFC1468]  Murai, J., Crispin, M., and E. van der Poel, "Japanese
+           Character Encoding for Internet Messages", RFC 1468,
+           Keio University, Panda Programming, June 1993.
+
+[RFC1489]  Chernov, A., "Registration of a Cyrillic Character Set",
+           RFC1489, RELCOM Development Team, July 1993. 
+
+[RFC1554]  Ohta, M., and K. Handa, "ISO-2022-JP-2: Multilingual
+           Extension of ISO-2022-JP", RFC1554, Tokyo Institute of
+           Technology, ETL, December 1993. 
+
+[RFC1556]  Nussbacher, H., "Handling of Bi-directional Texts in MIME",
+           RFC1556, Israeli Inter-University, December 1993. 
+
+[RFC1557]  Choi, U., Chon, K., and H. Park, "Korean Character Encoding
+           for Internet Messages", KAIST, Solvit Chosun Media,
+           December 1993.
+
+[RFC1641]  Goldsmith, D., and M. Davis, "Using Unicode with MIME",
+           RFC1641, Taligent, Inc., July 1994. 
+
+[RFC1642]  Goldsmith, D., and M. Davis, "UTF-7", RFC1642, Taligent,
+           Inc., July 1994.
+
+[RFC1815]  Ohta, M., "Character Sets ISO-10646 and ISO-10646-J-1",
+           RFC 1815, Tokyo Institute of Technology, July 1995.
+
+
+[Adobe]    Adobe Systems Incorporated, PostScript Language Reference
+           Manual, second edition, Addison-Wesley Publishing Company,
+           Inc., 1990.
+
+[ECMA Registry]  ISO-IR: International Register of Escape Sequences
+           http://www.itscj.ipsj.or.jp/ISO-IR/  Note: The current
+           registration authority is IPSJ/ITSCJ, Japan.
+
+[HP-PCL5]  Hewlett-Packard Company, "HP PCL 5 Comparison Guide", 
+           (P/N 5021-0329) pp B-13, 1996.
+
+[IBM-CIDT] IBM Corporation, "ABOUT TYPE: IBM's Technical Reference
+           for Core Interchange Digitized Type", Publication number
+           S544-3708-01
+
+[RFC1842]  Wei, Y., J. Li, and Y. Jiang, "ASCII Printable
+           Characters-Based Chinese Character Encoding for Internet
+           Messages", RFC 1842, Harvard University, Rice University,
+           University of Maryland, August 1995.
+
+[RFC1843]  Lee, F., "HZ - A Data Format for Exchanging Files of
+           Arbitrarily Mixed Chinese and ASCII Characters", RFC 1843,
+           Stanford University, August 1995.
+
+[RFC2152]  Goldsmith, D., M. Davis, "UTF-7: A Mail-Safe Transformation
+	   Format of Unicode", RFC 2152, Apple Computer, Inc.,
+	   Taligent Inc., May 1997.
+
+[RFC2279]  Yergeau, F., "UTF-8, A Transformation Format of ISO 10646",
+           RFC 2279, Alis Technologies, January, 1998.
+
+[RFC2781]  Hoffman, P., Yergeau, F., "UTF-16, an encoding of ISO 10646",
+           RFC 2781, February 2000.
+
+[RFC3629]  Yergeau, F., "UTF-8, a transformation format of ISO 10646",
+           RFC3629, November 2003.
+
+PEOPLE
+------
+
+[KXS2] Keld Simonsen <Keld.Simonsen@dkuug.dk>
+
+[Choi] Woohyong Choi <whchoi@cosmos.kaist.ac.kr>
+
+[Davis] Mark Davis, <mark@unicode.org>, April 2002.
+
+[Lazhintseva] Katya Lazhintseva, <katyal@MICROSOFT.com>, May 1996.
+
+[Mahdi] Tamer Mahdi, <tamer@ca.ibm.com>, August 2000.
+
+[Malyshev] Michael Malyshev, <michael_malyshev@mail.ru>, January 2004
+
+[Murai] Jun Murai <jun@wide.ad.jp>
+
+[Nussbacher] Hank Nussbacher, <hank@vm.tau.ac.il>
+
+[Ohta] Masataka Ohta, <mohta@cc.titech.ac.jp>, July 1995.
+
+[Phipps] Toby Phipps, <tphipps@peoplesoft.com>, March 2002.
+
+[Pond] Rick Pond, <rickpond@vnet.ibm.com>, March 1997.
+
+[Robrigado] Reuel Robrigado, <reuelr@ca.ibm.com>, September 2002.
+
+[Scherer] Markus Scherer, <markus.scherer@jtcsv.com>, August 2000, 
+          September 2002.
+
+[Simonsen] Keld Simonsen, <Keld.Simonsen@rap.dk>, August 2000.
+
+[Tantsetthi] Trin Tantsetthi, <trin@mozart.inet.co.th>, September 1998.
+
+[Tumasonis] Vladas Tumasonis, <vladas.tumasonis@maf.vu.lt>, August 2000.
+
+[Uskov] Alexander Uskov, <auskov@idc.kz>, September 2002.
+
+[Wendt] Chris Wendt, <christw@microsoft.com>, December 1999.
+
+[Yick] Nicky Yick, <cliac@itsd.gcn.gov.hk>, October 2000.
+
+[]
+
+
+
+
+
+
+
diff --git a/WebCore/platform/text/mac/mac-encodings.txt b/WebCore/platform/text/mac/mac-encodings.txt
new file mode 100644
index 0000000..bb45e22
--- /dev/null
+++ b/WebCore/platform/text/mac/mac-encodings.txt
@@ -0,0 +1,45 @@
+# We'd like to eliminate this file.
+# It would be nice to get rid of dependence on the Text Encoding Converter (TEC) entirely.
+# Perhaps we can prove these are not used on the web and remove them.
+# Or perhaps we can get them added to ICU.
+
+# The items on the left are names of TEC TextEncoding values (without the leading kTextEncoding).
+# The items on the right are IANA character set names. Names listed in character-sets.txt are not
+# repeated here; mentioning any one character set from a group in there pulls in all the aliases in
+# that group.
+
+DOSChineseTrad: cp950
+DOSGreek: cp737, ibm737
+EUC_TW: EUC-TW
+ISOLatin10: ISO-8859-16
+ISOLatin6: ISO-8859-10
+ISOLatin8: ISO-8859-14
+ISOLatinThai: ISO-8859-11
+ISO_2022_JP_3: ISO-2022-JP-3
+JIS_C6226_78: JIS_C6226-1978
+JIS_X0208_83: JIS_X0208-1983
+JIS_X0208_90: JIS_X0208-1990
+JIS_X0212_90: JIS_X0212-1990
+KOI8_U: KOI8-U
+MacArabic: x-mac-arabic
+MacChineseSimp: x-mac-chinesesimp, xmacsimpchinese
+MacChineseTrad: x-mac-chinesetrad, xmactradchinese
+MacCroatian: x-mac-croatian
+MacDevanagari: x-mac-devanagari
+MacDingbats: x-mac-dingbats
+MacFarsi: x-mac-farsi
+MacGujarati: x-mac-gujarati
+MacGurmukhi: x-mac-gurmukhi
+MacHebrew: x-mac-hebrew
+MacIcelandic: x-mac-icelandic
+MacJapanese: x-mac-japanese
+MacKorean: x-mac-korean
+MacRomanLatin1: x-mac-roman-latin1
+MacRomanian: x-mac-romanian
+MacSymbol: x-mac-symbol
+MacThai: x-mac-thai
+MacTibetan: x-mac-tibetan
+MacVT100: x-mac-vt100
+NextStepLatin: x-nextstep
+ShiftJIS_X0213_00: Shift_JIS_X0213-2000
+WindowsKoreanJohab: johab
diff --git a/WebCore/platform/text/mac/make-charset-table.pl b/WebCore/platform/text/mac/make-charset-table.pl
new file mode 100755
index 0000000..16fd25a
--- /dev/null
+++ b/WebCore/platform/text/mac/make-charset-table.pl
@@ -0,0 +1,225 @@
+#!/usr/bin/perl -w
+
+# Copyright (C) 2003, 2004, 2005, 2006 Apple Computer, Inc. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1.  Redistributions of source code must retain the above copyright
+#     notice, this list of conditions and the following disclaimer. 
+# 2.  Redistributions in binary form must reproduce the above copyright
+#     notice, this list of conditions and the following disclaimer in the
+#     documentation and/or other materials provided with the distribution. 
+# 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
+#     its contributors may be used to endorse or promote products derived
+#     from this software without specific prior written permission. 
+#
+# THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+use strict;
+
+my %aliasesFromCharsetsFile;
+my %namesWritten;
+
+my $output = "";
+
+my $error = 0;
+
+sub error ($)
+{   # Reports a problem on STDERR and marks the whole run as failed.
+    $error = 1;
+    print STDERR @_, "\n";
+}
+
+sub emit_line
+{
+    # Appends one { "name", encoding } initializer row to $output; a name emitted twice is an error.
+    my ($charsetName, $encodingPrefix, $encodingName, $flags) = @_;   # $flags is accepted but not used
+    if ($namesWritten{$charsetName}) { error "$charsetName shows up twice in output"; }
+    $namesWritten{$charsetName} = 1;
+    
+    $output .= "        { \"$charsetName\", $encodingPrefix$encodingName },\n";
+}
+
+sub process_platform_encodings
+{
+    # Reads the platform encodings file and emits a table row for every charset name and alias it maps.
+    my ($filename, $PlatformPrefix) = @_;
+    my $baseFilename = $filename;
+    $baseFilename =~ s|.*/||;
+    my %seenPlatformNames;
+    my %seenIANANames;
+    
+    open my $platformEncodings, "<", $filename or die "Cannot open $filename: $!\n";
+    
+    while (<$platformEncodings>) {
+        chomp;
+        s/\#.*$//;   # drop comments
+        s/\s+$//;    # drop trailing whitespace
+        if (my ($PlatformName, undef, $flags, $IANANames) = /^(.+?)(, (.+))?: (.+)$/) {
+            my %aliases;
+            # Entry of the form "PlatformName[, flags]: first name[, alias, ...]".
+            my $PlatformNameWithFlags = $PlatformName;
+            if ($flags) {
+                $PlatformNameWithFlags .= ", " . $flags;
+            } else {
+                $flags = "NoEncodingFlags";
+            }
+            error "Platform encoding name $PlatformName is mentioned twice in $baseFilename" if $seenPlatformNames{$PlatformNameWithFlags};
+            $seenPlatformNames{$PlatformNameWithFlags} = 1;
+
+            # Build the aliases list.
+            # Also check that no two names are part of the same entry in the charsets file.
+            my @IANANames = split ", ", $IANANames;
+            my $firstName = "";
+            my $canonicalFirstName = "";
+            my $prevName = "";
+            for my $name (@IANANames) {
+                if ($firstName eq "") {
+                    if ($name !~ /^[-A-Za-z0-9_]+$/) {
+                        error "$name, in $baseFilename, has illegal characters in it";
+                        next;
+                    }
+                    $firstName = $name;
+                } else {
+                    if ($name !~ /^[a-z0-9]+$/) {
+                        error "$name, in $baseFilename, has illegal characters in it (must be all lowercase alphanumeric)";
+                        next;
+                    }
+                    if ($name le $prevName) {
+                        error "$name comes after $prevName in $baseFilename, but everything must be in alphabetical order";
+                    }
+                    $prevName = $name;
+                }
+                
+                my $canonicalName = lc $name;
+                $canonicalName =~ tr/-_//d;
+                $canonicalFirstName = $canonicalName if $canonicalFirstName eq "";
+                error "$name is mentioned twice in $baseFilename" if $seenIANANames{$canonicalName};
+                $seenIANANames{$canonicalName} = 1;
+                
+                $aliases{$canonicalName} = 1;
+                next if !$aliasesFromCharsetsFile{$canonicalName};
+                for my $alias (@{$aliasesFromCharsetsFile{$canonicalName}}) {
+                    $aliases{$alias} = 1;
+                }
+                for my $otherName (@IANANames) {
+                    # %aliasesFromCharsetsFile is keyed by canonical names, so canonicalize before the lookup.
+                    (my $canonicalOtherName = lc $otherName) =~ tr/-_//d;
+                    next if $canonicalName eq $canonicalOtherName;
+                    if ($aliasesFromCharsetsFile{$canonicalOtherName}
+                        && $aliasesFromCharsetsFile{$canonicalName} eq $aliasesFromCharsetsFile{$canonicalOtherName}
+                        && $canonicalName le $canonicalOtherName) {
+                        error "$baseFilename lists both $name and $otherName under $PlatformName, but that aliasing is already specified in character-sets.txt";
+                    }
+                }
+            }
+            
+            # write out
+            emit_line($firstName, $PlatformPrefix, $PlatformName, $flags);
+            for my $alias (sort keys %aliases) {
+                emit_line($alias, $PlatformPrefix, $PlatformName, $flags) if $alias ne $canonicalFirstName;
+            }
+        } elsif (/^([a-zA-Z0-9_]+)(, (.+))?$/) {
+            my $PlatformName = $1;
+            
+            error "Platform encoding name $PlatformName is mentioned twice in $baseFilename" if $seenPlatformNames{$PlatformName};
+            $seenPlatformNames{$PlatformName} = 1;
+        } elsif (/./) {
+            error "syntax error in $baseFilename, line $.";
+        }
+    }
+    
+    close $platformEncodings;
+}
+
+sub process_iana_charset
+{
+    # Registers one charset group: every name in it maps to the same sorted list of all its names.
+    my ($canonical_name, @aliases) = @_;
+    
+    # Nothing to record when no name was given (e.g. before the first "Name:" line has been read).
+    return unless $canonical_name;
+    
+    # One shared array reference per group, so callers can compare references to spot a common group.
+    my $group = [ sort $canonical_name, @aliases ];
+    $aliasesFromCharsetsFile{$_} = $group for @$group;
+}
+
+sub process_iana_charsets
+{
+    # Parses the IANA registry file; names are normalized to lowercase a-z/0-9 before any comparison.
+    my ($filename) = @_;
+    open my $charsets, "<", $filename or die "Cannot open $filename: $!\n";
+    
+    my %seen;   # normalized name or alias => normalized "Name:" it was last seen under
+    # A group is one "Name:" line plus the "Alias:" lines that follow it.
+    my $canonical_name;
+    my @aliases;
+    # Aliases that may appear under more than one name without being reported.
+    my %exceptions = ( isoir91 => 1, isoir92 => 1 );
+    
+    while (<$charsets>) {
+        chomp;
+        if ((my $new_canonical_name) = /Name: ([^ \t]*).*/) {
+            $new_canonical_name = lc $new_canonical_name;
+            $new_canonical_name =~ tr/a-z0-9//cd;
+            
+            error "saw $new_canonical_name twice in character-sets.txt" if $seen{$new_canonical_name};
+            $seen{$new_canonical_name} = $new_canonical_name;
+            # A new name ends the previous group, so hand that group over first.
+            process_iana_charset $canonical_name, @aliases;
+
+            $canonical_name = $new_canonical_name;
+            @aliases = ();
+        } elsif ((my $new_alias) = /Alias: ([^ \t]*).*/) {
+            $new_alias = lc $new_alias;
+            $new_alias =~ tr/a-z0-9//cd;
+            
+            # do this after normalizing the alias, sometimes character-sets.txt
+            # has weird escape characters, e.g. \b after None
+            next if $new_alias eq "none";
+
+            error "saw $new_alias twice in character-sets.txt $seen{$new_alias}, $canonical_name" if $seen{$new_alias} && $seen{$new_alias} ne $canonical_name && !$exceptions{$new_alias};
+            push @aliases, $new_alias if !$seen{$new_alias};
+            $seen{$new_alias} = $canonical_name;
+        }
+    }
+    # The final group has no following "Name:" line, so flush it here.
+    process_iana_charset $canonical_name, @aliases;
+    
+    close $charsets;
+}
+
+# Program body
+# Usage: make-charset-table.pl <IANA charsets file> <platform encodings file> <encoding name prefix>
+my ($charsetsFile, $platformEncodingsFile, $platformPrefix) = @ARGV;
+process_iana_charsets($charsetsFile);
+process_platform_encodings($platformEncodingsFile, $platformPrefix);
+
+if ($error) { exit 1; }   # any reported problem suppresses the generated table
+print <<EOF
+// File generated by make-charset-table.pl. Do not edit!
+
+#include "config.h"
+#include "CharsetData.h"
+
+namespace WebCore {
+
+    const CharsetEntry CharsetTable[] = {
+$output
+        { 0, 0 }
+    };
+
+}
+EOF