From f4b417c62a4f272c4cf9a074d0f7a3a97201f9db Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Tue, 17 Apr 2012 11:23:35 +0200 Subject: Update to upstream bash 4.2 This upgrades bash to from 4.1-rc to 4.2-release. See CWRU/changelog for changes. Change-Id: I926269c300cf44fa25964b5b375a148fcf11c4b7 --- lib/sh/unicode.c | 235 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 235 insertions(+) create mode 100644 lib/sh/unicode.c (limited to 'lib/sh/unicode.c') diff --git a/lib/sh/unicode.c b/lib/sh/unicode.c new file mode 100644 index 0000000..d34fa08 --- /dev/null +++ b/lib/sh/unicode.c @@ -0,0 +1,235 @@ +/* unicode.c - functions to convert unicode characters */ + +/* Copyright (C) 2010 Free Software Foundation, Inc. + + This file is part of GNU Bash, the Bourne Again SHell. + + Bash is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bash is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Bash. If not, see . +*/ + +#include + +#if defined (HANDLE_MULTIBYTE) + +#include +#include +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#include + +#if HAVE_ICONV +# include +#endif + +#include + +#ifndef USHORT_MAX +# ifdef USHRT_MAX +# define USHORT_MAX USHRT_MAX +# else +# define USHORT_MAX ((unsigned short) ~(unsigned short)0) +# endif +#endif + +#if !defined (STREQ) +# define STREQ(a, b) ((a)[0] == (b)[0] && strcmp ((a), (b)) == 0) +#endif /* !STREQ */ + +#if defined (HAVE_LOCALE_CHARSET) +extern const char *locale_charset __P((void)); +#else +extern char *get_locale_var __P((char *)); +#endif + +static int u32init = 0; +static int utf8locale = 0; +#if defined (HAVE_ICONV) +static iconv_t localconv; +#endif + +#ifndef HAVE_LOCALE_CHARSET +static char * +stub_charset () +{ + char *locale, *s, *t; + + locale = get_locale_var ("LC_CTYPE"); + if (locale == 0 || *locale == 0) + return "ASCII"; + s = strrchr (locale, '.'); + if (s) + { + t = strchr (s, '@'); + if (t) + *t = 0; + return ++s; + } + else if (STREQ (locale, "UTF-8")) + return "UTF-8"; + else + return "ASCII"; +} +#endif + +/* u32toascii ? */ +int +u32tochar (wc, s) + wchar_t wc; + char *s; +{ + unsigned long x; + int l; + + x = wc; + l = (x <= UCHAR_MAX) ? 1 : ((x <= USHORT_MAX) ? 2 : 4); + + if (x <= UCHAR_MAX) + s[0] = x & 0xFF; + else if (x <= USHORT_MAX) /* assume unsigned short = 16 bits */ + { + s[0] = (x >> 8) & 0xFF; + s[1] = x & 0xFF; + } + else + { + s[0] = (x >> 24) & 0xFF; + s[1] = (x >> 16) & 0xFF; + s[2] = (x >> 8) & 0xFF; + s[3] = x & 0xFF; + } + s[l] = '\0'; + return l; +} + +int +u32toutf8 (wc, s) + wchar_t wc; + char *s; +{ + int l; + + l = (wc < 0x0080) ? 1 : ((wc < 0x0800) ? 2 : 3); + + if (wc < 0x0080) + s[0] = (unsigned char)wc; + else if (wc < 0x0800) + { + s[0] = (wc >> 6) | 0xc0; + s[1] = (wc & 0x3f) | 0x80; + } + else + { + s[0] = (wc >> 12) | 0xe0; + s[1] = ((wc >> 6) & 0x3f) | 0x80; + s[2] = (wc & 0x3f) | 0x80; + } + s[l] = '\0'; + return l; +} + +/* convert a single unicode-32 character into a multibyte string and put the + result in S, which must be large enough (at least MB_LEN_MAX bytes) */ +int +u32cconv (c, s) + unsigned long c; + char *s; +{ + wchar_t wc; + int n; +#if HAVE_ICONV + const char *charset; + char obuf[25], *optr; + size_t obytesleft; + const char *iptr; + size_t sn; +#endif + + wc = c; + +#if __STDC_ISO_10646__ + if (sizeof (wchar_t) == 4) + { + n = wctomb (s, wc); + return n; + } +#endif + +#if HAVE_NL_LANGINFO + codeset = nl_langinfo (CODESET); + if (STREQ (codeset, "UTF-8")) + { + n = u32toutf8 (wc, s); + return n; + } +#endif + +#if HAVE_ICONV + /* this is mostly from coreutils-8.5/lib/unicodeio.c */ + if (u32init == 0) + { +# if HAVE_LOCALE_CHARSET + charset = locale_charset (); /* XXX - fix later */ +# else + charset = stub_charset (); +# endif + if (STREQ (charset, "UTF-8")) + utf8locale = 1; + else + { + localconv = iconv_open (charset, "UTF-8"); + if (localconv == (iconv_t)-1) + localconv = iconv_open (charset, "ASCII"); + } + u32init = 1; + } + + if (utf8locale) + { + n = u32toutf8 (wc, s); + return n; + } + + if (localconv == (iconv_t)-1) + { + n = u32tochar (wc, s); + return n; + } + + n = u32toutf8 (wc, s); + + optr = obuf; + obytesleft = sizeof (obuf); + iptr = s; + sn = n; + + iconv (localconv, NULL, NULL, NULL, NULL); + + if (iconv (localconv, (ICONV_CONST char **)&iptr, &sn, &optr, &obytesleft) == (size_t)-1) + return n; /* You get utf-8 if iconv fails */ + + *optr = '\0'; + + /* number of chars to be copied is optr - obuf if we want to do bounds + checking */ + strcpy (s, obuf); + return (optr - obuf); +#endif + + n = u32tochar (wc, s); /* fallback */ + return n; +} + +#endif /* HANDLE_MULTIBYTE */ -- cgit v1.1