diff options
Diffstat (limited to 'lib/Support')
-rw-r--r-- | lib/Support/CMakeLists.txt | 1 | ||||
-rw-r--r-- | lib/Support/Locale.cpp | 33 | ||||
-rw-r--r-- | lib/Support/LocaleWindows.inc | 15 | ||||
-rw-r--r-- | lib/Support/LocaleXlocale.inc | 61 | ||||
-rw-r--r-- | lib/Support/Unicode.cpp (renamed from lib/Support/LocaleGeneric.inc) | 46 |
5 files changed, 41 insertions, 115 deletions
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index 5823836..3aecf3f 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -54,6 +54,7 @@ add_llvm_library(LLVMSupport ToolOutputFile.cpp Triple.cpp Twine.cpp + Unicode.cpp YAMLParser.cpp YAMLTraits.cpp raw_os_ostream.cpp diff --git a/lib/Support/Locale.cpp b/lib/Support/Locale.cpp index 17b9b6c..35ddf7f 100644 --- a/lib/Support/Locale.cpp +++ b/lib/Support/Locale.cpp @@ -1,10 +1,31 @@ #include "llvm/Support/Locale.h" -#include "llvm/Config/config.h" +#include "llvm/Support/Unicode.h" -#ifdef __APPLE__ -#include "LocaleXlocale.inc" -#elif LLVM_ON_WIN32 -#include "LocaleWindows.inc" +namespace llvm { +namespace sys { +namespace locale { + +int columnWidth(StringRef Text) { +#if LLVM_ON_WIN32 + return Text.size(); #else -#include "LocaleGeneric.inc" + return llvm::sys::unicode::columnWidthUTF8(Text); #endif +} + +bool isPrint(int UCS) { +#if LLVM_ON_WIN32 + // Restrict characters that we'll try to print to the lower part of ASCII + // except for the control characters (0x20 - 0x7E). In general one can not + // reliably output code points U+0080 and higher using narrow character C/C++ + // output functions in Windows, because the meaning of the upper 128 codes is + // determined by the active code page in the console. 
+ return ' ' <= UCS && UCS <= '~'; +#else + return llvm::sys::unicode::isPrintable(UCS); +#endif +} + +} // namespace locale +} // namespace sys +} // namespace llvm diff --git a/lib/Support/LocaleWindows.inc b/lib/Support/LocaleWindows.inc deleted file mode 100644 index 28e429c..0000000 --- a/lib/Support/LocaleWindows.inc +++ /dev/null @@ -1,15 +0,0 @@ -namespace llvm { -namespace sys { -namespace locale { - -int columnWidth(StringRef s) { - return s.size(); -} - -bool isPrint(int c) { - return ' ' <= c && c <= '~'; -} - -} -} -} diff --git a/lib/Support/LocaleXlocale.inc b/lib/Support/LocaleXlocale.inc deleted file mode 100644 index 389fe3d..0000000 --- a/lib/Support/LocaleXlocale.inc +++ /dev/null @@ -1,61 +0,0 @@ -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/ManagedStatic.h" -#include <cassert> -#include <xlocale.h> - - -namespace { - struct locale_holder { - locale_holder() - : l(newlocale(LC_CTYPE_MASK,"en_US.UTF-8",LC_GLOBAL_LOCALE)) - { - assert(NULL!=l); - } - ~locale_holder() { - freelocale(l); - } - - int mbswidth(llvm::SmallString<16> s) const { - // this implementation assumes no '\0' in s - assert(s.size()==strlen(s.c_str())); - - size_t size = mbstowcs_l(NULL,s.c_str(),0,l); - assert(size!=(size_t)-1); - if (size==0) - return 0; - llvm::SmallVector<wchar_t,200> ws(size); - size = mbstowcs_l(&ws[0],s.c_str(),ws.size(),l); - assert(ws.size()==size); - return wcswidth_l(&ws[0],ws.size(),l); - } - - int isprint(int c) const { - return iswprint_l(c,l); - } - - private: - - locale_t l; - }; - - llvm::ManagedStatic<locale_holder> l; -} - -namespace llvm { -namespace sys { -namespace locale { - -int columnWidth(StringRef s) { - int width = l->mbswidth(s); - assert(width>=0); - return width; -} - -bool isPrint(int c) { - return l->isprint(c); -} - -} -} -} diff --git a/lib/Support/LocaleGeneric.inc b/lib/Support/Unicode.cpp index 9fb8953..b719bd8 100644 --- a/lib/Support/LocaleGeneric.inc +++ 
b/lib/Support/Unicode.cpp @@ -1,4 +1,4 @@ -//===- llvm/Support/LocaleGeneric.inc - Locale-dependent stuff -*- C++ -*-===// +//===- llvm/Support/Unicode.cpp - Unicode character properties -*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,41 +7,20 @@ // //===----------------------------------------------------------------------===// // -// This file implements llvm::sys::locale::columnWidth and -// llvm::sys::locale::isPrint functions for UTF-8 locales. +// This file implements functions that allow querying certain properties of +// Unicode characters. // //===----------------------------------------------------------------------===// -#include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/Unicode.h" #include "llvm/Support/ConvertUTF.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/UnicodeCharRanges.h" namespace llvm { namespace sys { -namespace locale { +namespace unicode { -enum ColumnWidthErrors { - ErrorInvalidUTF8 = -2, - ErrorNonPrintableCharacter = -1 -}; - -/// Determines if a character is likely to be displayed correctly on the -/// terminal. Exact implementation would have to depend on the specific -/// terminal, so we define the semantic that should be suitable for generic case -/// of a terminal capable to output Unicode characters. -/// All characters from the Unicode codepoint range are considered printable -/// except for: -/// * C0 and C1 control character ranges; -/// * default ignorable code points as per 5.21 of -/// http://www.unicode.org/versions/Unicode6.2.0/UnicodeStandard-6.2.pdf -/// except for U+00AD SOFT HYPHEN, as it's actually displayed on most -/// terminals; -/// * format characters (category = Cf); -/// * surrogates (category = Cs); -/// * unassigned characters (category = Cn). -/// \return true if the character is considered printable. -bool isPrint(int UCS) { +bool isPrintable(int UCS) { // Sorted list of non-overlapping intervals of code points that are not // supposed to be printable. 
static const UnicodeCharRange NonPrintableRanges[] = { @@ -241,13 +220,13 @@ bool isPrint(int UCS) { /// with a generic Unicode-capable terminal. /// \return Character width: /// * ErrorNonPrintableCharacter (-1) for non-printable characters (as -/// identified by isPrint); +/// identified by isPrintable); /// * 0 for non-spacing and enclosing combining marks; /// * 2 for CJK characters excluding halfwidth forms; /// * 1 for all remaining characters. static inline int charWidth(int UCS) { - if (!isPrint(UCS)) + if (!isPrintable(UCS)) return ErrorNonPrintableCharacter; // Sorted list of non-spacing and enclosing combining mark intervals as @@ -361,7 +340,7 @@ static inline int charWidth(int UCS) return 1; } -int columnWidth(StringRef Text) { +int columnWidthUTF8(StringRef Text) { unsigned ColumnWidth = 0; unsigned Length; for (size_t i = 0, e = Text.size(); i < e; i += Length) { @@ -382,6 +361,7 @@ int columnWidth(StringRef Text) { return ColumnWidth; } -} -} -} +} // namespace unicode +} // namespace sys +} // namespace llvm + |