1 files changed, 50 insertions, 44 deletions
diff --git a/WebCore/platform/graphics/chromium/FontUtilsChromiumWin.cpp b/WebCore/platform/graphics/chromium/FontUtilsChromiumWin.cpp
index ed326c8..9596a4c 100644
--- a/WebCore/platform/graphics/chromium/FontUtilsChromiumWin.cpp
+++ b/WebCore/platform/graphics/chromium/FontUtilsChromiumWin.cpp
@@ -120,6 +120,52 @@ void initializeScriptFontMap(ScriptToFontMap& scriptFontMap)
         scriptFontMap[USCRIPT_HAN] = localeFamily;
 }
 
+// Maps the Unicode block of ucs4 to a related script whose fonts can cover
+// USCRIPT_COMMON characters; unlisted blocks yield USCRIPT_COMMON. See
+// http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Script=Common:]
+// FIXME: make this more efficient with a wider coverage
+UScriptCode getScriptBasedOnUnicodeBlock(int ucs4)
+{
+    UBlockCode block = ublock_getCode(ucs4);
+    switch (block) {
+    case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION:
+        return USCRIPT_HAN;
+    case UBLOCK_HIRAGANA:
+    case UBLOCK_KATAKANA:
+        return USCRIPT_HIRAGANA;
+    case UBLOCK_ARABIC:
+        return USCRIPT_ARABIC;
+    case UBLOCK_THAI:
+        return USCRIPT_THAI;
+    case UBLOCK_GREEK:
+        return USCRIPT_GREEK;
+    case UBLOCK_DEVANAGARI:
+        // Danda and Double Danda (U+0964, U+0965) are used by other scripts
+        // as well, but without a context we can't do any better than a
+        // Devanagari font for now.
+        return USCRIPT_DEVANAGARI;
+    case UBLOCK_ARMENIAN:
+        return USCRIPT_ARMENIAN;
+    case UBLOCK_GEORGIAN:
+        return USCRIPT_GEORGIAN;
+    case UBLOCK_KANNADA:
+        return USCRIPT_KANNADA;
+    default:
+        return USCRIPT_COMMON;
+    }
+}
+
+UScriptCode getScript(int ucs4)
+{
+    UErrorCode err = U_ZERO_ERROR;
+    UScriptCode script = uscript_getScript(ucs4, &err);
+    // Invalid, common and inherited scripts all compare <= USCRIPT_INHERITED;
+    // for those, or on an ICU error, infer a script from the Unicode block.
+    if (script <= USCRIPT_INHERITED || U_FAILURE(err))
+        script = getScriptBasedOnUnicodeBlock(ucs4);
+    return script;
+}
+
 const int kUndefinedAscent = std::numeric_limits<int>::min();
 
 // Given an HFONT, return the ascent. If GetTextMetrics fails,
@@ -209,11 +255,9 @@ const UChar* getFallbackFamily(const UChar* characters,
     // to get a font required to render the string.
     int i = 0;
     UChar32 ucs4 = 0;
-    while (i < length && script == USCRIPT_COMMON || script == USCRIPT_INVALID_CODE) {
+    while (i < length && script == USCRIPT_COMMON) {
         U16_NEXT(characters, i, length, ucs4);
-        UErrorCode err = U_ZERO_ERROR;
-        script = uscript_getScript(ucs4, &err);
-        // silently ignore the error
+        script = getScript(ucs4);
     }
 
     // For the full-width ASCII characters (U+FF00 - U+FF5E), use the font for
@@ -223,46 +267,8 @@ const UChar* getFallbackFamily(const UChar* characters,
     if (0xFF00 < ucs4 && ucs4 < 0xFF5F)
         script = USCRIPT_HAN;
 
-    // There are a lot of characters in USCRIPT_COMMON that can be covered
-    // by fonts for scripts closely related to them. See
-    // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Script=Common:]
-    // FIXME: make this more efficient with a wider coverage
-    if (script == USCRIPT_COMMON || script == USCRIPT_INHERITED) {
-        UBlockCode block = ublock_getCode(ucs4);
-        switch (block) {
-        case UBLOCK_BASIC_LATIN:
-            script = USCRIPT_LATIN;
-            break;
-        case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION:
-            script = USCRIPT_HAN;
-            break;
-        case UBLOCK_HIRAGANA:
-        case UBLOCK_KATAKANA:
-            script = USCRIPT_HIRAGANA;
-            break;
-        case UBLOCK_ARABIC:
-            script = USCRIPT_ARABIC;
-            break;
-        case UBLOCK_GREEK:
-            script = USCRIPT_GREEK;
-            break;
-        case UBLOCK_DEVANAGARI:
-            // For Danda and Double Danda (U+0964, U+0965), use a Devanagari
-            // font for now although they're used by other scripts as well.
-            // Without a context, we can't do any better.
-            script = USCRIPT_DEVANAGARI;
-            break;
-        case UBLOCK_ARMENIAN:
-            script = USCRIPT_ARMENIAN;
-            break;
-        case UBLOCK_GEORGIAN:
-            script = USCRIPT_GEORGIAN;
-            break;
-        case UBLOCK_KANNADA:
-            script = USCRIPT_KANNADA;
-            break;
-        }
-    }
+    if (script == USCRIPT_COMMON)
+        script = getScriptBasedOnUnicodeBlock(ucs4);
 
     // Another lame work-around to cover non-BMP characters.
     const UChar* family = getFontFamilyForScript(script, generic);