summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Narayan Kamath <narayan@google.com> 2014-06-12 11:49:17 +0000
committer Gerrit Code Review <noreply-gerritcodereview@google.com> 2014-06-10 21:16:15 +0000
commit 92bd303dd51f3d20ee1e48c8cf422d414b7cefcd (patch)
tree 57c284d1568d696ce3ad5b97ccee8033bbeddfaf
parent 1a608148049e1d801fd2e1b47aacb1576ca3eebe (diff)
parent 12ca8820818b604c6fc30f025857ec443c83d4a3 (diff)
download libcore-92bd303dd51f3d20ee1e48c8cf422d414b7cefcd.zip
libcore-92bd303dd51f3d20ee1e48c8cf422d414b7cefcd.tar.gz
libcore-92bd303dd51f3d20ee1e48c8cf422d414b7cefcd.tar.bz2
Merge "Implement Locale.toLanguageTag without ICU support."
-rw-r--r-- luni/src/main/java/java/util/Locale.java 288
-rw-r--r-- luni/src/main/java/libcore/icu/ICU.java 8
-rw-r--r-- luni/src/main/native/libcore_icu_ICU.cpp 31
-rw-r--r-- luni/src/test/java/libcore/java/util/LocaleTest.java 2
4 files changed, 256 insertions, 73 deletions
diff --git a/luni/src/main/java/java/util/Locale.java b/luni/src/main/java/java/util/Locale.java
index 163a627..5f24138 100644
--- a/luni/src/main/java/java/util/Locale.java
+++ b/luni/src/main/java/java/util/Locale.java
@@ -340,9 +340,13 @@ public final class Locale implements Cloneable, Serializable {
* @throws IllformedLocaleException if the language was invalid.
*/
public Builder setLanguage(String language) {
+ this.language = normalizeAndValidateLanguage(language);
+ return this;
+ }
+
+ private static String normalizeAndValidateLanguage(String language) {
if (language == null || language.isEmpty()) {
- this.language = "";
- return this;
+ return "";
}
final String lowercaseLanguage = language.toLowerCase(Locale.ROOT);
@@ -350,8 +354,7 @@ public final class Locale implements Cloneable, Serializable {
throw new IllformedLocaleException("Invalid language: " + language);
}
- this.language = lowercaseLanguage;
- return this;
+ return lowercaseLanguage;
}
/**
@@ -397,9 +400,13 @@ public final class Locale implements Cloneable, Serializable {
* @throws IllformedLocaleException if {@code} region is invalid.
*/
public Builder setRegion(String region) {
+ this.region = normalizeAndValidateRegion(region);
+ return this;
+ }
+
+ private static String normalizeAndValidateRegion(String region) {
if (region == null || region.isEmpty()) {
- this.region = "";
- return this;
+ return "";
}
final String uppercaseRegion = region.toUpperCase(Locale.ROOT);
@@ -408,8 +415,7 @@ public final class Locale implements Cloneable, Serializable {
throw new IllformedLocaleException("Invalid region: " + region);
}
- this.region = uppercaseRegion;
- return this;
+ return uppercaseRegion;
}
/**
@@ -432,9 +438,13 @@ public final class Locale implements Cloneable, Serializable {
* @throws IllformedLocaleException if {@code} variant is invalid.
*/
public Builder setVariant(String variant) {
+ this.variant = normalizeAndValidateVariant(variant);
+ return this;
+ }
+
+ private static String normalizeAndValidateVariant(String variant) {
if (variant == null || variant.isEmpty()) {
- this.variant = "";
- return this;
+ return "";
}
// Note that unlike extensions, we canonicalize to lower case alphabets
@@ -461,8 +471,7 @@ public final class Locale implements Cloneable, Serializable {
}
- this.variant = normalizedVariant;
- return this;
+ return normalizedVariant;
}
/**
@@ -766,7 +775,7 @@ public final class Locale implements Cloneable, Serializable {
// because the RI allows this builder to reused.
return new Locale(language, region, variant, script,
attributes, keywords, extensions,
- false /* from public constructor */);
+ true /* has validated fields */);
}
}
@@ -793,10 +802,6 @@ public final class Locale implements Cloneable, Serializable {
private transient String variantCode;
private transient String scriptCode;
- private transient String cachedToStringResult;
- private transient String cachedLanguageTag;
- private transient String cachedIcuLocaleId;
-
/* Sorted, Unmodifiable */
private transient Set<String> unicodeAttributes;
/* Sorted, Unmodifiable */
@@ -805,13 +810,23 @@ public final class Locale implements Cloneable, Serializable {
private transient Map<Character, String> extensions;
/**
+ * Whether this instance was constructed from a builder. We can make
+ * stronger assumptions about the validity of Locale fields if this was
+ * constructed by a builder.
+ */
+ private transient final boolean hasValidatedFields;
+
+ private transient String cachedToStringResult;
+ private transient String cachedLanguageTag;
+ private transient String cachedIcuLocaleId;
+
+ /**
* There's a circular dependency between toLowerCase/toUpperCase and
* Locale.US. Work around this by avoiding these methods when constructing
* the built-in locales.
- *
- * @param unused required for this constructor to have a unique signature
*/
- private Locale(boolean unused, String lowerCaseLanguageCode, String upperCaseCountryCode) {
+ private Locale(boolean hasValidatedFields, String lowerCaseLanguageCode,
+ String upperCaseCountryCode) {
this.languageCode = lowerCaseLanguageCode;
this.countryCode = upperCaseCountryCode;
this.variantCode = "";
@@ -820,6 +835,8 @@ public final class Locale implements Cloneable, Serializable {
this.unicodeAttributes = Collections.EMPTY_SET;
this.unicodeKeywords = Collections.EMPTY_MAP;
this.extensions = Collections.EMPTY_MAP;
+
+ this.hasValidatedFields = hasValidatedFields;
}
/**
@@ -827,7 +844,7 @@ public final class Locale implements Cloneable, Serializable {
*/
public Locale(String language) {
this(language, "", "", "", Collections.EMPTY_SET, Collections.EMPTY_MAP,
- Collections.EMPTY_MAP, true /* from public constructor */);
+ Collections.EMPTY_MAP, false /* has validated fields */);
}
/**
@@ -835,7 +852,7 @@ public final class Locale implements Cloneable, Serializable {
*/
public Locale(String language, String country) {
this(language, country, "", "", Collections.EMPTY_SET, Collections.EMPTY_MAP,
- Collections.EMPTY_MAP, true /* from public constructor */);
+ Collections.EMPTY_MAP, false /* has validated fields */);
}
/**
@@ -847,14 +864,18 @@ public final class Locale implements Cloneable, Serializable {
/* nonnull */ Set<String> unicodeAttributes,
/* nonnull */ Map<String, String> unicodeKeywords,
/* nonnull */ Map<Character, String> extensions,
- boolean fromPublicConstructor) {
+ boolean hasValidatedFields) {
if (language == null || country == null || variant == null) {
throw new NullPointerException("language=" + language +
",country=" + country +
",variant=" + variant);
}
- if (fromPublicConstructor) {
+ if (hasValidatedFields) {
+ this.languageCode = adjustLanguageCode(language);
+ this.countryCode = country;
+ this.variantCode = variant;
+ } else {
if (language.isEmpty() && country.isEmpty()) {
languageCode = "";
countryCode = "";
@@ -864,19 +885,11 @@ public final class Locale implements Cloneable, Serializable {
countryCode = country.toUpperCase(Locale.US);
variantCode = variant;
}
- } else {
- this.languageCode = adjustLanguageCode(language);
- this.countryCode = country;
- this.variantCode = variant;
}
this.scriptCode = scriptCode;
- if (fromPublicConstructor) {
- this.unicodeAttributes = unicodeAttributes;
- this.unicodeKeywords = unicodeKeywords;
- this.extensions = extensions;
- } else {
+ if (hasValidatedFields) {
Set<String> attribsCopy = new TreeSet<String>(unicodeAttributes);
Map<String, String> keywordsCopy = new TreeMap<String, String>(
unicodeKeywords);
@@ -893,7 +906,13 @@ public final class Locale implements Cloneable, Serializable {
this.unicodeAttributes = Collections.unmodifiableSet(attribsCopy);
this.unicodeKeywords = Collections.unmodifiableMap(keywordsCopy);
this.extensions = Collections.unmodifiableMap(extensionsCopy);
+ } else {
+ this.unicodeAttributes = unicodeAttributes;
+ this.unicodeKeywords = unicodeKeywords;
+ this.extensions = extensions;
}
+
+ this.hasValidatedFields = hasValidatedFields;
}
/**
@@ -903,7 +922,7 @@ public final class Locale implements Cloneable, Serializable {
public Locale(String language, String country, String variant) {
this(language, country, variant, "", Collections.EMPTY_SET,
Collections.EMPTY_MAP, Collections.EMPTY_MAP,
- true /* from public constructor */);
+ false /* has validated fields */);
}
@Override public Object clone() {
@@ -1248,13 +1267,212 @@ public final class Locale implements Cloneable, Serializable {
*/
public String toLanguageTag() {
if (cachedLanguageTag == null) {
- cachedLanguageTag = ICU.toLanguageTag(this);
+ cachedLanguageTag = makeLanguageTag();
}
return cachedLanguageTag;
}
/**
+ * Constructs a valid BCP-47 language tag from locale fields. Additional validation
+ * is required when this Locale was not constructed using a Builder and variants
+ * set this way are treated specially.
+ *
+ * In both cases, we convert empty language tags to "und", omit invalid country tags
+ * and perform a special case conversion of "no-NO-NY" to "nn-NO".
+ */
+    private String makeLanguageTag() {
+        // We only need to revalidate the language, country and variant because
+        // the rest of the fields can only be set via the builder which validates
+        // them anyway.
+        String language = "";
+        String region = "";
+        String variant = "";
+        String illFormedVariantSubtags = "";
+
+        if (hasValidatedFields) {
+            language = languageCode;
+            region = countryCode;
+            variant = variantCode;
+        } else {
+            try {
+                language = Builder.normalizeAndValidateLanguage(languageCode);
+            } catch (IllformedLocaleException ignored) {
+                // Ignored, continue processing with "".
+            }
+
+            try {
+                region = Builder.normalizeAndValidateRegion(countryCode);
+            } catch (IllformedLocaleException ignored) {
+                // Ignored, continue processing with "".
+            }
+
+            try {
+                variant = Builder.normalizeAndValidateVariant(variantCode);
+            } catch (IllformedLocaleException ilfe) {
+                // If our variant is ill formed, we must attempt to split it into
+                // its constituent subtags and preserve the well formed bits and
+                // move the rest to the private use extension (if they're well
+                // formed extension subtags).
+                String[] split = splitIllformedVariant(variantCode);
+                variant = split[0];
+                illFormedVariantSubtags = split[1];
+            }
+        }
+        // The builder normalizes hyphens to underscores in variants, so either
+        // branch may yield underscores; a BCP-47 language tag must use hyphens.
+        variant = variant.replace('_', '-');
+
+        if (language.isEmpty()) {
+            language = "und";
+        }
+
+        if ("no".equals(language) && "NO".equals(region) && "NY".equals(variant)) {
+            language = "nn";
+            region = "NO";
+            variant = "";
+        }
+
+        final StringBuilder sb = new StringBuilder(16);
+        sb.append(language);
+
+        if (!scriptCode.isEmpty()) {
+            sb.append('-');
+            sb.append(scriptCode);
+        }
+
+        if (!region.isEmpty()) {
+            sb.append('-');
+            sb.append(region);
+        }
+
+        if (!variant.isEmpty()) {
+            sb.append('-');
+            sb.append(variant);
+        }
+
+        // Extensions (optional, omitted if empty). Note that we don't
+        // emit the private use extension here, but add it in the end.
+        for (Map.Entry<Character, String> extension : extensions.entrySet()) {
+            if (!extension.getKey().equals('x')) {
+                sb.append('-').append(extension.getKey());
+                sb.append('-').append(extension.getValue());
+            }
+        }
+
+        // The private use extension comes right at the very end.
+        final String privateUse = extensions.get('x');
+        if (privateUse != null) {
+            sb.append("-x-");
+            sb.append(privateUse);
+        }
+
+        // If we have any ill-formed variant subtags, we append them to the
+        // private use extension (or add a private use extension if one doesn't
+        // exist).
+        if (!illFormedVariantSubtags.isEmpty()) {
+            if (privateUse == null) {
+                sb.append("-x-lvariant-");
+            } else {
+                sb.append('-');
+            }
+            sb.append(illFormedVariantSubtags);
+        }
+
+        return sb.toString();
+    }
+
+    /**
+     * Splits ill formed variants into a set of valid variant subtags (which
+     * can be used directly in language tag construction) and a set of invalid
+     * variant subtags (which can be appended to the private use extension),
+     * provided that each subtag is a valid private use extension subtag.
+     *
+     * This method returns a two element String array. The first element is a string
+     * containing the '-' separated valid variant subtags which can be appended
+     * to a BCP-47 tag directly and the second containing the '-' separated
+     * ill formed variant subtags which can be appended to the private use extension
+     * directly. Either element may be empty.
+     *
+     * This method assumes that {@code variant} contains at least one ill formed
+     * variant subtag.
+     */
+    private static String[] splitIllformedVariant(String variant) {
+        final String normalizedVariant = variant.replace('_', '-');
+        final String[] subTags = normalizedVariant.split("-");
+
+        final String[] split = new String[] { "", "" };
+
+        // First go through the list of variant subtags and check if they're
+        // valid private use extension subtags. If they're not, we will omit
+        // the first such subtag and all subtags after.
+        //
+        // NOTE: |firstInvalidSubtag| is the index of the first variant
+        // subtag we decide to omit altogether, whereas |firstIllformedSubtag| is the
+        // index of the first subtag we decide to append to the private use extension.
+        //
+        // In other words:
+        // [0, firstIllformedSubtag) => expressed as variant subtags.
+        // [firstIllformedSubtag, firstInvalidSubtag) => expressed as private use
+        // extension subtags.
+        // [firstInvalidSubtag, subTags.length) => omitted.
+        int firstInvalidSubtag = subTags.length;
+        for (int i = 0; i < subTags.length; ++i) {
+            if (!isValidBcp47Alphanum(subTags[i], 1, 8)) {
+                firstInvalidSubtag = i;
+                break;
+            }
+        }
+
+        if (firstInvalidSubtag == 0) {
+            return split;
+        }
+
+        // Every subtag below |firstInvalidSubtag| can be a private use subtag; find
+        // the first one that is not also a well formed variant subtag.
+        int firstIllformedSubtag = firstInvalidSubtag;
+        for (int i = 0; i < firstInvalidSubtag; ++i) {
+            final String subTag = subTags[i];
+            // The BCP-47 spec states that :
+            // - Subtags can be between [5, 8] alphanumeric chars in length.
+            // - Subtags that start with a number are allowed to be 4 chars in length.
+            final boolean wellFormed;
+            if (subTag.length() == 4) {
+                final char firstChar = subTag.charAt(0);
+                wellFormed = firstChar >= '0' && firstChar <= '9' && isAsciiAlphaNum(subTag);
+            } else {
+                wellFormed = subTag.length() >= 5 && subTag.length() <= 8
+                        && isAsciiAlphaNum(subTag);
+            }
+            if (!wellFormed) {
+                firstIllformedSubtag = i;
+                break;
+            }
+        }
+
+        split[0] = concatenateRange(subTags, 0, firstIllformedSubtag);
+        split[1] = concatenateRange(subTags, firstIllformedSubtag, firstInvalidSubtag);
+
+        return split;
+    }
+
+    /**
+     * Builds a string by joining array elements within the range [start, end) with
+     * '-'. The supplied range is assumed to be valid and no checks are performed.
+     */
+    private static String concatenateRange(String[] array, int start, int end) {
+        StringBuilder builder = new StringBuilder(32);
+        for (int i = start; i < end; ++i) {
+            // Separate elements only; never lead with '-' when start > 0.
+            if (i != start) {
+                builder.append('-');
+            }
+            builder.append(array[i]);
+        }
+        return builder.toString();
+    }
+
+ /**
* Returns the set of BCP-47 extensions this locale contains.
*
* See <a href="https://tools.ietf.org/html/bcp47#section-2.1">
diff --git a/luni/src/main/java/libcore/icu/ICU.java b/luni/src/main/java/libcore/icu/ICU.java
index f60f427..cf04ff9 100644
--- a/luni/src/main/java/libcore/icu/ICU.java
+++ b/luni/src/main/java/libcore/icu/ICU.java
@@ -70,10 +70,6 @@ public final class ICU {
return localeFromIcuLocaleId(icuLocaleId);
}
- public static String toLanguageTag(Locale locale) {
- return languageTagForLocale(localeIdFromLocale(locale));
- }
-
private static final int IDX_LANGUAGE = 0;
private static final int IDX_SCRIPT = 1;
private static final int IDX_REGION = 2;
@@ -239,7 +235,8 @@ public final class ICU {
return new Locale(outputArray[IDX_LANGUAGE], outputArray[IDX_REGION],
outputArray[IDX_VARIANT], outputArray[IDX_SCRIPT],
- unicodeAttributeSet, unicodeKeywordsMap, extensionsMap, false);
+ unicodeAttributeSet, unicodeKeywordsMap, extensionsMap,
+ true /* has validated fields */);
}
/**
@@ -471,7 +468,6 @@ public final class ICU {
private static native String[] getISOCountriesNative();
private static native String localeForLanguageTag(String languageTag, boolean strict);
- public static native String languageTagForLocale(String locale);
static native boolean initLocaleDataNative(String locale, LocaleData result);
diff --git a/luni/src/main/native/libcore_icu_ICU.cpp b/luni/src/main/native/libcore_icu_ICU.cpp
index cc51722..f71e325 100644
--- a/luni/src/main/native/libcore_icu_ICU.cpp
+++ b/luni/src/main/native/libcore_icu_ICU.cpp
@@ -166,36 +166,6 @@ static jstring ICU_localeForLanguageTag(JNIEnv* env, jclass, jstring languageTag
return env->NewStringUTF(&buffer[0]);
}
-static jstring ICU_languageTagForLocale(JNIEnv* env, jclass, jstring javaLocaleId) {
- ScopedUtfChars localeID(env, javaLocaleId);
-
- // In most common cases, the BCP 47 tag will be the same size as the ICU
- // locale ID
- const size_t initialBufferSize = localeID.size() + 1;
- std::vector<char> buffer(initialBufferSize);
-
- UErrorCode status = U_ZERO_ERROR;
- const size_t outputLength = uloc_toLanguageTag(localeID.c_str(),
- &buffer[0], buffer.size(), false /* strict */, &status);
- if (status == U_BUFFER_OVERFLOW_ERROR) {
- buffer.resize(outputLength + 1);
- status = U_ZERO_ERROR;
- uloc_toLanguageTag(localeID.c_str(), &buffer[0], buffer.size(),
- false /* strict */, &status);
- }
-
- if (status == U_STRING_NOT_TERMINATED_WARNING) {
- buffer.resize(buffer.size() + 1);
- buffer[buffer.size() -1] = '\0';
- }
-
- if (maybeThrowIcuException(env, "ICU::languageTagForLocale", status)) {
- return NULL;
- }
-
- return env->NewStringUTF(&buffer[0]);
-}
-
static jint ICU_getCurrencyFractionDigits(JNIEnv* env, jclass, jstring javaCurrencyCode) {
ScopedJavaUnicodeString currencyCode(env, javaCurrencyCode);
if (!currencyCode.valid()) {
@@ -803,7 +773,6 @@ static JNINativeMethod gMethods[] = {
NATIVE_METHOD(ICU, getIcuVersion, "()Ljava/lang/String;"),
NATIVE_METHOD(ICU, getScript, "(Ljava/lang/String;)Ljava/lang/String;"),
NATIVE_METHOD(ICU, getUnicodeVersion, "()Ljava/lang/String;"),
- NATIVE_METHOD(ICU, languageTagForLocale, "(Ljava/lang/String;)Ljava/lang/String;"),
NATIVE_METHOD(ICU, localeForLanguageTag, "(Ljava/lang/String;Z)Ljava/lang/String;"),
NATIVE_METHOD(ICU, initLocaleDataNative, "(Ljava/lang/String;Llibcore/icu/LocaleData;)Z"),
NATIVE_METHOD(ICU, setDefaultLocale, "(Ljava/lang/String;)V"),
diff --git a/luni/src/test/java/libcore/java/util/LocaleTest.java b/luni/src/test/java/libcore/java/util/LocaleTest.java
index f9f62b8..b87dc97 100644
--- a/luni/src/test/java/libcore/java/util/LocaleTest.java
+++ b/luni/src/test/java/libcore/java/util/LocaleTest.java
@@ -1074,6 +1074,6 @@ public class LocaleTest extends junit.framework.TestCase {
Locale posix = new Locale.Builder()
.setLanguage("en").setRegion("US").setVariant("POSIX")
.build();
- assertEquals("en-US-u-va-posix", posix.toLanguageTag());
+ assertEquals("en-US-POSIX", posix.toLanguageTag());
}
}