aboutsummaryrefslogtreecommitdiffstats
path: root/lint/libs/lint_checks/src/com/android/tools/lint/checks/TypoLookup.java
diff options
context:
space:
mode:
Diffstat (limited to 'lint/libs/lint_checks/src/com/android/tools/lint/checks/TypoLookup.java')
-rw-r--r--lint/libs/lint_checks/src/com/android/tools/lint/checks/TypoLookup.java785
1 files changed, 0 insertions, 785 deletions
diff --git a/lint/libs/lint_checks/src/com/android/tools/lint/checks/TypoLookup.java b/lint/libs/lint_checks/src/com/android/tools/lint/checks/TypoLookup.java
deleted file mode 100644
index 2dcd6c3..0000000
--- a/lint/libs/lint_checks/src/com/android/tools/lint/checks/TypoLookup.java
+++ /dev/null
@@ -1,785 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.tools.lint.checks;
-
-import static com.android.SdkConstants.DOT_XML;
-import static com.android.tools.lint.detector.api.LintUtils.assertionsEnabled;
-
-import com.android.annotations.NonNull;
-import com.android.annotations.Nullable;
-import com.android.annotations.VisibleForTesting;
-import com.android.tools.lint.client.api.LintClient;
-import com.android.tools.lint.detector.api.LintUtils;
-import com.google.common.base.Charsets;
-import com.google.common.base.Splitter;
-import com.google.common.io.Files;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.nio.ByteOrder;
-import java.nio.MappedByteBuffer;
-import java.nio.channels.FileChannel.MapMode;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Comparator;
-import java.util.List;
-import java.util.WeakHashMap;
-
-/**
- * Database of common typos / misspellings.
- */
-public class TypoLookup {
- private static final TypoLookup NONE = new TypoLookup();
-
- /** String separating misspellings and suggested replacements in the text file */
- private static final String WORD_SEPARATOR = "->"; //$NON-NLS-1$
-
- /** Relative path to the typos database file within the Lint installation */
- private static final String XML_FILE_PATH = "tools/support/typos-%1$s.txt"; //$NON-NLS-1$
- private static final String FILE_HEADER = "Typo database used by Android lint\000";
- private static final int BINARY_FORMAT_VERSION = 2;
- private static final boolean DEBUG_FORCE_REGENERATE_BINARY = false;
- private static final boolean DEBUG_SEARCH = false;
- private static final boolean WRITE_STATS = false;
- /** Default size to reserve for each API entry when creating byte buffer to build up data */
- private static final int BYTES_PER_ENTRY = 28;
-
- private final LintClient mClient;
- private final File mXmlFile;
- private final File mBinaryFile;
- private byte[] mData;
- private int[] mIndices;
- private int mWordCount;
-
- private static WeakHashMap<String, TypoLookup> sInstanceMap =
- new WeakHashMap<String, TypoLookup>();
-
- /**
- * Returns an instance of the Typo database for the given locale
- *
- * @param client the client to associate with this database - used only for
- * logging. The database object may be shared among repeated
- * invocations, and in that case client used will be the one
- * originally passed in. In other words, this parameter may be
- * ignored if the client created is not new.
- * @param locale the locale to look up a typo database for (should be a
- * language code (ISO 639-1, two lowercase character names)
- * @param region the region to look up a typo database for (should be a two
- * letter ISO 3166-1 alpha-2 country code in upper case) language
- * code
- * @return a (possibly shared) instance of the typo database, or null if its
- * data can't be found
- */
- @Nullable
- public static TypoLookup get(@NonNull LintClient client, @NonNull String locale,
- @Nullable String region) {
- synchronized (TypoLookup.class) {
- String key = locale;
-
- if (region != null) {
- // Allow for region-specific dictionaries. See for example
- // http://en.wikipedia.org/wiki/American_and_British_English_spelling_differences
- assert region.length() == 2
- && Character.isUpperCase(region.charAt(0))
- && Character.isUpperCase(region.charAt(1)) : region;
- // Look for typos-en-rUS.txt etc
- key = locale + 'r' + region;
- }
-
- TypoLookup db = sInstanceMap.get(key);
- if (db == null) {
- String path = String.format(XML_FILE_PATH, key);
- File file = client.findResource(path);
- if (file == null) {
- // AOSP build environment?
- String build = System.getenv("ANDROID_BUILD_TOP"); //$NON-NLS-1$
- if (build != null) {
- file = new File(build, ("sdk/files/" //$NON-NLS-1$
- + path.substring(path.lastIndexOf('/') + 1))
- .replace('/', File.separatorChar));
- }
- }
-
- if (file == null || !file.exists()) {
- if (region != null) {
- // Fall back to the generic locale (non-region-specific) database
- return get(client, locale, null);
- }
- db = NONE;
- } else {
- db = get(client, file);
- assert db != null : file;
- }
- sInstanceMap.put(key, db);
- }
-
- if (db == NONE) {
- return null;
- } else {
- return db;
- }
- }
- }
-
- /**
- * Returns an instance of the typo database
- *
- * @param client the client to associate with this database - used only for
- * logging
- * @param xmlFile the XML file containing configuration data to use for this
- * database
- * @return a (possibly shared) instance of the typo database, or null
- * if its data can't be found
- */
- @Nullable
- private static TypoLookup get(LintClient client, File xmlFile) {
- if (!xmlFile.exists()) {
- client.log(null, "The typo database file %1$s does not exist", xmlFile);
- return null;
- }
-
- String name = xmlFile.getName();
- if (LintUtils.endsWith(name, DOT_XML)) {
- name = name.substring(0, name.length() - DOT_XML.length());
- }
- File cacheDir = client.getCacheDir(true/*create*/);
- if (cacheDir == null) {
- cacheDir = xmlFile.getParentFile();
- }
-
- File binaryData = new File(cacheDir, name
- // Incorporate version number in the filename to avoid upgrade filename
- // conflicts on Windows (such as issue #26663)
- + "-" + BINARY_FORMAT_VERSION + ".bin"); //$NON-NLS-1$ //$NON-NLS-2$
-
- if (DEBUG_FORCE_REGENERATE_BINARY) {
- System.err.println("\nTemporarily regenerating binary data unconditionally \nfrom "
- + xmlFile + "\nto " + binaryData);
- if (!createCache(client, xmlFile, binaryData)) {
- return null;
- }
- } else if (!binaryData.exists() || binaryData.lastModified() < xmlFile.lastModified()) {
- if (!createCache(client, xmlFile, binaryData)) {
- return null;
- }
- }
-
- if (!binaryData.exists()) {
- client.log(null, "The typo database file %1$s does not exist", binaryData);
- return null;
- }
-
- return new TypoLookup(client, xmlFile, binaryData);
- }
-
- private static boolean createCache(LintClient client, File xmlFile, File binaryData) {
- long begin = 0;
- if (WRITE_STATS) {
- begin = System.currentTimeMillis();
- }
-
- // Read in data
- List<String> lines;
- try {
- lines = Files.readLines(xmlFile, Charsets.UTF_8);
- } catch (IOException e) {
- client.log(e, "Can't read typo database file");
- return false;
- }
-
- if (WRITE_STATS) {
- long end = System.currentTimeMillis();
- System.out.println("Reading data structures took " + (end - begin) + " ms)");
- }
-
- try {
- writeDatabase(binaryData, lines);
- return true;
- } catch (IOException ioe) {
- client.log(ioe, "Can't write typo cache file");
- }
-
- return false;
- }
-
/**
 * Creates a database for the given files and, when a binary cache file is supplied,
 * loads it immediately. Use one of the {@link #get} factory methods instead.
 *
 * @param client the client used for logging
 * @param xmlFile the text file the binary cache is generated from
 * @param binaryFile the binary cache file to load, or null to skip loading
 */
private TypoLookup(
        @NonNull LintClient client,
        @NonNull File xmlFile,
        @Nullable File binaryFile) {
    mBinaryFile = binaryFile;
    mXmlFile = xmlFile;
    mClient = client;

    if (binaryFile == null) {
        return;
    }
    readData();
}
-
/** Creates an instance with no client, no files and no data (used for {@link #NONE}). */
private TypoLookup() {
    mBinaryFile = null;
    mXmlFile = null;
    mClient = null;
}
-
- private void readData() {
- if (!mBinaryFile.exists()) {
- mClient.log(null, "%1$s does not exist", mBinaryFile);
- return;
- }
- long start = System.currentTimeMillis();
- try {
- MappedByteBuffer buffer = Files.map(mBinaryFile, MapMode.READ_ONLY);
- assert buffer.order() == ByteOrder.BIG_ENDIAN;
-
- // First skip the header
- byte[] expectedHeader = FILE_HEADER.getBytes(Charsets.US_ASCII);
- buffer.rewind();
- for (int offset = 0; offset < expectedHeader.length; offset++) {
- if (expectedHeader[offset] != buffer.get()) {
- mClient.log(null, "Incorrect file header: not an typo database cache " +
- "file, or a corrupt cache file");
- return;
- }
- }
-
- // Read in the format number
- if (buffer.get() != BINARY_FORMAT_VERSION) {
- // Force regeneration of new binary data with up to date format
- if (createCache(mClient, mXmlFile, mBinaryFile)) {
- readData(); // Recurse
- }
-
- return;
- }
-
- mWordCount = buffer.getInt();
-
- // Read in the word table indices;
- int count = mWordCount;
- int[] offsets = new int[count];
-
- // Another idea: I can just store the DELTAS in the file (and add them up
- // when reading back in) such that it takes just ONE byte instead of four!
-
- for (int i = 0; i < count; i++) {
- offsets[i] = buffer.getInt();
- }
-
- // No need to read in the rest -- we'll just keep the whole byte array in memory
- // TODO: Make this code smarter/more efficient.
- int size = buffer.limit();
- byte[] b = new byte[size];
- buffer.rewind();
- buffer.get(b);
- mData = b;
- mIndices = offsets;
-
- // TODO: We only need to keep the data portion here since we've initialized
- // the offset array separately.
- // TODO: Investigate (profile) accessing the byte buffer directly instead of
- // accessing a byte array.
- } catch (IOException e) {
- mClient.log(e, null);
- }
- if (WRITE_STATS) {
- long end = System.currentTimeMillis();
- System.out.println("\nRead typo database in " + (end - start)
- + " milliseconds.");
- System.out.println("Size of data table: " + mData.length + " bytes ("
- + Integer.toString(mData.length/1024) + "k)\n");
- }
- }
-
- /** See the {@link #readData()} for documentation on the data format. */
- private static void writeDatabase(File file, List<String> lines) throws IOException {
- /*
- * 1. A file header, which is the exact contents of {@link FILE_HEADER} encoded
- * as ASCII characters. The purpose of the header is to identify what the file
- * is for, for anyone attempting to open the file.
- * 2. A file version number. If the binary file does not match the reader's expected
- * version, it can ignore it (and regenerate the cache from XML).
- */
-
- // Drop comments etc
- List<String> words = new ArrayList<String>(lines.size());
- for (String line : lines) {
- if (!line.isEmpty() && Character.isLetter(line.charAt(0))) {
- int end = line.indexOf(WORD_SEPARATOR);
- if (end == -1) {
- end = line.trim().length();
- }
- String typo = line.substring(0, end).trim();
- String replacements = line.substring(end + WORD_SEPARATOR.length()).trim();
- if (replacements.isEmpty()) {
- // We don't support empty replacements
- continue;
- }
- String combined = typo + (char) 0 + replacements;
-
- words.add(combined);
- }
- }
-
- byte[][] wordArrays = new byte[words.size()][];
- for (int i = 0, n = words.size(); i < n; i++) {
- String word = words.get(i);
- wordArrays[i] = word.getBytes(Charsets.UTF_8);
- }
- // Sort words, using our own comparator to ensure that it matches the
- // binary search in getTypos()
- Comparator<byte[]> comparator = new Comparator<byte[]>() {
- @Override
- public int compare(byte[] o1, byte[] o2) {
- return TypoLookup.compare(o1, 0, (byte) 0, o2, 0, o2.length);
- }
- };
- Arrays.sort(wordArrays, comparator);
-
- int entryCount = wordArrays.length;
- int capacity = entryCount * BYTES_PER_ENTRY;
- ByteBuffer buffer = ByteBuffer.allocate(capacity);
- buffer.order(ByteOrder.BIG_ENDIAN);
- // 1. A file header, which is the exact contents of {@link FILE_HEADER} encoded
- // as ASCII characters. The purpose of the header is to identify what the file
- // is for, for anyone attempting to open the file.
- buffer.put(FILE_HEADER.getBytes(Charsets.US_ASCII));
-
- // 2. A file version number. If the binary file does not match the reader's expected
- // version, it can ignore it (and regenerate the cache from XML).
- buffer.put((byte) BINARY_FORMAT_VERSION);
-
- // 3. The number of words [1 int]
- buffer.putInt(entryCount);
-
- // 4. Word offset table (one integer per word, pointing to the byte offset in the
- // file (relative to the beginning of the file) where each word begins.
- // The words are always sorted alphabetically.
- int wordOffsetTable = buffer.position();
-
- // Reserve enough room for the offset table here: we will backfill it with pointers
- // as we're writing out the data structures below
- for (int i = 0, n = entryCount; i < n; i++) {
- buffer.putInt(0);
- }
-
- int nextEntry = buffer.position();
- int nextOffset = wordOffsetTable;
-
- // 7. Word entry table. Each word entry consists of the word, followed by the byte 0
- // as a terminator, followed by a comma separated list of suggestions (which
- // may be empty), or a final 0.
- for (int i = 0; i < entryCount; i++) {
- byte[] word = wordArrays[i];
- buffer.position(nextOffset);
- buffer.putInt(nextEntry);
- nextOffset = buffer.position();
- buffer.position(nextEntry);
-
- buffer.put(word); // already embeds 0 to separate typo from words
- buffer.put((byte) 0);
-
- nextEntry = buffer.position();
- }
-
- int size = buffer.position();
- assert size <= buffer.limit();
- buffer.mark();
-
- if (WRITE_STATS) {
- System.out.println("Wrote " + words.size() + " word entries");
- System.out.print("Actual binary size: " + size + " bytes");
- System.out.println(String.format(" (%.1fM)", size/(1024*1024.f)));
-
- System.out.println("Allocated size: " + (entryCount * BYTES_PER_ENTRY) + " bytes");
- System.out.println("Required bytes per entry: " + (size/ entryCount) + " bytes");
- }
-
- // Now dump this out as a file
- // There's probably an API to do this more efficiently; TODO: Look into this.
- byte[] b = new byte[size];
- buffer.rewind();
- buffer.get(b);
- FileOutputStream output = Files.newOutputStreamSupplier(file).getOutput();
- output.write(b);
- output.close();
- }
-
- // For debugging only
- private String dumpEntry(int offset) {
- if (DEBUG_SEARCH) {
- int end = offset;
- while (mData[end] != 0) {
- end++;
- }
- return new String(mData, offset, end - offset, Charsets.UTF_8);
- } else {
- return "<disabled>"; //$NON-NLS-1$
- }
- }
-
- /** Comparison function: *only* used for ASCII strings */
- @VisibleForTesting
- static int compare(byte[] data, int offset, byte terminator, CharSequence s,
- int begin, int end) {
- int i = offset;
- int j = begin;
- for (; ; i++, j++) {
- byte b = data[i];
- if (b == ' ') {
- // We've matched up to the space in a split-word typo, such as
- // in German all zu=>allzu; here we've matched just past "all".
- // Rather than terminating, attempt to continue in the buffer.
- if (j == end) {
- int max = s.length();
- if (end < max && s.charAt(end) == ' ') {
- // Find next word
- for (; end < max; end++) {
- char c = s.charAt(end);
- if (!Character.isLetter(c)) {
- if (c == ' ' && end == j) {
- continue;
- }
- break;
- }
- }
- }
- }
- }
-
- if (j == end) {
- break;
- }
-
- if (b == '*') {
- // Glob match (only supported at the end)
- return 0;
- }
- char c = s.charAt(j);
- byte cb = (byte) c;
- int delta = b - cb;
- if (delta != 0) {
- cb = (byte) Character.toLowerCase(c);
- if (b != cb) {
- // Ensure that it has the right sign
- b = (byte) Character.toLowerCase(b);
- delta = b - cb;
- if (delta != 0) {
- return delta;
- }
- }
- }
- }
-
- return data[i] - terminator;
- }
-
- /** Comparison function used for general UTF-8 encoded strings */
- @VisibleForTesting
- static int compare(byte[] data, int offset, byte terminator, byte[] s,
- int begin, int end) {
- int i = offset;
- int j = begin;
- for (; ; i++, j++) {
- byte b = data[i];
- if (b == ' ') {
- // We've matched up to the space in a split-word typo, such as
- // in German all zu=>allzu; here we've matched just past "all".
- // Rather than terminating, attempt to continue in the buffer.
- // We've matched up to the space in a split-word typo, such as
- // in German all zu=>allzu; here we've matched just past "all".
- // Rather than terminating, attempt to continue in the buffer.
- if (j == end) {
- int max = s.length;
- if (end < max && s[end] == ' ') {
- // Find next word
- for (; end < max; end++) {
- byte cb = s[end];
- if (!isLetter(cb)) {
- if (cb == ' ' && end == j) {
- continue;
- }
- break;
- }
- }
- }
- }
- }
-
- if (j == end) {
- break;
- }
- if (b == '*') {
- // Glob match (only supported at the end)
- return 0;
- }
- byte cb = s[j];
- int delta = b - cb;
- if (delta != 0) {
- cb = toLowerCase(cb);
- b = toLowerCase(b);
- delta = b - cb;
- if (delta != 0) {
- return delta;
- }
- }
-
- if (b == terminator || cb == terminator) {
- return delta;
- }
- }
-
- return data[i] - terminator;
- }
-
- /**
- * Look up whether this word is a typo, and if so, return the typo itself
- * and one or more likely meanings
- *
- * @param text the string containing the word
- * @param begin the index of the first character in the word
- * @param end the index of the first character after the word. Note that the
- * search may extend <b>beyond</b> this index, if for example the
- * word matches a multi-word typo in the dictionary
- * @return a list of the typo itself followed by the replacement strings if
- * the word represents a typo, and null otherwise
- */
- @Nullable
- public List<String> getTypos(@NonNull CharSequence text, int begin, int end) {
- assert end <= text.length();
-
- if (assertionsEnabled()) {
- for (int i = begin; i < end; i++) {
- char c = text.charAt(i);
- if (c >= 128) {
- assert false : "Call the UTF-8 version of this method instead";
- return null;
- }
- }
- }
-
- int low = 0;
- int high = mWordCount - 1;
- while (low <= high) {
- int middle = (low + high) >>> 1;
- int offset = mIndices[middle];
-
- if (DEBUG_SEARCH) {
- System.out.println("Comparing string " + text +" with entry at " + offset
- + ": " + dumpEntry(offset));
- }
-
- // Compare the word at the given index.
- int compare = compare(mData, offset, (byte) 0, text, begin, end);
-
- if (compare == 0) {
- offset = mIndices[middle];
-
- // Don't allow matching uncapitalized words, such as "enlish", when
- // the dictionary word is capitalized, "Enlish".
- if (mData[offset] != text.charAt(begin)
- && Character.isLowerCase(text.charAt(begin))) {
- return null;
- }
-
- // Make sure there is a case match; we only want to allow
- // matching capitalized words to capitalized typos or uncapitalized typos
- // (e.g. "Teh" and "teh" to "the"), but not uncapitalized words to capitalized
- // typos (e.g. "enlish" to "Enlish").
- String glob = null;
- for (int i = begin; ; i++) {
- byte b = mData[offset++];
- if (b == 0) {
- offset--;
- break;
- } else if (b == '*') {
- int globEnd = i;
- while (globEnd < text.length()
- && Character.isLetter(text.charAt(globEnd))) {
- globEnd++;
- }
- glob = text.subSequence(i, globEnd).toString();
- break;
- }
- char c = text.charAt(i);
- byte cb = (byte) c;
- if (b != cb && i > begin) {
- return null;
- }
- }
-
- return computeSuggestions(mIndices[middle], offset, glob);
- }
-
- if (compare < 0) {
- low = middle + 1;
- } else if (compare > 0) {
- high = middle - 1;
- } else {
- assert false; // compare == 0 already handled above
- return null;
- }
- }
-
- return null;
- }
-
- /**
- * Look up whether this word is a typo, and if so, return the typo itself
- * and one or more likely meanings
- *
- * @param utf8Text the string containing the word, encoded as UTF-8
- * @param begin the index of the first character in the word
- * @param end the index of the first character after the word. Note that the
- * search may extend <b>beyond</b> this index, if for example the
- * word matches a multi-word typo in the dictionary
- * @return a list of the typo itself followed by the replacement strings if
- * the word represents a typo, and null otherwise
- */
- @Nullable
- public List<String> getTypos(@NonNull byte[] utf8Text, int begin, int end) {
- assert end <= utf8Text.length;
-
- int low = 0;
- int high = mWordCount - 1;
- while (low <= high) {
- int middle = (low + high) >>> 1;
- int offset = mIndices[middle];
-
- if (DEBUG_SEARCH) {
- String s = new String(Arrays.copyOfRange(utf8Text, begin, end), Charsets.UTF_8);
- System.out.println("Comparing string " + s +" with entry at " + offset
- + ": " + dumpEntry(offset));
- System.out.println(" middle=" + middle + ", low=" + low + ", high=" + high);
- }
-
- // Compare the word at the given index.
- int compare = compare(mData, offset, (byte) 0, utf8Text, begin, end);
-
- if (DEBUG_SEARCH) {
- System.out.println(" signum=" + (int)Math.signum(compare) + ", delta=" + compare);
- }
-
- if (compare == 0) {
- offset = mIndices[middle];
-
- // Don't allow matching uncapitalized words, such as "enlish", when
- // the dictionary word is capitalized, "Enlish".
- if (mData[offset] != utf8Text[begin] && isUpperCase(mData[offset])) {
- return null;
- }
-
- // Make sure there is a case match; we only want to allow
- // matching capitalized words to capitalized typos or uncapitalized typos
- // (e.g. "Teh" and "teh" to "the"), but not uncapitalized words to capitalized
- // typos (e.g. "enlish" to "Enlish").
- String glob = null;
- for (int i = begin; ; i++) {
- byte b = mData[offset++];
- if (b == 0) {
- offset--;
- break;
- } else if (b == '*') {
- int globEnd = i;
- while (globEnd < utf8Text.length && isLetter(utf8Text[globEnd])) {
- globEnd++;
- }
- glob = new String(utf8Text, i, globEnd - i, Charsets.UTF_8);
- break;
- }
- byte cb = utf8Text[i];
- if (b != cb && i > begin) {
- return null;
- }
- }
-
- return computeSuggestions(mIndices[middle], offset, glob);
- }
-
- if (compare < 0) {
- low = middle + 1;
- } else if (compare > 0) {
- high = middle - 1;
- } else {
- assert false; // compare == 0 already handled above
- return null;
- }
- }
-
- return null;
- }
-
- private List<String> computeSuggestions(int begin, int offset, String glob) {
- String typo = new String(mData, begin, offset - begin, Charsets.UTF_8);
-
- if (glob != null) {
- typo = typo.replaceAll("\\*", glob); //$NON-NLS-1$
- }
-
- assert mData[offset] == 0;
- offset++;
- int replacementEnd = offset;
- while (mData[replacementEnd] != 0) {
- replacementEnd++;
- }
- String replacements = new String(mData, offset, replacementEnd - offset, Charsets.UTF_8);
- List<String> words = new ArrayList<String>();
- words.add(typo);
-
- // The first entry should be the typo itself. We need to pass this back since due
- // to multi-match words and globbing it could extend beyond the initial word range
-
- for (String s : Splitter.on(',').omitEmptyStrings().trimResults().split(replacements)) {
- if (glob != null) {
- // Need to append the glob string to each result
- words.add(s.replaceAll("\\*", glob)); //$NON-NLS-1$
- } else {
- words.add(s);
- }
- }
-
- return words;
- }
-
- // "Character" handling for bytes. This assumes that the bytes correspond to Unicode
- // characters in the ISO 8859-1 range, which is are encoded the same way in UTF-8.
- // This obviously won't work to for example uppercase to lowercase conversions for
- // multi byte characters, which means we simply won't catch typos if the dictionaries
- // contain these. None of the currently included dictionaries do. However, it does
- // help us properly deal with punctuation and spacing characters.
-
- static final boolean isUpperCase(byte b) {
- return Character.isUpperCase((char) b);
- }
-
- static final byte toLowerCase(byte b) {
- return (byte) Character.toLowerCase((char) b);
- }
-
- static final boolean isSpace(byte b) {
- return Character.isWhitespace((char) b);
- }
-
- static final boolean isLetter(byte b) {
- // Assume that multi byte characters represent letters in other languages.
- // Obviously, it could be unusual punctuation etc but letters are more likely
- // in this context.
- return Character.isLetter((char) b) || (b & 0x80) != 0;
- }
-}