author | Narayan Kamath <narayan@google.com> | 2015-06-17 11:38:12 +0000
committer | Android (Google) Code Review <android-gerrit@google.com> | 2015-06-17 11:38:16 +0000
commit | 2c89e7d8dba1713de17157a164071c65f4b00915 (patch)
tree | 40b5eab1f757cbbea5c48343d9ad9c6f35b1d56f
parent | 4600dd053dbdbd4b95f3b11057a1cc55b99f9c77 (diff)
parent | b39dfe87b89e3773910202f2e94b9860236551d9 (diff)
Merge "Delete android.speech.srec.*" into mnc-dev
-rw-r--r-- | core/java/android/speech/srec/MicrophoneInputStream.java | 110
-rw-r--r-- | core/java/android/speech/srec/Recognizer.java | 716
-rw-r--r-- | core/java/android/speech/srec/UlawEncoderInputStream.java | 187
-rw-r--r-- | core/java/android/speech/srec/WaveHeader.java | 276
-rw-r--r-- | core/java/android/speech/srec/package.html | 6
5 files changed, 0 insertions, 1295 deletions
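The five files below made up the android.speech.srec package. As the deleted Recognizer Javadoc further down documents, the classes were driven in a simple synchronous loop; a condensed sketch of that documented flow (the grammar path, "@Names" slot, and 11 kHz sample rate are the ones that Javadoc itself uses) is:

```java
import android.speech.srec.MicrophoneInputStream;
import android.speech.srec.Recognizer;

import java.io.IOException;
import java.io.InputStream;

class SrecExample {
    static void recognize(String[] names) throws IOException {
        // 11 kHz microphone input with a 5-second FIFO, as in the deleted Javadoc example.
        InputStream audio = new MicrophoneInputStream(11025, 11025 * 5);
        String cdir = Recognizer.getConfigDir(null);
        Recognizer recognizer = new Recognizer(cdir + "/baseline11k.par");

        // Load the VoiceDialer grammar and fill its @Names slot.
        Recognizer.Grammar grammar = recognizer.new Grammar(cdir + "/grammars/VoiceDialer.g2g");
        grammar.setupRecognizer();
        grammar.resetAllSlots();
        for (String name : names) grammar.addWordToSlot("@Names", name, null, 1, "V=1");
        grammar.compile();

        recognizer.start();
        loop:
        while (true) {
            switch (recognizer.advance()) {
                case Recognizer.EVENT_INCOMPLETE:
                case Recognizer.EVENT_STARTED:
                case Recognizer.EVENT_START_OF_VOICING:
                case Recognizer.EVENT_END_OF_VOICING:
                    continue;                      // let the Recognizer keep running
                case Recognizer.EVENT_NEED_MORE_AUDIO:
                    recognizer.putAudio(audio);    // feed PCM from the microphone
                    continue;
                case Recognizer.EVENT_RECOGNITION_RESULT:
                    for (int i = 0; i < recognizer.getResultCount(); i++) {
                        String literal = recognizer.getResult(i, Recognizer.KEY_LITERAL);
                    }
                    break loop;
                default:
                    break loop;                    // no match, timeout, etc.
            }
        }
        recognizer.stop();
        recognizer.destroy();
        audio.close();
    }
}
```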
diff --git a/core/java/android/speech/srec/MicrophoneInputStream.java b/core/java/android/speech/srec/MicrophoneInputStream.java deleted file mode 100644 index 94db176..0000000 --- a/core/java/android/speech/srec/MicrophoneInputStream.java +++ /dev/null @@ -1,110 +0,0 @@ -/*---------------------------------------------------------------------------* - * MicrophoneInputStream.java * - * * - * Copyright 2007 Nuance Communciations, Inc. * - * * - * Licensed under the Apache License, Version 2.0 (the 'License'); * - * you may not use this file except in compliance with the License. * - * * - * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 * - * * - * Unless required by applicable law or agreed to in writing, software * - * distributed under the License is distributed on an 'AS IS' BASIS, * - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * - * See the License for the specific language governing permissions and * - * limitations under the License. * - * * - *---------------------------------------------------------------------------*/ - - -package android.speech.srec; - -import java.io.IOException; -import java.io.InputStream; -import java.lang.IllegalStateException; - - -/** - * PCM input stream from the microphone, 16 bits per sample. - */ -public final class MicrophoneInputStream extends InputStream { - static { - System.loadLibrary("srec_jni"); - } - - private final static String TAG = "MicrophoneInputStream"; - private long mAudioRecord = 0; - private byte[] mOneByte = new byte[1]; - - /** - * MicrophoneInputStream constructor. - * @param sampleRate sample rate of the microphone, typically 11025 or 8000. - * @param fifoDepth depth of the real time fifo, measured in sampleRate clock ticks. - * This determines how long an application may delay before losing data. - */ - public MicrophoneInputStream(int sampleRate, int fifoDepth) throws IOException { - mAudioRecord = AudioRecordNew(sampleRate, fifoDepth); - if (mAudioRecord == 0) throw new IOException("AudioRecord constructor failed - busy?"); - int status = AudioRecordStart(mAudioRecord); - if (status != 0) { - close(); - throw new IOException("AudioRecord start failed: " + status); - } - } - - @Override - public int read() throws IOException { - if (mAudioRecord == 0) throw new IllegalStateException("not open"); - int rtn = AudioRecordRead(mAudioRecord, mOneByte, 0, 1); - return rtn == 1 ? ((int)mOneByte[0] & 0xff) : -1; - } - - @Override - public int read(byte[] b) throws IOException { - if (mAudioRecord == 0) throw new IllegalStateException("not open"); - return AudioRecordRead(mAudioRecord, b, 0, b.length); - } - - @Override - public int read(byte[] b, int offset, int length) throws IOException { - if (mAudioRecord == 0) throw new IllegalStateException("not open"); - // TODO: should we force all reads to be a multiple of the sample size? - return AudioRecordRead(mAudioRecord, b, offset, length); - } - - /** - * Closes this stream. 
- */ - @Override - public void close() throws IOException { - if (mAudioRecord != 0) { - try { - AudioRecordStop(mAudioRecord); - } finally { - try { - AudioRecordDelete(mAudioRecord); - } finally { - mAudioRecord = 0; - } - } - } - } - - @Override - protected void finalize() throws Throwable { - if (mAudioRecord != 0) { - close(); - throw new IOException("someone forgot to close MicrophoneInputStream"); - } - } - - // - // AudioRecord JNI interface - // - private static native long AudioRecordNew(int sampleRate, int fifoDepth); - private static native int AudioRecordStart(long audioRecord); - private static native int AudioRecordRead(long audioRecord, byte[] b, int offset, int length) throws IOException; - private static native void AudioRecordStop(long audioRecord) throws IOException; - private static native void AudioRecordDelete(long audioRecord) throws IOException; -} diff --git a/core/java/android/speech/srec/Recognizer.java b/core/java/android/speech/srec/Recognizer.java deleted file mode 100644 index 6c491a0..0000000 --- a/core/java/android/speech/srec/Recognizer.java +++ /dev/null @@ -1,716 +0,0 @@ -/* - * --------------------------------------------------------------------------- - * Recognizer.java - * - * Copyright 2007 Nuance Communciations, Inc. - * - * Licensed under the Apache License, Version 2.0 (the 'License'); you may not - * use this file except in compliance with the License. - * - * You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an 'AS IS' BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - * - * --------------------------------------------------------------------------- - */ - - -package android.speech.srec; - -import java.io.File; -import java.io.InputStream; -import java.io.IOException; -import java.util.Locale; - -/** - * Simple, synchronous speech recognizer, using the Nuance SREC package. - * Usages proceeds as follows: - * - * <ul> - * <li>Create a <code>Recognizer</code>. - * <li>Create a <code>Recognizer.Grammar</code>. - * <li>Setup the <code>Recognizer.Grammar</code>. - * <li>Reset the <code>Recognizer.Grammar</code> slots, if needed. - * <li>Fill the <code>Recognizer.Grammar</code> slots, if needed. - * <li>Compile the <code>Recognizer.Grammar</code>, if needed. - * <li>Save the filled <code>Recognizer.Grammar</code>, if needed. - * <li>Start the <code>Recognizer</code>. - * <li>Loop over <code>advance</code> and <code>putAudio</code> until recognition complete. - * <li>Fetch and process results, or notify of failure. - * <li>Stop the <code>Recognizer</code>. - * <li>Destroy the <code>Recognizer</code>. 
- * </ul> - * - * <p>Below is example code</p> - * - * <pre class="prettyprint"> - * - * // create and start audio input - * InputStream audio = new MicrophoneInputStream(11025, 11025*5); - * // create a Recognizer - * String cdir = Recognizer.getConfigDir(null); - * Recognizer recognizer = new Recognizer(cdir + "/baseline11k.par"); - * // create and load a Grammar - * Recognizer.Grammar grammar = recognizer.new Grammar(cdir + "/grammars/VoiceDialer.g2g"); - * // setup the Grammar to work with the Recognizer - * grammar.setupRecognizer(); - * // fill the Grammar slots with names and save, if required - * grammar.resetAllSlots(); - * for (String name : names) grammar.addWordToSlot("@Names", name, null, 1, "V=1"); - * grammar.compile(); - * grammar.save(".../foo.g2g"); - * // start the Recognizer - * recognizer.start(); - * // loop over Recognizer events - * while (true) { - * switch (recognizer.advance()) { - * case Recognizer.EVENT_INCOMPLETE: - * case Recognizer.EVENT_STARTED: - * case Recognizer.EVENT_START_OF_VOICING: - * case Recognizer.EVENT_END_OF_VOICING: - * // let the Recognizer continue to run - * continue; - * case Recognizer.EVENT_RECOGNITION_RESULT: - * // success, so fetch results here! - * for (int i = 0; i < recognizer.getResultCount(); i++) { - * String result = recognizer.getResult(i, Recognizer.KEY_LITERAL); - * } - * break; - * case Recognizer.EVENT_NEED_MORE_AUDIO: - * // put more audio in the Recognizer - * recognizer.putAudio(audio); - * continue; - * default: - * notifyFailure(); - * break; - * } - * break; - * } - * // stop the Recognizer - * recognizer.stop(); - * // destroy the Recognizer - * recognizer.destroy(); - * // stop the audio device - * audio.close(); - * - * </pre> - */ -public final class Recognizer { - static { - System.loadLibrary("srec_jni"); - } - - private static String TAG = "Recognizer"; - - /** - * Result key corresponding to confidence score. - */ - public static final String KEY_CONFIDENCE = "conf"; - - /** - * Result key corresponding to literal text. - */ - public static final String KEY_LITERAL = "literal"; - - /** - * Result key corresponding to semantic meaning text. - */ - public static final String KEY_MEANING = "meaning"; - - // handle to SR_Vocabulary object - private long mVocabulary = 0; - - // handle to SR_Recognizer object - private long mRecognizer = 0; - - // Grammar currently associated with Recognizer via SR_GrammarSetupRecognizer - private Grammar mActiveGrammar = null; - - /** - * Get the pathname of the SREC configuration directory corresponding to the - * language indicated by the Locale. - * This directory contains dictionaries, speech models, - * configuration files, and other data needed by the Recognizer. - * @param locale <code>Locale</code> corresponding to the desired language, - * or null for default, currently <code>Locale.US</code>. - * @return Pathname of the configuration directory. - */ - public static String getConfigDir(Locale locale) { - if (locale == null) locale = Locale.US; - String dir = "/system/usr/srec/config/" + - locale.toString().replace('_', '.').toLowerCase(Locale.ROOT); - if ((new File(dir)).isDirectory()) return dir; - return null; - } - - /** - * Create an instance of a SREC speech recognizer. - * - * @param configFile pathname of the baseline*.par configuration file, - * which in turn contains references to dictionaries, speech models, - * and other data needed to configure and operate the recognizer. - * A separate config file is needed for each audio sample rate. 
- * Two files, baseline11k.par and baseline8k.par, which correspond to - * 11025 and 8000 hz, are present in the directory indicated by - * {@link #getConfigDir}. - * @throws IOException - */ - public Recognizer(String configFile) throws IOException { - PMemInit(); - SR_SessionCreate(configFile); - mRecognizer = SR_RecognizerCreate(); - SR_RecognizerSetup(mRecognizer); - mVocabulary = SR_VocabularyLoad(); - } - - /** - * Represents a grammar loaded into the Recognizer. - */ - public class Grammar { - private long mGrammar = 0; - - /** - * Create a <code>Grammar</code> instance. - * @param g2gFileName pathname of g2g file. - */ - public Grammar(String g2gFileName) throws IOException { - mGrammar = SR_GrammarLoad(g2gFileName); - SR_GrammarSetupVocabulary(mGrammar, mVocabulary); - } - - /** - * Reset all slots. - */ - public void resetAllSlots() { - SR_GrammarResetAllSlots(mGrammar); - } - - /** - * Add a word to a slot. - * - * @param slot slot name. - * @param word word to insert. - * @param pron pronunciation, or null to derive from word. - * @param weight weight to give the word. One is normal, 50 is low. - * @param tag semantic meaning tag string. - */ - public void addWordToSlot(String slot, String word, String pron, int weight, String tag) { - SR_GrammarAddWordToSlot(mGrammar, slot, word, pron, weight, tag); - } - - /** - * Compile all slots. - */ - public void compile() { - SR_GrammarCompile(mGrammar); - } - - /** - * Setup <code>Grammar</code> with <code>Recognizer</code>. - */ - public void setupRecognizer() { - SR_GrammarSetupRecognizer(mGrammar, mRecognizer); - mActiveGrammar = this; - } - - /** - * Save <code>Grammar</code> to g2g file. - * - * @param g2gFileName - * @throws IOException - */ - public void save(String g2gFileName) throws IOException { - SR_GrammarSave(mGrammar, g2gFileName); - } - - /** - * Release resources associated with this <code>Grammar</code>. - */ - public void destroy() { - // TODO: need to do cleanup and disassociation with Recognizer - if (mGrammar != 0) { - SR_GrammarDestroy(mGrammar); - mGrammar = 0; - } - } - - /** - * Clean up resources. - */ - protected void finalize() { - if (mGrammar != 0) { - destroy(); - throw new IllegalStateException("someone forgot to destroy Grammar"); - } - } - } - - /** - * Start recognition - */ - public void start() { - // TODO: shouldn't be here? - SR_RecognizerActivateRule(mRecognizer, mActiveGrammar.mGrammar, "trash", 1); - SR_RecognizerStart(mRecognizer); - } - - /** - * Process some audio and return the current status. - * @return recognition event, one of: - * <ul> - * <li><code>EVENT_INVALID</code> - * <li><code>EVENT_NO_MATCH</code> - * <li><code>EVENT_INCOMPLETE</code> - * <li><code>EVENT_STARTED</code> - * <li><code>EVENT_STOPPED</code> - * <li><code>EVENT_START_OF_VOICING</code> - * <li><code>EVENT_END_OF_VOICING</code> - * <li><code>EVENT_SPOKE_TOO_SOON</code> - * <li><code>EVENT_RECOGNITION_RESULT</code> - * <li><code>EVENT_START_OF_UTTERANCE_TIMEOUT</code> - * <li><code>EVENT_RECOGNITION_TIMEOUT</code> - * <li><code>EVENT_NEED_MORE_AUDIO</code> - * <li><code>EVENT_MAX_SPEECH</code> - * </ul> - */ - public int advance() { - return SR_RecognizerAdvance(mRecognizer); - } - - /** - * Put audio samples into the <code>Recognizer</code>. - * @param buf holds the audio samples. - * @param offset offset of the first sample. - * @param length number of bytes containing samples. - * @param isLast indicates no more audio data, normally false. - * @return number of bytes accepted. 
- */ - public int putAudio(byte[] buf, int offset, int length, boolean isLast) { - return SR_RecognizerPutAudio(mRecognizer, buf, offset, length, isLast); - } - - /** - * Read audio samples from an <code>InputStream</code> and put them in the - * <code>Recognizer</code>. - * @param audio <code>InputStream</code> containing PCM audio samples. - */ - public void putAudio(InputStream audio) throws IOException { - // make sure the audio buffer is allocated - if (mPutAudioBuffer == null) mPutAudioBuffer = new byte[512]; - // read some data - int nbytes = audio.read(mPutAudioBuffer); - // eof, so signal Recognizer - if (nbytes == -1) { - SR_RecognizerPutAudio(mRecognizer, mPutAudioBuffer, 0, 0, true); - } - // put it into the Recognizer - else if (nbytes != SR_RecognizerPutAudio(mRecognizer, mPutAudioBuffer, 0, nbytes, false)) { - throw new IOException("SR_RecognizerPutAudio failed nbytes=" + nbytes); - } - } - - // audio buffer for putAudio(InputStream) - private byte[] mPutAudioBuffer = null; - - /** - * Get the number of recognition results. Must be called after - * <code>EVENT_RECOGNITION_RESULT</code> is returned by - * <code>advance</code>, but before <code>stop</code>. - * - * @return number of results in nbest list. - */ - public int getResultCount() { - return SR_RecognizerResultGetSize(mRecognizer); - } - - /** - * Get a set of keys for the result. Must be called after - * <code>EVENT_RECOGNITION_RESULT</code> is returned by - * <code>advance</code>, but before <code>stop</code>. - * - * @param index index of result. - * @return array of keys. - */ - public String[] getResultKeys(int index) { - return SR_RecognizerResultGetKeyList(mRecognizer, index); - } - - /** - * Get a result value. Must be called after - * <code>EVENT_RECOGNITION_RESULT</code> is returned by - * <code>advance</code>, but before <code>stop</code>. - * - * @param index index of the result. - * @param key key of the result. This is typically one of - * <code>KEY_CONFIDENCE</code>, <code>KEY_LITERAL</code>, or - * <code>KEY_MEANING</code>, but the user can also define their own keys - * in a grxml file, or in the <code>tag</code> slot of - * <code>Grammar.addWordToSlot</code>. - * @return the result. - */ - public String getResult(int index, String key) { - return SR_RecognizerResultGetValue(mRecognizer, index, key); - } - - /** - * Stop the <code>Recognizer</code>. - */ - public void stop() { - SR_RecognizerStop(mRecognizer); - SR_RecognizerDeactivateRule(mRecognizer, mActiveGrammar.mGrammar, "trash"); - } - - /** - * Reset the acoustic state vectorto it's default value. - * - * @hide - */ - public void resetAcousticState() { - SR_AcousticStateReset(mRecognizer); - } - - /** - * Set the acoustic state vector. - * @param state String containing the acoustic state vector. - * - * @hide - */ - public void setAcousticState(String state) { - SR_AcousticStateSet(mRecognizer, state); - } - - /** - * Get the acoustic state vector. - * @return String containing the acoustic state vector. - * - * @hide - */ - public String getAcousticState() { - return SR_AcousticStateGet(mRecognizer); - } - - /** - * Clean up resources. 
- */ - public void destroy() { - try { - if (mVocabulary != 0) SR_VocabularyDestroy(mVocabulary); - } finally { - mVocabulary = 0; - try { - if (mRecognizer != 0) SR_RecognizerUnsetup(mRecognizer); - } finally { - try { - if (mRecognizer != 0) SR_RecognizerDestroy(mRecognizer); - } finally { - mRecognizer = 0; - try { - SR_SessionDestroy(); - } finally { - PMemShutdown(); - } - } - } - } - } - - /** - * Clean up resources. - */ - protected void finalize() throws Throwable { - if (mVocabulary != 0 || mRecognizer != 0) { - destroy(); - throw new IllegalStateException("someone forgot to destroy Recognizer"); - } - } - - /* an example session captured, for reference - void doall() { - if (PMemInit ( ) - || lhs_audioinOpen ( WAVE_MAPPER, SREC_TEST_DEFAULT_AUDIO_FREQUENCY, &audio_in_handle ) - || srec_test_init_application_data ( &applicationData, argc, argv ) - || SR_SessionCreate ( "/system/usr/srec/config/en.us/baseline11k.par" ) - || SR_RecognizerCreate ( &applicationData.recognizer ) - || SR_RecognizerSetup ( applicationData.recognizer) - || ESR_SessionGetLCHAR ( L("cmdline.vocabulary"), filename, &flen ) - || SR_VocabularyLoad ( filename, &applicationData.vocabulary ) - || SR_VocabularyGetLanguage ( applicationData.vocabulary, &applicationData.locale ) - || (applicationData.nametag = NULL) - || SR_NametagsCreate ( &applicationData.nametags ) - || (LSTRCPY ( applicationData.grammars [0].grammar_path, "/system/usr/srec/config/en.us/grammars/VoiceDialer.g2g" ), 0) - || (LSTRCPY ( applicationData.grammars [0].grammarID, "BothTags" ), 0) - || (LSTRCPY ( applicationData.grammars [0].ruleName, "trash" ), 0) - || (applicationData.grammars [0].is_ve_grammar = ESR_FALSE, 0) - || SR_GrammarLoad (applicationData.grammars [0].grammar_path, &applicationData.grammars [applicationData.grammarCount].grammar ) - || SR_GrammarSetupVocabulary ( applicationData.grammars [0].grammar, applicationData.vocabulary ) - || SR_GrammarSetupRecognizer( applicationData.grammars [0].grammar, applicationData.recognizer ) - || SR_GrammarSetDispatchFunction ( applicationData.grammars [0].grammar, L("myDSMCallback"), NULL, myDSMCallback ) - || (applicationData.grammarCount++, 0) - || SR_RecognizerActivateRule ( applicationData.recognizer, applicationData.grammars [0].grammar, - applicationData.grammars [0].ruleName, 1 ) - || (applicationData.active_grammar_num = 0, 0) - || lhs_audioinStart ( audio_in_handle ) - || SR_RecognizerStart ( applicationData.recognizer ) - || strl ( applicationData.grammars [0].grammar, &applicationData, audio_in_handle, &recognition_count ) - || SR_RecognizerStop ( applicationData.recognizer ) - || lhs_audioinStop ( audio_in_handle ) - || SR_RecognizerDeactivateRule ( applicationData.recognizer, applicationData.grammars [0].grammar, applicationData.grammars [0].ruleName ) - || (applicationData.active_grammar_num = -1, 0) - || SR_GrammarDestroy ( applicationData.grammars [0].grammar ) - || (applicationData.grammarCount--, 0) - || SR_NametagsDestroy ( applicationData.nametags ) - || (applicationData.nametags = NULL, 0) - || SR_VocabularyDestroy ( applicationData.vocabulary ) - || (applicationData.vocabulary = NULL) - || SR_RecognizerUnsetup ( applicationData.recognizer) // releases acoustic models - || SR_RecognizerDestroy ( applicationData.recognizer ) - || (applicationData.recognizer = NULL) - || SR_SessionDestroy ( ) - || srec_test_shutdown_application_data ( &applicationData ) - || lhs_audioinClose ( &audio_in_handle ) - || PMemShutdown ( ) - } - */ - - - // - // PMem native methods - // - private 
static native void PMemInit(); - private static native void PMemShutdown(); - - - // - // SR_Session native methods - // - private static native void SR_SessionCreate(String filename); - private static native void SR_SessionDestroy(); - - - // - // SR_Recognizer native methods - // - - /** - * Reserved value. - */ - public final static int EVENT_INVALID = 0; - - /** - * <code>Recognizer</code> could not find a match for the utterance. - */ - public final static int EVENT_NO_MATCH = 1; - - /** - * <code>Recognizer</code> processed one frame of audio. - */ - public final static int EVENT_INCOMPLETE = 2; - - /** - * <code>Recognizer</code> has just been started. - */ - public final static int EVENT_STARTED = 3; - - /** - * <code>Recognizer</code> is stopped. - */ - public final static int EVENT_STOPPED = 4; - - /** - * Beginning of speech detected. - */ - public final static int EVENT_START_OF_VOICING = 5; - - /** - * End of speech detected. - */ - public final static int EVENT_END_OF_VOICING = 6; - - /** - * Beginning of utterance occured too soon. - */ - public final static int EVENT_SPOKE_TOO_SOON = 7; - - /** - * Recognition match detected. - */ - public final static int EVENT_RECOGNITION_RESULT = 8; - - /** - * Timeout occured before beginning of utterance. - */ - public final static int EVENT_START_OF_UTTERANCE_TIMEOUT = 9; - - /** - * Timeout occured before speech recognition could complete. - */ - public final static int EVENT_RECOGNITION_TIMEOUT = 10; - - /** - * Not enough samples to process one frame. - */ - public final static int EVENT_NEED_MORE_AUDIO = 11; - - /** - * More audio encountered than is allowed by 'swirec_max_speech_duration'. - */ - public final static int EVENT_MAX_SPEECH = 12; - - /** - * Produce a displayable string from an <code>advance</code> event. - * @param event - * @return String representing the event. 
- */ - public static String eventToString(int event) { - switch (event) { - case EVENT_INVALID: - return "EVENT_INVALID"; - case EVENT_NO_MATCH: - return "EVENT_NO_MATCH"; - case EVENT_INCOMPLETE: - return "EVENT_INCOMPLETE"; - case EVENT_STARTED: - return "EVENT_STARTED"; - case EVENT_STOPPED: - return "EVENT_STOPPED"; - case EVENT_START_OF_VOICING: - return "EVENT_START_OF_VOICING"; - case EVENT_END_OF_VOICING: - return "EVENT_END_OF_VOICING"; - case EVENT_SPOKE_TOO_SOON: - return "EVENT_SPOKE_TOO_SOON"; - case EVENT_RECOGNITION_RESULT: - return "EVENT_RECOGNITION_RESULT"; - case EVENT_START_OF_UTTERANCE_TIMEOUT: - return "EVENT_START_OF_UTTERANCE_TIMEOUT"; - case EVENT_RECOGNITION_TIMEOUT: - return "EVENT_RECOGNITION_TIMEOUT"; - case EVENT_NEED_MORE_AUDIO: - return "EVENT_NEED_MORE_AUDIO"; - case EVENT_MAX_SPEECH: - return "EVENT_MAX_SPEECH"; - } - return "EVENT_" + event; - } - - // - // SR_Recognizer methods - // - private static native void SR_RecognizerStart(long recognizer); - private static native void SR_RecognizerStop(long recognizer); - private static native long SR_RecognizerCreate(); - private static native void SR_RecognizerDestroy(long recognizer); - private static native void SR_RecognizerSetup(long recognizer); - private static native void SR_RecognizerUnsetup(long recognizer); - private static native boolean SR_RecognizerIsSetup(long recognizer); - private static native String SR_RecognizerGetParameter(long recognizer, String key); - private static native int SR_RecognizerGetSize_tParameter(long recognizer, String key); - private static native boolean SR_RecognizerGetBoolParameter(long recognizer, String key); - private static native void SR_RecognizerSetParameter(long recognizer, String key, String value); - private static native void SR_RecognizerSetSize_tParameter(long recognizer, - String key, int value); - private static native void SR_RecognizerSetBoolParameter(long recognizer, String key, - boolean value); - private static native void SR_RecognizerSetupRule(long recognizer, long grammar, - String ruleName); - private static native boolean SR_RecognizerHasSetupRules(long recognizer); - private static native void SR_RecognizerActivateRule(long recognizer, long grammar, - String ruleName, int weight); - private static native void SR_RecognizerDeactivateRule(long recognizer, long grammar, - String ruleName); - private static native void SR_RecognizerDeactivateAllRules(long recognizer); - private static native boolean SR_RecognizerIsActiveRule(long recognizer, long grammar, - String ruleName); - private static native boolean SR_RecognizerCheckGrammarConsistency(long recognizer, - long grammar); - private static native int SR_RecognizerPutAudio(long recognizer, byte[] buffer, int offset, - int length, boolean isLast); - private static native int SR_RecognizerAdvance(long recognizer); - // private static native void SR_RecognizerLoadUtterance(long recognizer, - // const LCHAR* filename); - // private static native void SR_RecognizerLoadWaveFile(long recognizer, - // const LCHAR* filename); - // private static native void SR_RecognizerSetLockFunction(long recognizer, - // SR_RecognizerLockFunction function, void* data); - private static native boolean SR_RecognizerIsSignalClipping(long recognizer); - private static native boolean SR_RecognizerIsSignalDCOffset(long recognizer); - private static native boolean SR_RecognizerIsSignalNoisy(long recognizer); - private static native boolean SR_RecognizerIsSignalTooQuiet(long recognizer); - private static native boolean 
SR_RecognizerIsSignalTooFewSamples(long recognizer); - private static native boolean SR_RecognizerIsSignalTooManySamples(long recognizer); - // private static native void SR_Recognizer_Change_Sample_Rate (size_t new_sample_rate); - - - // - // SR_AcousticState native methods - // - private static native void SR_AcousticStateReset(long recognizer); - private static native void SR_AcousticStateSet(long recognizer, String state); - private static native String SR_AcousticStateGet(long recognizer); - - - // - // SR_Grammar native methods - // - private static native void SR_GrammarCompile(long grammar); - private static native void SR_GrammarAddWordToSlot(long grammar, String slot, - String word, String pronunciation, int weight, String tag); - private static native void SR_GrammarResetAllSlots(long grammar); - // private static native void SR_GrammarAddNametagToSlot(long grammar, String slot, - // const struct SR_Nametag_t* nametag, int weight, String tag); - private static native void SR_GrammarSetupVocabulary(long grammar, long vocabulary); - // private static native void SR_GrammarSetupModels(long grammar, SR_AcousticModels* models); - private static native void SR_GrammarSetupRecognizer(long grammar, long recognizer); - private static native void SR_GrammarUnsetupRecognizer(long grammar); - // private static native void SR_GrammarGetModels(long grammar,SR_AcousticModels** models); - private static native long SR_GrammarCreate(); - private static native void SR_GrammarDestroy(long grammar); - private static native long SR_GrammarLoad(String filename); - private static native void SR_GrammarSave(long grammar, String filename); - // private static native void SR_GrammarSetDispatchFunction(long grammar, - // const LCHAR* name, void* userData, SR_GrammarDispatchFunction function); - // private static native void SR_GrammarSetParameter(long grammar, const - // LCHAR* key, void* value); - // private static native void SR_GrammarSetSize_tParameter(long grammar, - // const LCHAR* key, size_t value); - // private static native void SR_GrammarGetParameter(long grammar, const - // LCHAR* key, void** value); - // private static native void SR_GrammarGetSize_tParameter(long grammar, - // const LCHAR* key, size_t* value); - // private static native void SR_GrammarCheckParse(long grammar, const LCHAR* - // transcription, SR_SemanticResult** result, size_t* resultCount); - private static native void SR_GrammarAllowOnly(long grammar, String transcription); - private static native void SR_GrammarAllowAll(long grammar); - - - // - // SR_Vocabulary native methods - // - // private static native int SR_VocabularyCreate(); - private static native long SR_VocabularyLoad(); - // private static native void SR_VocabularySave(SR_Vocabulary* self, - // const LCHAR* filename); - // private static native void SR_VocabularyAddWord(SR_Vocabulary* self, - // const LCHAR* word); - // private static native void SR_VocabularyGetLanguage(SR_Vocabulary* self, - // ESR_Locale* locale); - private static native void SR_VocabularyDestroy(long vocabulary); - private static native String SR_VocabularyGetPronunciation(long vocabulary, String word); - - - // - // SR_RecognizerResult native methods - // - private static native byte[] SR_RecognizerResultGetWaveform(long recognizer); - private static native int SR_RecognizerResultGetSize(long recognizer); - private static native int SR_RecognizerResultGetKeyCount(long recognizer, int nbest); - private static native String[] SR_RecognizerResultGetKeyList(long recognizer, int nbest); - 
private static native String SR_RecognizerResultGetValue(long recognizer, - int nbest, String key); - // private static native void SR_RecognizerResultGetLocale(long recognizer, ESR_Locale* locale); -} diff --git a/core/java/android/speech/srec/UlawEncoderInputStream.java b/core/java/android/speech/srec/UlawEncoderInputStream.java deleted file mode 100644 index a488ead..0000000 --- a/core/java/android/speech/srec/UlawEncoderInputStream.java +++ /dev/null @@ -1,187 +0,0 @@ -/* - * --------------------------------------------------------------------------- - * UlawEncoderInputStream.java - * - * Copyright 2008 Nuance Communciations, Inc. - * - * Licensed under the Apache License, Version 2.0 (the 'License'); you may not - * use this file except in compliance with the License. - * - * You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an 'AS IS' BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - * - * --------------------------------------------------------------------------- - */ - -package android.speech.srec; - -import java.io.IOException; -import java.io.InputStream; - -/** - * InputStream which transforms 16 bit pcm data to ulaw data. - * - * Not yet ready to be supported, so - * @hide - */ -public final class UlawEncoderInputStream extends InputStream { - private final static String TAG = "UlawEncoderInputStream"; - - private final static int MAX_ULAW = 8192; - private final static int SCALE_BITS = 16; - - private InputStream mIn; - - private int mMax = 0; - - private final byte[] mBuf = new byte[1024]; - private int mBufCount = 0; // should be 0 or 1 - - private final byte[] mOneByte = new byte[1]; - - - public static void encode(byte[] pcmBuf, int pcmOffset, - byte[] ulawBuf, int ulawOffset, int length, int max) { - - // from 'ulaw' in wikipedia - // +8191 to +8159 0x80 - // +8158 to +4063 in 16 intervals of 256 0x80 + interval number - // +4062 to +2015 in 16 intervals of 128 0x90 + interval number - // +2014 to +991 in 16 intervals of 64 0xA0 + interval number - // +990 to +479 in 16 intervals of 32 0xB0 + interval number - // +478 to +223 in 16 intervals of 16 0xC0 + interval number - // +222 to +95 in 16 intervals of 8 0xD0 + interval number - // +94 to +31 in 16 intervals of 4 0xE0 + interval number - // +30 to +1 in 15 intervals of 2 0xF0 + interval number - // 0 0xFF - - // -1 0x7F - // -31 to -2 in 15 intervals of 2 0x70 + interval number - // -95 to -32 in 16 intervals of 4 0x60 + interval number - // -223 to -96 in 16 intervals of 8 0x50 + interval number - // -479 to -224 in 16 intervals of 16 0x40 + interval number - // -991 to -480 in 16 intervals of 32 0x30 + interval number - // -2015 to -992 in 16 intervals of 64 0x20 + interval number - // -4063 to -2016 in 16 intervals of 128 0x10 + interval number - // -8159 to -4064 in 16 intervals of 256 0x00 + interval number - // -8192 to -8160 0x00 - - // set scale factors - if (max <= 0) max = MAX_ULAW; - - int coef = MAX_ULAW * (1 << SCALE_BITS) / max; - - for (int i = 0; i < length; i++) { - int pcm = (0xff & pcmBuf[pcmOffset++]) + (pcmBuf[pcmOffset++] << 8); - pcm = (pcm * coef) >> SCALE_BITS; - - int ulaw; - if (pcm >= 0) { - ulaw = pcm <= 0 ? 0xff : - pcm <= 30 ? 0xf0 + (( 30 - pcm) >> 1) : - pcm <= 94 ? 
0xe0 + (( 94 - pcm) >> 2) : - pcm <= 222 ? 0xd0 + (( 222 - pcm) >> 3) : - pcm <= 478 ? 0xc0 + (( 478 - pcm) >> 4) : - pcm <= 990 ? 0xb0 + (( 990 - pcm) >> 5) : - pcm <= 2014 ? 0xa0 + ((2014 - pcm) >> 6) : - pcm <= 4062 ? 0x90 + ((4062 - pcm) >> 7) : - pcm <= 8158 ? 0x80 + ((8158 - pcm) >> 8) : - 0x80; - } else { - ulaw = -1 <= pcm ? 0x7f : - -31 <= pcm ? 0x70 + ((pcm - -31) >> 1) : - -95 <= pcm ? 0x60 + ((pcm - -95) >> 2) : - -223 <= pcm ? 0x50 + ((pcm - -223) >> 3) : - -479 <= pcm ? 0x40 + ((pcm - -479) >> 4) : - -991 <= pcm ? 0x30 + ((pcm - -991) >> 5) : - -2015 <= pcm ? 0x20 + ((pcm - -2015) >> 6) : - -4063 <= pcm ? 0x10 + ((pcm - -4063) >> 7) : - -8159 <= pcm ? 0x00 + ((pcm - -8159) >> 8) : - 0x00; - } - ulawBuf[ulawOffset++] = (byte)ulaw; - } - } - - /** - * Compute the maximum of the absolute value of the pcm samples. - * The return value can be used to set ulaw encoder scaling. - * @param pcmBuf array containing 16 bit pcm data. - * @param offset offset of start of 16 bit pcm data. - * @param length number of pcm samples (not number of input bytes) - * @return maximum abs of pcm data values - */ - public static int maxAbsPcm(byte[] pcmBuf, int offset, int length) { - int max = 0; - for (int i = 0; i < length; i++) { - int pcm = (0xff & pcmBuf[offset++]) + (pcmBuf[offset++] << 8); - if (pcm < 0) pcm = -pcm; - if (pcm > max) max = pcm; - } - return max; - } - - /** - * Create an InputStream which takes 16 bit pcm data and produces ulaw data. - * @param in InputStream containing 16 bit pcm data. - * @param max pcm value corresponding to maximum ulaw value. - */ - public UlawEncoderInputStream(InputStream in, int max) { - mIn = in; - mMax = max; - } - - @Override - public int read(byte[] buf, int offset, int length) throws IOException { - if (mIn == null) throw new IllegalStateException("not open"); - - // return at least one byte, but try to fill 'length' - while (mBufCount < 2) { - int n = mIn.read(mBuf, mBufCount, Math.min(length * 2, mBuf.length - mBufCount)); - if (n == -1) return -1; - mBufCount += n; - } - - // compand data - int n = Math.min(mBufCount / 2, length); - encode(mBuf, 0, buf, offset, n, mMax); - - // move data to bottom of mBuf - mBufCount -= n * 2; - for (int i = 0; i < mBufCount; i++) mBuf[i] = mBuf[i + n * 2]; - - return n; - } - - @Override - public int read(byte[] buf) throws IOException { - return read(buf, 0, buf.length); - } - - @Override - public int read() throws IOException { - int n = read(mOneByte, 0, 1); - if (n == -1) return -1; - return 0xff & (int)mOneByte[0]; - } - - @Override - public void close() throws IOException { - if (mIn != null) { - InputStream in = mIn; - mIn = null; - in.close(); - } - } - - @Override - public int available() throws IOException { - return (mIn.available() + mBufCount) / 2; - } -} diff --git a/core/java/android/speech/srec/WaveHeader.java b/core/java/android/speech/srec/WaveHeader.java deleted file mode 100644 index 4c3b172..0000000 --- a/core/java/android/speech/srec/WaveHeader.java +++ /dev/null @@ -1,276 +0,0 @@ -/* - * Copyright (C) 2009 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package android.speech.srec; - -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; - -/** - * This class represents the header of a WAVE format audio file, which usually - * have a .wav suffix. The following integer valued fields are contained: - * <ul> - * <li> format - usually PCM, ALAW or ULAW. - * <li> numChannels - 1 for mono, 2 for stereo. - * <li> sampleRate - usually 8000, 11025, 16000, 22050, or 44100 hz. - * <li> bitsPerSample - usually 16 for PCM, 8 for ALAW, or 8 for ULAW. - * <li> numBytes - size of audio data after this header, in bytes. - * </ul> - * - * Not yet ready to be supported, so - * @hide - */ -public class WaveHeader { - - // follows WAVE format in http://ccrma.stanford.edu/courses/422/projects/WaveFormat - - private static final String TAG = "WaveHeader"; - - private static final int HEADER_LENGTH = 44; - - /** Indicates PCM format. */ - public static final short FORMAT_PCM = 1; - /** Indicates ALAW format. */ - public static final short FORMAT_ALAW = 6; - /** Indicates ULAW format. */ - public static final short FORMAT_ULAW = 7; - - private short mFormat; - private short mNumChannels; - private int mSampleRate; - private short mBitsPerSample; - private int mNumBytes; - - /** - * Construct a WaveHeader, with all fields defaulting to zero. - */ - public WaveHeader() { - } - - /** - * Construct a WaveHeader, with fields initialized. - * @param format format of audio data, - * one of {@link #FORMAT_PCM}, {@link #FORMAT_ULAW}, or {@link #FORMAT_ALAW}. - * @param numChannels 1 for mono, 2 for stereo. - * @param sampleRate typically 8000, 11025, 16000, 22050, or 44100 hz. - * @param bitsPerSample usually 16 for PCM, 8 for ULAW or 8 for ALAW. - * @param numBytes size of audio data after this header, in bytes. - */ - public WaveHeader(short format, short numChannels, int sampleRate, short bitsPerSample, int numBytes) { - mFormat = format; - mSampleRate = sampleRate; - mNumChannels = numChannels; - mBitsPerSample = bitsPerSample; - mNumBytes = numBytes; - } - - /** - * Get the format field. - * @return format field, - * one of {@link #FORMAT_PCM}, {@link #FORMAT_ULAW}, or {@link #FORMAT_ALAW}. - */ - public short getFormat() { - return mFormat; - } - - /** - * Set the format field. - * @param format - * one of {@link #FORMAT_PCM}, {@link #FORMAT_ULAW}, or {@link #FORMAT_ALAW}. - * @return reference to this WaveHeader instance. - */ - public WaveHeader setFormat(short format) { - mFormat = format; - return this; - } - - /** - * Get the number of channels. - * @return number of channels, 1 for mono, 2 for stereo. - */ - public short getNumChannels() { - return mNumChannels; - } - - /** - * Set the number of channels. - * @param numChannels 1 for mono, 2 for stereo. - * @return reference to this WaveHeader instance. - */ - public WaveHeader setNumChannels(short numChannels) { - mNumChannels = numChannels; - return this; - } - - /** - * Get the sample rate. - * @return sample rate, typically 8000, 11025, 16000, 22050, or 44100 hz. - */ - public int getSampleRate() { - return mSampleRate; - } - - /** - * Set the sample rate. - * @param sampleRate sample rate, typically 8000, 11025, 16000, 22050, or 44100 hz. - * @return reference to this WaveHeader instance. - */ - public WaveHeader setSampleRate(int sampleRate) { - mSampleRate = sampleRate; - return this; - } - - /** - * Get the number of bits per sample. 
- * @return number of bits per sample, - * usually 16 for PCM, 8 for ULAW or 8 for ALAW. - */ - public short getBitsPerSample() { - return mBitsPerSample; - } - - /** - * Set the number of bits per sample. - * @param bitsPerSample number of bits per sample, - * usually 16 for PCM, 8 for ULAW or 8 for ALAW. - * @return reference to this WaveHeader instance. - */ - public WaveHeader setBitsPerSample(short bitsPerSample) { - mBitsPerSample = bitsPerSample; - return this; - } - - /** - * Get the size of audio data after this header, in bytes. - * @return size of audio data after this header, in bytes. - */ - public int getNumBytes() { - return mNumBytes; - } - - /** - * Set the size of audio data after this header, in bytes. - * @param numBytes size of audio data after this header, in bytes. - * @return reference to this WaveHeader instance. - */ - public WaveHeader setNumBytes(int numBytes) { - mNumBytes = numBytes; - return this; - } - - /** - * Read and initialize a WaveHeader. - * @param in {@link java.io.InputStream} to read from. - * @return number of bytes consumed. - * @throws IOException - */ - public int read(InputStream in) throws IOException { - /* RIFF header */ - readId(in, "RIFF"); - int numBytes = readInt(in) - 36; - readId(in, "WAVE"); - - /* fmt chunk */ - readId(in, "fmt "); - if (16 != readInt(in)) throw new IOException("fmt chunk length not 16"); - mFormat = readShort(in); - mNumChannels = readShort(in); - mSampleRate = readInt(in); - int byteRate = readInt(in); - short blockAlign = readShort(in); - mBitsPerSample = readShort(in); - if (byteRate != mNumChannels * mSampleRate * mBitsPerSample / 8) { - throw new IOException("fmt.ByteRate field inconsistent"); - } - if (blockAlign != mNumChannels * mBitsPerSample / 8) { - throw new IOException("fmt.BlockAlign field inconsistent"); - } - - /* data chunk */ - readId(in, "data"); - mNumBytes = readInt(in); - - return HEADER_LENGTH; - } - - private static void readId(InputStream in, String id) throws IOException { - for (int i = 0; i < id.length(); i++) { - if (id.charAt(i) != in.read()) throw new IOException( id + " tag not present"); - } - } - - private static int readInt(InputStream in) throws IOException { - return in.read() | (in.read() << 8) | (in.read() << 16) | (in.read() << 24); - } - - private static short readShort(InputStream in) throws IOException { - return (short)(in.read() | (in.read() << 8)); - } - - /** - * Write a WAVE file header. - * @param out {@link java.io.OutputStream} to receive the header. - * @return number of bytes written. 
- * @throws IOException - */ - public int write(OutputStream out) throws IOException { - /* RIFF header */ - writeId(out, "RIFF"); - writeInt(out, 36 + mNumBytes); - writeId(out, "WAVE"); - - /* fmt chunk */ - writeId(out, "fmt "); - writeInt(out, 16); - writeShort(out, mFormat); - writeShort(out, mNumChannels); - writeInt(out, mSampleRate); - writeInt(out, mNumChannels * mSampleRate * mBitsPerSample / 8); - writeShort(out, (short)(mNumChannels * mBitsPerSample / 8)); - writeShort(out, mBitsPerSample); - - /* data chunk */ - writeId(out, "data"); - writeInt(out, mNumBytes); - - return HEADER_LENGTH; - } - - private static void writeId(OutputStream out, String id) throws IOException { - for (int i = 0; i < id.length(); i++) out.write(id.charAt(i)); - } - - private static void writeInt(OutputStream out, int val) throws IOException { - out.write(val >> 0); - out.write(val >> 8); - out.write(val >> 16); - out.write(val >> 24); - } - - private static void writeShort(OutputStream out, short val) throws IOException { - out.write(val >> 0); - out.write(val >> 8); - } - - @Override - public String toString() { - return String.format( - "WaveHeader format=%d numChannels=%d sampleRate=%d bitsPerSample=%d numBytes=%d", - mFormat, mNumChannels, mSampleRate, mBitsPerSample, mNumBytes); - } - -} diff --git a/core/java/android/speech/srec/package.html b/core/java/android/speech/srec/package.html deleted file mode 100644 index 9a99df8..0000000 --- a/core/java/android/speech/srec/package.html +++ /dev/null @@ -1,6 +0,0 @@ -<HTML> -<BODY> -Simple, synchronous SREC speech recognition API. -@hide -</BODY> -</HTML> |
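The u-law interval table in UlawEncoderInputStream and the 44-byte RIFF layout in WaveHeader describe a small audio-file pipeline. A minimal sketch of how those two @hide helpers could be combined to write 16-bit PCM out as a u-law .wav — the mono assumption, buffer size, and output path are illustrative, not taken from the deleted sources:

```java
import android.speech.srec.UlawEncoderInputStream;
import android.speech.srec.WaveHeader;

import java.io.ByteArrayInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

class UlawWavExample {
    static void writeUlawWav(byte[] pcm, int sampleRate, String path) throws IOException {
        // Scale the encoder to the loudest sample in the clip (see maxAbsPcm).
        int numSamples = pcm.length / 2;
        int max = UlawEncoderInputStream.maxAbsPcm(pcm, 0, numSamples);
        InputStream ulaw = new UlawEncoderInputStream(new ByteArrayInputStream(pcm), max);

        // One u-law byte per 16-bit PCM sample: mono, 8 bits per sample.
        WaveHeader header = new WaveHeader(
                WaveHeader.FORMAT_ULAW, (short) 1, sampleRate, (short) 8, numSamples);

        try (OutputStream out = new FileOutputStream(path)) {
            header.write(out);
            byte[] buf = new byte[1024];
            int n;
            while ((n = ulaw.read(buf)) != -1) out.write(buf, 0, n);
        } finally {
            ulaw.close();
        }
    }
}
```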