summaryrefslogtreecommitdiffstats
path: root/core/java/android/speech
diff options
context:
space:
mode:
authorThe Android Open Source Project <initial-contribution@android.com>2009-03-03 19:31:44 -0800
committerThe Android Open Source Project <initial-contribution@android.com>2009-03-03 19:31:44 -0800
commit9066cfe9886ac131c34d59ed0e2d287b0e3c0087 (patch)
treed88beb88001f2482911e3d28e43833b50e4b4e97 /core/java/android/speech
parentd83a98f4ce9cfa908f5c54bbd70f03eec07e7553 (diff)
downloadframeworks_base-9066cfe9886ac131c34d59ed0e2d287b0e3c0087.zip
frameworks_base-9066cfe9886ac131c34d59ed0e2d287b0e3c0087.tar.gz
frameworks_base-9066cfe9886ac131c34d59ed0e2d287b0e3c0087.tar.bz2
auto import from //depot/cupcake/@135843
Diffstat (limited to 'core/java/android/speech')
-rw-r--r--core/java/android/speech/RecognizerIntent.java157
-rw-r--r--core/java/android/speech/srec/MicrophoneInputStream.java110
-rw-r--r--core/java/android/speech/srec/Recognizer.java719
-rw-r--r--core/java/android/speech/srec/UlawEncoderInputStream.java186
-rw-r--r--core/java/android/speech/srec/WaveHeader.java274
-rw-r--r--core/java/android/speech/srec/package.html6
6 files changed, 1452 insertions, 0 deletions
diff --git a/core/java/android/speech/RecognizerIntent.java b/core/java/android/speech/RecognizerIntent.java
new file mode 100644
index 0000000..987e763
--- /dev/null
+++ b/core/java/android/speech/RecognizerIntent.java
@@ -0,0 +1,157 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.speech;
+
+import android.app.Activity;
+import android.content.ActivityNotFoundException;
+import android.content.Intent;
+
/**
 * Constants for supporting speech recognition through starting an {@link Intent}.
 *
 * <p>This class only holds intent actions, extras, and result codes; it is not
 * instantiable.
 */
public class RecognizerIntent {
    private RecognizerIntent() {
        // Not for instantiating.
    }

    /**
     * Starts an activity that will prompt the user for speech and sends it through a
     * speech recognizer. The results will be returned via activity results, or forwarded
     * via a PendingIntent if one is provided.
     *
     * <p>Required extras:
     * <ul>
     *   <li>{@link #EXTRA_LANGUAGE_MODEL}
     * </ul>
     *
     * <p>Optional extras:
     * <ul>
     *   <li>{@link #EXTRA_PROMPT}
     *   <li>{@link #EXTRA_LANGUAGE}
     *   <li>{@link #EXTRA_MAX_RESULTS}
     *   <li>{@link #EXTRA_RESULTS_PENDINGINTENT}
     *   <li>{@link #EXTRA_RESULTS_PENDINGINTENT_BUNDLE}
     * </ul>
     *
     * <p>Result extras:
     * <ul>
     *   <li>{@link #EXTRA_RESULTS}
     * </ul>
     *
     * <p>NOTE: There may not be any applications installed to handle this action, so you should
     * make sure to catch {@link ActivityNotFoundException}.
     */
    public static final String ACTION_RECOGNIZE_SPEECH = "android.speech.action.RECOGNIZE_SPEECH";

    /**
     * Starts an activity that will prompt the user for speech, sends it through a
     * speech recognizer, and invokes and displays a web search result.
     *
     * <p>Required extras:
     * <ul>
     *   <li>{@link #EXTRA_LANGUAGE_MODEL}
     * </ul>
     *
     * <p>Optional extras:
     * <ul>
     *   <li>{@link #EXTRA_PROMPT}
     *   <li>{@link #EXTRA_LANGUAGE}
     *   <li>{@link #EXTRA_MAX_RESULTS}
     * </ul>
     *
     * <p>Result extras:
     * <ul>
     *   <li>{@link #EXTRA_RESULTS}
     * </ul>
     *
     * <p>NOTE: There may not be any applications installed to handle this action, so you should
     * make sure to catch {@link ActivityNotFoundException}.
     */
    public static final String ACTION_WEB_SEARCH = "android.speech.action.WEB_SEARCH";

    /**
     * Informs the recognizer which speech model to prefer when performing
     * {@link #ACTION_RECOGNIZE_SPEECH}. The recognizer uses this
     * information to fine tune the results. This extra is required. Activities implementing
     * {@link #ACTION_RECOGNIZE_SPEECH} may interpret the values as they see fit.
     *
     * @see #LANGUAGE_MODEL_FREE_FORM
     * @see #LANGUAGE_MODEL_WEB_SEARCH
     */
    public static final String EXTRA_LANGUAGE_MODEL = "android.speech.extra.LANGUAGE_MODEL";

    /**
     * Use a language model based on free-form speech recognition. This is a value to use for
     * {@link #EXTRA_LANGUAGE_MODEL}.
     * @see #EXTRA_LANGUAGE_MODEL
     */
    public static final String LANGUAGE_MODEL_FREE_FORM = "free_form";
    /**
     * Use a language model based on web search terms. This is a value to use for
     * {@link #EXTRA_LANGUAGE_MODEL}.
     * @see #EXTRA_LANGUAGE_MODEL
     */
    public static final String LANGUAGE_MODEL_WEB_SEARCH = "web_search";

    /** Optional text prompt to show to the user when asking them to speak. */
    public static final String EXTRA_PROMPT = "android.speech.extra.PROMPT";

    /**
     * Optional language override to inform the recognizer that it should expect speech in
     * a language different than the one set in the {@link java.util.Locale#getDefault()}.
     */
    public static final String EXTRA_LANGUAGE = "android.speech.extra.LANGUAGE";

    /**
     * Optional limit on the maximum number of results to return. If omitted the recognizer
     * will choose how many results to return. Must be an integer.
     */
    public static final String EXTRA_MAX_RESULTS = "android.speech.extra.MAX_RESULTS";

    /**
     * When the intent is {@link #ACTION_RECOGNIZE_SPEECH}, the speech input activity will
     * return results to you via the activity results mechanism. Alternatively, if you use this
     * extra to supply a PendingIntent, the results will be added to its bundle and the
     * PendingIntent will be sent to its target.
     */
    public static final String EXTRA_RESULTS_PENDINGINTENT =
            "android.speech.extra.RESULTS_PENDINGINTENT";
    /**
     * If you use {@link #EXTRA_RESULTS_PENDINGINTENT} to supply a forwarding intent, you can
     * also use this extra to supply additional extras for the final intent. The search results
     * will be added to this bundle, and the combined bundle will be sent to the target.
     */
    public static final String EXTRA_RESULTS_PENDINGINTENT_BUNDLE =
            "android.speech.extra.RESULTS_PENDINGINTENT_BUNDLE";

    // Result codes are offset from Activity.RESULT_FIRST_USER so they never
    // collide with the standard RESULT_OK / RESULT_CANCELED values.
    /** Result code returned when no matches are found for the given speech */
    public static final int RESULT_NO_MATCH = Activity.RESULT_FIRST_USER;
    /** Result code returned when there is a generic client error */
    public static final int RESULT_CLIENT_ERROR = Activity.RESULT_FIRST_USER + 1;
    /** Result code returned when the recognition server returns an error */
    public static final int RESULT_SERVER_ERROR = Activity.RESULT_FIRST_USER + 2;
    /** Result code returned when a network error was encountered */
    public static final int RESULT_NETWORK_ERROR = Activity.RESULT_FIRST_USER + 3;
    /** Result code returned when an audio error was encountered */
    public static final int RESULT_AUDIO_ERROR = Activity.RESULT_FIRST_USER + 4;

    /**
     * An {@code ArrayList<String>} of the potential results when performing
     * {@link #ACTION_RECOGNIZE_SPEECH}. Only present when {@link Activity#RESULT_OK} is returned.
     */
    public static final String EXTRA_RESULTS = "android.speech.extra.RESULTS";
}
diff --git a/core/java/android/speech/srec/MicrophoneInputStream.java b/core/java/android/speech/srec/MicrophoneInputStream.java
new file mode 100644
index 0000000..fab77a9
--- /dev/null
+++ b/core/java/android/speech/srec/MicrophoneInputStream.java
@@ -0,0 +1,110 @@
+/*---------------------------------------------------------------------------*
+ * MicrophoneInputStream.java *
+ * *
+ * Copyright 2007 Nuance Communications, Inc.                                 *
+ * *
+ * Licensed under the Apache License, Version 2.0 (the 'License'); *
+ * you may not use this file except in compliance with the License. *
+ * *
+ * You may obtain a copy of the License at *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, software *
+ * distributed under the License is distributed on an 'AS IS' BASIS, *
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
+ * See the License for the specific language governing permissions and *
+ * limitations under the License. *
+ * *
+ *---------------------------------------------------------------------------*/
+
+
+package android.speech.srec;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.lang.IllegalStateException;
+
+
/**
 * PCM input stream from the microphone, 16 bits per sample.
 *
 * <p>Backed by a native AudioRecord object accessed over JNI; the integer
 * field {@code mAudioRecord} holds the native handle, with 0 meaning
 * "closed". Callers must {@link #close} the stream when done.
 */
public final class MicrophoneInputStream extends InputStream {
    static {
        System.loadLibrary("srec_jni");
    }

    private final static String TAG = "MicrophoneInputStream";
    // Native AudioRecord handle; 0 means the stream is closed.
    private int mAudioRecord = 0;
    // Scratch buffer so the single-byte read() can reuse the array JNI call.
    private byte[] mOneByte = new byte[1];

    /**
     * MicrophoneInputStream constructor.
     * @param sampleRate sample rate of the microphone, typically 11025 or 8000.
     * @param fifoDepth depth of the real time fifo, measured in sampleRate clock ticks.
     * This determines how long an application may delay before losing data.
     * @throws IOException if the native AudioRecord cannot be created or started.
     */
    public MicrophoneInputStream(int sampleRate, int fifoDepth) throws IOException {
        mAudioRecord = AudioRecordNew(sampleRate, fifoDepth);
        if (mAudioRecord == 0) throw new IOException("AudioRecord constructor failed - busy?");
        int status = AudioRecordStart(mAudioRecord);
        if (status != 0) {
            // Release the native object before propagating so the handle
            // does not leak.
            close();
            throw new IOException("AudioRecord start failed: " + status);
        }
    }

    /**
     * Reads a single byte.
     * @return the byte read (0-255), or -1 if no byte was available.
     * @throws IllegalStateException if the stream has been closed.
     */
    @Override
    public int read() throws IOException {
        if (mAudioRecord == 0) throw new IllegalStateException("not open");
        int rtn = AudioRecordRead(mAudioRecord, mOneByte, 0, 1);
        // Mask with 0xff so the byte is returned unsigned per the
        // InputStream.read() contract.
        return rtn == 1 ? ((int)mOneByte[0] & 0xff) : -1;
    }

    /**
     * Fills as much of {@code b} as the native layer delivers.
     * @throws IllegalStateException if the stream has been closed.
     */
    @Override
    public int read(byte[] b) throws IOException {
        if (mAudioRecord == 0) throw new IllegalStateException("not open");
        return AudioRecordRead(mAudioRecord, b, 0, b.length);
    }

    /**
     * Reads up to {@code length} bytes into {@code b} starting at {@code offset}.
     * @throws IllegalStateException if the stream has been closed.
     */
    @Override
    public int read(byte[] b, int offset, int length) throws IOException {
        if (mAudioRecord == 0) throw new IllegalStateException("not open");
        // TODO: should we force all reads to be a multiple of the sample size?
        return AudioRecordRead(mAudioRecord, b, offset, length);
    }

    /**
     * Closes this stream. Safe to call more than once; subsequent calls are
     * no-ops because the handle is zeroed.
     */
    @Override
    public void close() throws IOException {
        if (mAudioRecord != 0) {
            try {
                AudioRecordStop(mAudioRecord);
            } finally {
                // Delete the native object even if stopping failed, and zero
                // the handle even if deletion failed.
                try {
                    AudioRecordDelete(mAudioRecord);
                } finally {
                    mAudioRecord = 0;
                }
            }
        }
    }

    /**
     * Safety net: if the stream was never closed, close it here and raise an
     * exception to flag the leak.
     */
    @Override
    protected void finalize() throws Throwable {
        try {
            if (mAudioRecord != 0) {
                close();
                throw new IOException("someone forgot to close MicrophoneInputStream");
            }
        } finally {
            // Chain the finalizer; the original implementation omitted this,
            // breaking the Object.finalize() chain.
            super.finalize();
        }
    }

    //
    // AudioRecord JNI interface
    //
    private static native int AudioRecordNew(int sampleRate, int fifoDepth);
    private static native int AudioRecordStart(int audioRecord);
    private static native int AudioRecordRead(int audioRecord, byte[] b, int offset, int length) throws IOException;
    private static native void AudioRecordStop(int audioRecord) throws IOException;
    private static native void AudioRecordDelete(int audioRecord) throws IOException;
}
diff --git a/core/java/android/speech/srec/Recognizer.java b/core/java/android/speech/srec/Recognizer.java
new file mode 100644
index 0000000..a03a36a
--- /dev/null
+++ b/core/java/android/speech/srec/Recognizer.java
@@ -0,0 +1,719 @@
+/*
+ * ---------------------------------------------------------------------------
+ * Recognizer.java
+ *
+ * Copyright 2007 Nuance Communications, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the 'License'); you may not
+ * use this file except in compliance with the License.
+ *
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an 'AS IS' BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * ---------------------------------------------------------------------------
+ */
+
+
+package android.speech.srec;
+
+import android.util.Config;
+import android.util.Log;
+
+import java.io.File;
+import java.io.InputStream;
+import java.io.IOException;
+import java.util.Locale;
+
+/**
+ * Simple, synchronous speech recognizer, using the Nuance SREC package.
+ * Usage proceeds as follows:
+ *
+ * <ul>
+ * <li>Create a <code>Recognizer</code>.
+ * <li>Create a <code>Recognizer.Grammar</code>.
+ * <li>Setup the <code>Recognizer.Grammar</code>.
+ * <li>Reset the <code>Recognizer.Grammar</code> slots, if needed.
+ * <li>Fill the <code>Recognizer.Grammar</code> slots, if needed.
+ * <li>Compile the <code>Recognizer.Grammar</code>, if needed.
+ * <li>Save the filled <code>Recognizer.Grammar</code>, if needed.
+ * <li>Start the <code>Recognizer</code>.
+ * <li>Loop over <code>advance</code> and <code>putAudio</code> until recognition complete.
+ * <li>Fetch and process results, or notify of failure.
+ * <li>Stop the <code>Recognizer</code>.
+ * <li>Destroy the <code>Recognizer</code>.
+ * </ul>
+ *
+ * <p>Below is example code</p>
+ *
+ * <pre class="prettyprint">
+ *
+ * // create and start audio input
+ * InputStream audio = new MicrophoneInputStream(11025, 11025*5);
+ * // create a Recognizer
+ * String cdir = Recognizer.getConfigDir(null);
+ * Recognizer recognizer = new Recognizer(cdir + "/baseline11k.par");
+ * // create and load a Grammar
+ * Recognizer.Grammar grammar = recognizer.new Grammar(cdir + "/grammars/VoiceDialer.g2g");
+ * // setup the Grammar to work with the Recognizer
+ * grammar.setupRecognizer();
+ * // fill the Grammar slots with names and save, if required
+ * grammar.resetAllSlots();
+ * for (String name : names) grammar.addWordToSlot("@Names", name, null, 1, "V=1");
+ * grammar.compile();
+ * grammar.save(".../foo.g2g");
+ * // start the Recognizer
+ * recognizer.start();
+ * // loop over Recognizer events
+ * while (true) {
+ * switch (recognizer.advance()) {
+ * case Recognizer.EVENT_INCOMPLETE:
+ * case Recognizer.EVENT_STARTED:
+ * case Recognizer.EVENT_START_OF_VOICING:
+ * case Recognizer.EVENT_END_OF_VOICING:
+ * // let the Recognizer continue to run
+ * continue;
+ * case Recognizer.EVENT_RECOGNITION_RESULT:
+ * // success, so fetch results here!
+ * for (int i = 0; i < recognizer.getResultCount(); i++) {
+ * String result = recognizer.getResult(i, Recognizer.KEY_LITERAL);
+ * }
+ * break;
+ * case Recognizer.EVENT_NEED_MORE_AUDIO:
+ * // put more audio in the Recognizer
+ * recognizer.putAudio(audio);
+ * continue;
+ * default:
+ * notifyFailure();
+ * break;
+ * }
+ * break;
+ * }
+ * // stop the Recognizer
+ * recognizer.stop();
+ * // destroy the Recognizer
+ * recognizer.destroy();
+ * // stop the audio device
+ * audio.close();
+ *
+ * </pre>
+ */
+public final class Recognizer {
+ static {
+ System.loadLibrary("srec_jni");
+ }
+
+ private static String TAG = "Recognizer";
+
+ /**
+ * Result key corresponding to confidence score.
+ */
+ public static final String KEY_CONFIDENCE = "conf";
+
+ /**
+ * Result key corresponding to literal text.
+ */
+ public static final String KEY_LITERAL = "literal";
+
+ /**
+ * Result key corresponding to semantic meaning text.
+ */
+ public static final String KEY_MEANING = "meaning";
+
+ // handle to SR_Vocabulary object
+ private int mVocabulary = 0;
+
+ // handle to SR_Recognizer object
+ private int mRecognizer = 0;
+
+ // Grammar currently associated with Recognizer via SR_GrammarSetupRecognizer
+ private Grammar mActiveGrammar = null;
+
+ /**
+ * Get the pathname of the SREC configuration directory corresponding to the
+ * language indicated by the Locale.
+ * This directory contains dictionaries, speech models,
+ * configuration files, and other data needed by the Recognizer.
+ * @param locale <code>Locale</code> corresponding to the desired language,
+ * or null for default, currently <code>Locale.US</code>.
+ * @return Pathname of the configuration directory.
+ */
+ public static String getConfigDir(Locale locale) {
+ if (locale == null) locale = Locale.US;
+ String dir = "/system/usr/srec/config/" +
+ locale.toString().replace('_', '.').toLowerCase();
+ if ((new File(dir)).isDirectory()) return dir;
+ return null;
+ }
+
    /**
     * Create an instance of a SREC speech recognizer.
     *
     * @param configFile pathname of the baseline*.par configuration file,
     * which in turn contains references to dictionaries, speech models,
     * and other data needed to configure and operate the recognizer.
     * A separate config file is needed for each audio sample rate.
     * Two files, baseline11k.par and baseline8k.par, which correspond to
     * 11025 and 8000 hz, are present in the directory indicated by
     * {@link #getConfigDir}.
     * @throws IOException
     */
    public Recognizer(String configFile) throws IOException {
        // Bring up the native stack in order: memory subsystem, session,
        // recognizer, vocabulary. destroy() unwinds these in reverse order.
        PMemInit();
        SR_SessionCreate(configFile);
        mRecognizer = SR_RecognizerCreate();
        SR_RecognizerSetup(mRecognizer);
        // NOTE(review): no filename is passed here, so the vocabulary
        // presumably comes from the session/config file — confirm against
        // the JNI implementation.
        mVocabulary = SR_VocabularyLoad();
    }
+
    /**
     * Represents a grammar loaded into the Recognizer.
     *
     * <p>Non-static inner class: each Grammar is tied to the enclosing
     * Recognizer's vocabulary and recognizer handles.
     */
    public class Grammar {
        // Native SR_Grammar handle; 0 means destroyed or never loaded.
        private int mGrammar = 0;

        /**
         * Create a <code>Grammar</code> instance.
         * @param g2gFileName pathname of g2g file.
         */
        public Grammar(String g2gFileName) throws IOException {
            mGrammar = SR_GrammarLoad(g2gFileName);
            // Associate the enclosing Recognizer's vocabulary with this grammar.
            SR_GrammarSetupVocabulary(mGrammar, mVocabulary);
        }

        /**
         * Reset all slots.
         */
        public void resetAllSlots() {
            SR_GrammarResetAllSlots(mGrammar);
        }

        /**
         * Add a word to a slot.
         *
         * @param slot slot name.
         * @param word word to insert.
         * @param pron pronunciation, or null to derive from word.
         * @param weight weight to give the word. One is normal, 50 is low.
         * @param tag semantic meaning tag string.
         */
        public void addWordToSlot(String slot, String word, String pron, int weight, String tag) {
            SR_GrammarAddWordToSlot(mGrammar, slot, word, pron, weight, tag);
        }

        /**
         * Compile all slots.
         */
        public void compile() {
            SR_GrammarCompile(mGrammar);
        }

        /**
         * Setup <code>Grammar</code> with <code>Recognizer</code>.
         * Records this grammar as the one start()/stop() will operate on.
         */
        public void setupRecognizer() {
            SR_GrammarSetupRecognizer(mGrammar, mRecognizer);
            mActiveGrammar = this;
        }

        /**
         * Save <code>Grammar</code> to g2g file.
         *
         * @param g2gFileName pathname to save the compiled grammar to.
         * @throws IOException
         */
        public void save(String g2gFileName) throws IOException {
            SR_GrammarSave(mGrammar, g2gFileName);
        }

        /**
         * Release resources associated with this <code>Grammar</code>.
         * Safe to call more than once; the handle is zeroed on first call.
         */
        public void destroy() {
            // TODO: need to do cleanup and disassociation with Recognizer
            if (mGrammar != 0) {
                SR_GrammarDestroy(mGrammar);
                mGrammar = 0;
            }
        }

        /**
         * Clean up resources.
         *
         * <p>Safety net: if the grammar was never destroyed, destroy it here
         * and raise an exception to flag the leak.
         * NOTE(review): does not chain super.finalize() — confirm intended.
         */
        protected void finalize() {
            if (mGrammar != 0) {
                destroy();
                throw new IllegalStateException("someone forgot to destroy Grammar");
            }
        }
    }
+
    /**
     * Start recognition.
     *
     * <p>Activates the rule named "trash" in the grammar most recently passed
     * to {@code Grammar.setupRecognizer()}, then starts the native recognizer.
     * Requires that a grammar has been set up first (mActiveGrammar non-null).
     */
    public void start() {
        // TODO: shouldn't be here?
        SR_RecognizerActivateRule(mRecognizer, mActiveGrammar.mGrammar, "trash", 1);
        SR_RecognizerStart(mRecognizer);
    }
+
    /**
     * Process some audio and return the current status.
     * Call repeatedly in a loop, feeding audio via <code>putAudio</code>
     * whenever <code>EVENT_NEED_MORE_AUDIO</code> is returned.
     * @return recognition event, one of:
     * <ul>
     * <li><code>EVENT_INVALID</code>
     * <li><code>EVENT_NO_MATCH</code>
     * <li><code>EVENT_INCOMPLETE</code>
     * <li><code>EVENT_STARTED</code>
     * <li><code>EVENT_STOPPED</code>
     * <li><code>EVENT_START_OF_VOICING</code>
     * <li><code>EVENT_END_OF_VOICING</code>
     * <li><code>EVENT_SPOKE_TOO_SOON</code>
     * <li><code>EVENT_RECOGNITION_RESULT</code>
     * <li><code>EVENT_START_OF_UTTERANCE_TIMEOUT</code>
     * <li><code>EVENT_RECOGNITION_TIMEOUT</code>
     * <li><code>EVENT_NEED_MORE_AUDIO</code>
     * <li><code>EVENT_MAX_SPEECH</code>
     * </ul>
     */
    public int advance() {
        return SR_RecognizerAdvance(mRecognizer);
    }
+
    /**
     * Put audio samples into the <code>Recognizer</code>.
     * @param buf holds the audio samples.
     * @param offset offset of the first sample.
     * @param length number of bytes containing samples.
     * @param isLast indicates no more audio data, normally false.
     * @return number of bytes accepted.
     */
    public int putAudio(byte[] buf, int offset, int length, boolean isLast) {
        return SR_RecognizerPutAudio(mRecognizer, buf, offset, length, isLast);
    }
+
    /**
     * Read audio samples from an <code>InputStream</code> and put them in the
     * <code>Recognizer</code>. Reads at most 512 bytes per call; at end of
     * stream the recognizer is signalled with a zero-length "last" buffer.
     * @param audio <code>InputStream</code> containing PCM audio samples.
     * @throws IOException if the stream read fails, or if the recognizer
     * accepts fewer bytes than were read.
     */
    public void putAudio(InputStream audio) throws IOException {
        // make sure the audio buffer is allocated (lazily, on first use)
        if (mPutAudioBuffer == null) mPutAudioBuffer = new byte[512];
        // read some data
        int nbytes = audio.read(mPutAudioBuffer);
        // eof, so signal Recognizer
        if (nbytes == -1) {
            SR_RecognizerPutAudio(mRecognizer, mPutAudioBuffer, 0, 0, true);
        }
        // put it into the Recognizer
        else if (nbytes != SR_RecognizerPutAudio(mRecognizer, mPutAudioBuffer, 0, nbytes, false)) {
            throw new IOException("SR_RecognizerPutAudio failed nbytes=" + nbytes);
        }
    }
+
+ // audio buffer for putAudio(InputStream)
+ private byte[] mPutAudioBuffer = null;
+
    /**
     * Get the number of recognition results. Must be called after
     * <code>EVENT_RECOGNITION_RESULT</code> is returned by
     * <code>advance</code>, but before <code>stop</code>.
     *
     * @return number of results in nbest list.
     */
    public int getResultCount() {
        return SR_RecognizerResultGetSize(mRecognizer);
    }
+
    /**
     * Get a set of keys for the result. Must be called after
     * <code>EVENT_RECOGNITION_RESULT</code> is returned by
     * <code>advance</code>, but before <code>stop</code>.
     *
     * @param index index of result.
     * @return array of keys.
     */
    public String[] getResultKeys(int index) {
        return SR_RecognizerResultGetKeyList(mRecognizer, index);
    }
+
    /**
     * Get a result value. Must be called after
     * <code>EVENT_RECOGNITION_RESULT</code> is returned by
     * <code>advance</code>, but before <code>stop</code>.
     *
     * @param index index of the result.
     * @param key key of the result. This is typically one of
     * <code>KEY_CONFIDENCE</code>, <code>KEY_LITERAL</code>, or
     * <code>KEY_MEANING</code>, but the user can also define their own keys
     * in a grxml file, or in the <code>tag</code> slot of
     * <code>Grammar.addWordToSlot</code>.
     * @return the result.
     */
    public String getResult(int index, String key) {
        return SR_RecognizerResultGetValue(mRecognizer, index, key);
    }
+
    /**
     * Stop the <code>Recognizer</code>.
     *
     * <p>Stops the native recognizer and deactivates the "trash" rule that
     * {@code start()} activated on the active grammar.
     */
    public void stop() {
        SR_RecognizerStop(mRecognizer);
        SR_RecognizerDeactivateRule(mRecognizer, mActiveGrammar.mGrammar, "trash");
    }
+
    /**
     * Reset the acoustic state vector to its default value.
     *
     * @hide
     */
    public void resetAcousticState() {
        SR_AcousticStateReset(mRecognizer);
    }
+
    /**
     * Set the acoustic state vector.
     * @param state String containing the acoustic state vector, as previously
     * returned by {@link #getAcousticState}.
     *
     * @hide
     */
    public void setAcousticState(String state) {
        SR_AcousticStateSet(mRecognizer, state);
    }
+
    /**
     * Get the acoustic state vector.
     * @return String containing the acoustic state vector.
     *
     * @hide
     */
    public String getAcousticState() {
        return SR_AcousticStateGet(mRecognizer);
    }
+
    /**
     * Clean up resources.
     *
     * <p>Tears down native state in reverse order of construction:
     * vocabulary, recognizer unsetup/destroy, session, then the PMem
     * subsystem. The nested try/finally blocks ensure every later step is
     * still attempted if an earlier one throws. The handle fields are zeroed
     * first, so the vocabulary/recognizer steps are skipped on a second call;
     * note that SR_SessionDestroy() and PMemShutdown() run unconditionally.
     */
    public void destroy() {
        try {
            if (mVocabulary != 0) SR_VocabularyDestroy(mVocabulary);
        } finally {
            mVocabulary = 0;
            try {
                if (mRecognizer != 0) SR_RecognizerUnsetup(mRecognizer);
            } finally {
                try {
                    if (mRecognizer != 0) SR_RecognizerDestroy(mRecognizer);
                } finally {
                    mRecognizer = 0;
                    try {
                        SR_SessionDestroy();
                    } finally {
                        PMemShutdown();
                    }
                }
            }
        }
    }
+
+ /**
+ * Clean up resources.
+ */
+ protected void finalize() throws Throwable {
+ if (mVocabulary != 0 || mRecognizer != 0) {
+ destroy();
+ throw new IllegalStateException("someone forgot to destroy Recognizer");
+ }
+ }
+
+ /* an example session captured, for reference
+ void doall() {
+ if (PMemInit ( )
+ || lhs_audioinOpen ( WAVE_MAPPER, SREC_TEST_DEFAULT_AUDIO_FREQUENCY, &audio_in_handle )
+ || srec_test_init_application_data ( &applicationData, argc, argv )
+ || SR_SessionCreate ( "/system/usr/srec/config/en.us/baseline11k.par" )
+ || SR_RecognizerCreate ( &applicationData.recognizer )
+ || SR_RecognizerSetup ( applicationData.recognizer)
+ || ESR_SessionGetLCHAR ( L("cmdline.vocabulary"), filename, &flen )
+ || SR_VocabularyLoad ( filename, &applicationData.vocabulary )
+ || SR_VocabularyGetLanguage ( applicationData.vocabulary, &applicationData.locale )
+ || (applicationData.nametag = NULL)
+ || SR_NametagsCreate ( &applicationData.nametags )
+ || (LSTRCPY ( applicationData.grammars [0].grammar_path, "/system/usr/srec/config/en.us/grammars/VoiceDialer.g2g" ), 0)
+ || (LSTRCPY ( applicationData.grammars [0].grammarID, "BothTags" ), 0)
+ || (LSTRCPY ( applicationData.grammars [0].ruleName, "trash" ), 0)
+ || (applicationData.grammars [0].is_ve_grammar = ESR_FALSE, 0)
+ || SR_GrammarLoad (applicationData.grammars [0].grammar_path, &applicationData.grammars [applicationData.grammarCount].grammar )
+ || SR_GrammarSetupVocabulary ( applicationData.grammars [0].grammar, applicationData.vocabulary )
+ || SR_GrammarSetupRecognizer( applicationData.grammars [0].grammar, applicationData.recognizer )
+ || SR_GrammarSetDispatchFunction ( applicationData.grammars [0].grammar, L("myDSMCallback"), NULL, myDSMCallback )
+ || (applicationData.grammarCount++, 0)
+ || SR_RecognizerActivateRule ( applicationData.recognizer, applicationData.grammars [0].grammar,
+ applicationData.grammars [0].ruleName, 1 )
+ || (applicationData.active_grammar_num = 0, 0)
+ || lhs_audioinStart ( audio_in_handle )
+ || SR_RecognizerStart ( applicationData.recognizer )
+ || strl ( applicationData.grammars [0].grammar, &applicationData, audio_in_handle, &recognition_count )
+ || SR_RecognizerStop ( applicationData.recognizer )
+ || lhs_audioinStop ( audio_in_handle )
+ || SR_RecognizerDeactivateRule ( applicationData.recognizer, applicationData.grammars [0].grammar, applicationData.grammars [0].ruleName )
+ || (applicationData.active_grammar_num = -1, 0)
+ || SR_GrammarDestroy ( applicationData.grammars [0].grammar )
+ || (applicationData.grammarCount--, 0)
+ || SR_NametagsDestroy ( applicationData.nametags )
+ || (applicationData.nametags = NULL, 0)
+ || SR_VocabularyDestroy ( applicationData.vocabulary )
+ || (applicationData.vocabulary = NULL)
+ || SR_RecognizerUnsetup ( applicationData.recognizer) // releases acoustic models
+ || SR_RecognizerDestroy ( applicationData.recognizer )
+ || (applicationData.recognizer = NULL)
+ || SR_SessionDestroy ( )
+ || srec_test_shutdown_application_data ( &applicationData )
+ || lhs_audioinClose ( &audio_in_handle )
+ || PMemShutdown ( )
+ }
+ */
+
+
    //
    // PMem native methods
    //
    // Portable-memory subsystem hooks: the constructor calls PMemInit()
    // first and destroy() calls PMemShutdown() last.
    private static native void PMemInit();
    private static native void PMemShutdown();


    //
    // SR_Session native methods
    //
    private static native void SR_SessionCreate(String filename);
    private static native void SR_SessionDestroy();
+
+ //
+ // SR_Recognizer native methods
+ //
+
    // Event codes returned by advance(); contiguous values 0..12.

    /**
     * Reserved value.
     */
    public final static int EVENT_INVALID = 0;

    /**
     * <code>Recognizer</code> could not find a match for the utterance.
     */
    public final static int EVENT_NO_MATCH = 1;

    /**
     * <code>Recognizer</code> processed one frame of audio.
     */
    public final static int EVENT_INCOMPLETE = 2;

    /**
     * <code>Recognizer</code> has just been started.
     */
    public final static int EVENT_STARTED = 3;

    /**
     * <code>Recognizer</code> is stopped.
     */
    public final static int EVENT_STOPPED = 4;

    /**
     * Beginning of speech detected.
     */
    public final static int EVENT_START_OF_VOICING = 5;

    /**
     * End of speech detected.
     */
    public final static int EVENT_END_OF_VOICING = 6;

    /**
     * Beginning of utterance occurred too soon.
     */
    public final static int EVENT_SPOKE_TOO_SOON = 7;

    /**
     * Recognition match detected.
     */
    public final static int EVENT_RECOGNITION_RESULT = 8;

    /**
     * Timeout occurred before beginning of utterance.
     */
    public final static int EVENT_START_OF_UTTERANCE_TIMEOUT = 9;

    /**
     * Timeout occurred before speech recognition could complete.
     */
    public final static int EVENT_RECOGNITION_TIMEOUT = 10;

    /**
     * Not enough samples to process one frame.
     */
    public final static int EVENT_NEED_MORE_AUDIO = 11;

    /**
     * More audio encountered than is allowed by 'swirec_max_speech_duration'.
     */
    public final static int EVENT_MAX_SPEECH = 12;
+
+ /**
+ * Produce a displayable string from an <code>advance</code> event.
+ * @param event
+ * @return String representing the event.
+ */
+ public static String eventToString(int event) {
+ switch (event) {
+ case EVENT_INVALID:
+ return "EVENT_INVALID";
+ case EVENT_NO_MATCH:
+ return "EVENT_NO_MATCH";
+ case EVENT_INCOMPLETE:
+ return "EVENT_INCOMPLETE";
+ case EVENT_STARTED:
+ return "EVENT_STARTED";
+ case EVENT_STOPPED:
+ return "EVENT_STOPPED";
+ case EVENT_START_OF_VOICING:
+ return "EVENT_START_OF_VOICING";
+ case EVENT_END_OF_VOICING:
+ return "EVENT_END_OF_VOICING";
+ case EVENT_SPOKE_TOO_SOON:
+ return "EVENT_SPOKE_TOO_SOON";
+ case EVENT_RECOGNITION_RESULT:
+ return "EVENT_RECOGNITION_RESULT";
+ case EVENT_START_OF_UTTERANCE_TIMEOUT:
+ return "EVENT_START_OF_UTTERANCE_TIMEOUT";
+ case EVENT_RECOGNITION_TIMEOUT:
+ return "EVENT_RECOGNITION_TIMEOUT";
+ case EVENT_NEED_MORE_AUDIO:
+ return "EVENT_NEED_MORE_AUDIO";
+ case EVENT_MAX_SPEECH:
+ return "EVENT_MAX_SPEECH";
+ }
+ return "EVENT_" + event;
+ }
+
    //
    // SR_Recognizer methods
    //
    // All int parameters named "recognizer"/"grammar"/"vocabulary" below are
    // opaque native handles obtained from the corresponding *Create/*Load calls.
    private static native void SR_RecognizerStart(int recognizer);
    private static native void SR_RecognizerStop(int recognizer);
    private static native int SR_RecognizerCreate();
    private static native void SR_RecognizerDestroy(int recognizer);
    private static native void SR_RecognizerSetup(int recognizer);
    private static native void SR_RecognizerUnsetup(int recognizer);
    private static native boolean SR_RecognizerIsSetup(int recognizer);
    private static native String SR_RecognizerGetParameter(int recognizer, String key);
    private static native int SR_RecognizerGetSize_tParameter(int recognizer, String key);
    private static native boolean SR_RecognizerGetBoolParameter(int recognizer, String key);
    private static native void SR_RecognizerSetParameter(int recognizer, String key, String value);
    private static native void SR_RecognizerSetSize_tParameter(int recognizer,
            String key, int value);
    private static native void SR_RecognizerSetBoolParameter(int recognizer, String key,
            boolean value);
    private static native void SR_RecognizerSetupRule(int recognizer, int grammar,
            String ruleName);
    private static native boolean SR_RecognizerHasSetupRules(int recognizer);
    private static native void SR_RecognizerActivateRule(int recognizer, int grammar,
            String ruleName, int weight);
    private static native void SR_RecognizerDeactivateRule(int recognizer, int grammar,
            String ruleName);
    private static native void SR_RecognizerDeactivateAllRules(int recognizer);
    private static native boolean SR_RecognizerIsActiveRule(int recognizer, int grammar,
            String ruleName);
    private static native boolean SR_RecognizerCheckGrammarConsistency(int recognizer,
            int grammar);
    private static native int SR_RecognizerPutAudio(int recognizer, byte[] buffer, int offset,
            int length, boolean isLast);
    private static native int SR_RecognizerAdvance(int recognizer);
    // private static native void SR_RecognizerLoadUtterance(int recognizer,
    //         const LCHAR* filename);
    // private static native void SR_RecognizerLoadWaveFile(int recognizer,
    //         const LCHAR* filename);
    // private static native void SR_RecognizerSetLockFunction(int recognizer,
    //         SR_RecognizerLockFunction function, void* data);
    private static native boolean SR_RecognizerIsSignalClipping(int recognizer);
    private static native boolean SR_RecognizerIsSignalDCOffset(int recognizer);
    private static native boolean SR_RecognizerIsSignalNoisy(int recognizer);
    private static native boolean SR_RecognizerIsSignalTooQuiet(int recognizer);
    private static native boolean SR_RecognizerIsSignalTooFewSamples(int recognizer);
    private static native boolean SR_RecognizerIsSignalTooManySamples(int recognizer);
    // private static native void SR_Recognizer_Change_Sample_Rate (size_t new_sample_rate);


    //
    // SR_AcousticState native methods
    //
    private static native void SR_AcousticStateReset(int recognizer);
    private static native void SR_AcousticStateSet(int recognizer, String state);
    private static native String SR_AcousticStateGet(int recognizer);


    //
    // SR_Grammar native methods
    //
    private static native void SR_GrammarCompile(int grammar);
    private static native void SR_GrammarAddWordToSlot(int grammar, String slot,
            String word, String pronunciation, int weight, String tag);
    private static native void SR_GrammarResetAllSlots(int grammar);
    // private static native void SR_GrammarAddNametagToSlot(int grammar, String slot,
    //         const struct SR_Nametag_t* nametag, int weight, String tag);
    private static native void SR_GrammarSetupVocabulary(int grammar, int vocabulary);
    // private static native void SR_GrammarSetupModels(int grammar, SR_AcousticModels* models);
    private static native void SR_GrammarSetupRecognizer(int grammar, int recognizer);
    private static native void SR_GrammarUnsetupRecognizer(int grammar);
    // private static native void SR_GrammarGetModels(int grammar,SR_AcousticModels** models);
    private static native int SR_GrammarCreate();
    private static native void SR_GrammarDestroy(int grammar);
    private static native int SR_GrammarLoad(String filename);
+ private static native void SR_GrammarSave(int grammar, String filename);
+ // private static native void SR_GrammarSetDispatchFunction(int grammar,
+ // const LCHAR* name, void* userData, SR_GrammarDispatchFunction function);
+ // private static native void SR_GrammarSetParameter(int grammar, const
+ // LCHAR* key, void* value);
+ // private static native void SR_GrammarSetSize_tParameter(int grammar,
+ // const LCHAR* key, size_t value);
+ // private static native void SR_GrammarGetParameter(int grammar, const
+ // LCHAR* key, void** value);
+ // private static native void SR_GrammarGetSize_tParameter(int grammar,
+ // const LCHAR* key, size_t* value);
+ // private static native void SR_GrammarCheckParse(int grammar, const LCHAR*
+ // transcription, SR_SemanticResult** result, size_t* resultCount);
+ private static native void SR_GrammarAllowOnly(int grammar, String transcription);
+ private static native void SR_GrammarAllowAll(int grammar);
+
+
+ //
+ // SR_Vocabulary native methods
+ //
+ // private static native int SR_VocabularyCreate();
+ private static native int SR_VocabularyLoad();
+ // private static native void SR_VocabularySave(SR_Vocabulary* self,
+ // const LCHAR* filename);
+ // private static native void SR_VocabularyAddWord(SR_Vocabulary* self,
+ // const LCHAR* word);
+ // private static native void SR_VocabularyGetLanguage(SR_Vocabulary* self,
+ // ESR_Locale* locale);
+ private static native void SR_VocabularyDestroy(int vocabulary);
+ private static native String SR_VocabularyGetPronunciation(int vocabulary, String word);
+
+
+ //
+ // SR_RecognizerResult native methods
+ //
+ private static native byte[] SR_RecognizerResultGetWaveform(int recognizer);
+ private static native int SR_RecognizerResultGetSize(int recognizer);
+ private static native int SR_RecognizerResultGetKeyCount(int recognizer, int nbest);
+ private static native String[] SR_RecognizerResultGetKeyList(int recognizer, int nbest);
+ private static native String SR_RecognizerResultGetValue(int recognizer,
+ int nbest, String key);
+ // private static native void SR_RecognizerResultGetLocale(int recognizer, ESR_Locale* locale);
+}
diff --git a/core/java/android/speech/srec/UlawEncoderInputStream.java b/core/java/android/speech/srec/UlawEncoderInputStream.java
new file mode 100644
index 0000000..132fe027
--- /dev/null
+++ b/core/java/android/speech/srec/UlawEncoderInputStream.java
@@ -0,0 +1,186 @@
+/*
+ * ---------------------------------------------------------------------------
+ * UlawEncoderInputStream.java
+ *
+ * Copyright 2008 Nuance Communications, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the 'License'); you may not
+ * use this file except in compliance with the License.
+ *
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an 'AS IS' BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * ---------------------------------------------------------------------------
+ */
+
+package android.speech.srec;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * InputStream which transforms 16 bit pcm data to ulaw data.
+ *
+ * @hide pending API council approval
+ */
public final class UlawEncoderInputStream extends InputStream {
    private final static String TAG = "UlawEncoderInputStream";

    /** Largest pcm magnitude representable by the ulaw table below. */
    private final static int MAX_ULAW = 8192;
    /** Fixed-point fraction bits used by the scaling coefficient. */
    private final static int SCALE_BITS = 16;

    // Underlying 16 bit pcm stream; null after close().
    private InputStream mIn;

    // pcm value mapped to the maximum ulaw value; 0 selects MAX_ULAW (no scaling).
    private int mMax = 0;

    // Staging buffer for raw pcm bytes read from mIn.
    private final byte[] mBuf = new byte[1024];
    // Bytes currently held in mBuf; between read() calls this is 0 or 1
    // (a leftover odd byte waiting for its pair).
    private int mBufCount = 0;

    // Scratch buffer backing the single-byte read() overload.
    private final byte[] mOneByte = new byte[1];


    /**
     * Encode 16 bit signed little-endian pcm samples into ulaw bytes.
     * @param pcmBuf array containing 16 bit pcm data.
     * @param pcmOffset offset of start of 16 bit pcm data.
     * @param ulawBuf array to receive ulaw data.
     * @param ulawOffset offset of start of ulaw data.
     * @param length number of pcm samples (not number of input bytes).
     * @param max pcm value corresponding to maximum ulaw value; 0 or
     * negative selects the default {@code MAX_ULAW} (no scaling).
     */
    public static void encode(byte[] pcmBuf, int pcmOffset,
            byte[] ulawBuf, int ulawOffset, int length, int max) {

        // from 'ulaw' in wikipedia
        // +8191 to +8159                          0x80
        // +8158 to +4063 in 16 intervals of 256   0x80 + interval number
        // +4062 to +2015 in 16 intervals of 128   0x90 + interval number
        // +2014 to +991  in 16 intervals of 64    0xA0 + interval number
        // +990  to +479  in 16 intervals of 32    0xB0 + interval number
        // +478  to +223  in 16 intervals of 16    0xC0 + interval number
        // +222  to +95   in 16 intervals of 8     0xD0 + interval number
        // +94   to +31   in 16 intervals of 4     0xE0 + interval number
        // +30   to +1    in 15 intervals of 2     0xF0 + interval number
        // 0                                       0xFF

        // -1                                      0x7F
        // -31   to -2    in 15 intervals of 2     0x70 + interval number
        // -95   to -32   in 16 intervals of 4     0x60 + interval number
        // -223  to -96   in 16 intervals of 8     0x50 + interval number
        // -479  to -224  in 16 intervals of 16    0x40 + interval number
        // -991  to -480  in 16 intervals of 32    0x30 + interval number
        // -2015 to -992  in 16 intervals of 64    0x20 + interval number
        // -4063 to -2016 in 16 intervals of 128   0x10 + interval number
        // -8159 to -4064 in 16 intervals of 256   0x00 + interval number
        // -8192 to -8160                          0x00

        // set scale factors
        if (max <= 0) max = MAX_ULAW;

        int coef = MAX_ULAW * (1 << SCALE_BITS) / max;

        for (int i = 0; i < length; i++) {
            // assemble little-endian 16 bit sample, then rescale
            int pcm = (0xff & pcmBuf[pcmOffset++]) + (pcmBuf[pcmOffset++] << 8);
            pcm = (pcm * coef) >> SCALE_BITS;

            int ulaw;
            if (pcm >= 0) {
                ulaw = pcm <= 0 ? 0xff :
                        pcm <= 30 ? 0xf0 + ((  30 - pcm) >> 1) :
                        pcm <= 94 ? 0xe0 + ((  94 - pcm) >> 2) :
                        pcm <= 222 ? 0xd0 + (( 222 - pcm) >> 3) :
                        pcm <= 478 ? 0xc0 + (( 478 - pcm) >> 4) :
                        pcm <= 990 ? 0xb0 + (( 990 - pcm) >> 5) :
                        pcm <= 2014 ? 0xa0 + ((2014 - pcm) >> 6) :
                        pcm <= 4062 ? 0x90 + ((4062 - pcm) >> 7) :
                        pcm <= 8158 ? 0x80 + ((8158 - pcm) >> 8) :
                        0x80;
            } else {
                ulaw = -1 <= pcm ? 0x7f :
                        -31 <= pcm ? 0x70 + ((pcm - -31) >> 1) :
                        -95 <= pcm ? 0x60 + ((pcm - -95) >> 2) :
                        -223 <= pcm ? 0x50 + ((pcm - -223) >> 3) :
                        -479 <= pcm ? 0x40 + ((pcm - -479) >> 4) :
                        -991 <= pcm ? 0x30 + ((pcm - -991) >> 5) :
                        -2015 <= pcm ? 0x20 + ((pcm - -2015) >> 6) :
                        -4063 <= pcm ? 0x10 + ((pcm - -4063) >> 7) :
                        -8159 <= pcm ? 0x00 + ((pcm - -8159) >> 8) :
                        0x00;
            }
            ulawBuf[ulawOffset++] = (byte)ulaw;
        }
    }

    /**
     * Compute the maximum of the absolute value of the pcm samples.
     * The return value can be used to set ulaw encoder scaling.
     * @param pcmBuf array containing 16 bit pcm data.
     * @param offset offset of start of 16 bit pcm data.
     * @param length number of pcm samples (not number of input bytes)
     * @return maximum abs of pcm data values
     */
    public static int maxAbsPcm(byte[] pcmBuf, int offset, int length) {
        int max = 0;
        for (int i = 0; i < length; i++) {
            int pcm = (0xff & pcmBuf[offset++]) + (pcmBuf[offset++] << 8);
            if (pcm < 0) pcm = -pcm;
            if (pcm > max) max = pcm;
        }
        return max;
    }

    /**
     * Create an InputStream which takes 16 bit pcm data and produces ulaw data.
     * @param in InputStream containing 16 bit pcm data.
     * @param max pcm value corresponding to maximum ulaw value.
     */
    public UlawEncoderInputStream(InputStream in, int max) {
        mIn = in;
        mMax = max;
    }

    /**
     * Read up to {@code length} ulaw bytes, each companded from one buffered
     * pcm sample (two input bytes).
     * @return number of ulaw bytes produced, 0 if {@code length} is 0,
     * or -1 at end of the underlying stream.
     * @throws IOException propagated from the underlying stream.
     */
    @Override
    public int read(byte[] buf, int offset, int length) throws IOException {
        if (mIn == null) throw new IllegalStateException("not open");

        // BUGFIX: a zero-length request used to spin forever below, because
        // mIn.read(..., 0) returns 0 and never advances mBufCount.
        if (length == 0) return 0;

        // return at least one byte, but try to fill 'length';
        // compute the request size in long to avoid overflow for huge 'length'
        while (mBufCount < 2) {
            int want = (int) Math.min((long) length * 2, (long) (mBuf.length - mBufCount));
            int n = mIn.read(mBuf, mBufCount, want);
            if (n == -1) return -1;
            mBufCount += n;
        }

        // compand data
        int n = Math.min(mBufCount / 2, length);
        encode(mBuf, 0, buf, offset, n, mMax);

        // move data to bottom of mBuf
        mBufCount -= n * 2;
        for (int i = 0; i < mBufCount; i++) mBuf[i] = mBuf[i + n * 2];

        return n;
    }

    @Override
    public int read(byte[] buf) throws IOException {
        return read(buf, 0, buf.length);
    }

    @Override
    public int read() throws IOException {
        int n = read(mOneByte, 0, 1);
        if (n == -1) return -1;
        return 0xff & (int)mOneByte[0];
    }

    @Override
    public void close() throws IOException {
        if (mIn != null) {
            InputStream in = mIn;
            mIn = null;
            in.close();
        }
    }

    /**
     * @return number of ulaw bytes available without blocking;
     * 0 after the stream has been closed (previously threw NPE).
     */
    @Override
    public int available() throws IOException {
        // BUGFIX: guard against use after close() instead of throwing NPE.
        if (mIn == null) return 0;
        return (mIn.available() + mBufCount) / 2;
    }
}
diff --git a/core/java/android/speech/srec/WaveHeader.java b/core/java/android/speech/srec/WaveHeader.java
new file mode 100644
index 0000000..a99496d
--- /dev/null
+++ b/core/java/android/speech/srec/WaveHeader.java
@@ -0,0 +1,274 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.speech.srec;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+/**
+ * This class represents the header of a WAVE format audio file, which usually
+ * have a .wav suffix. The following integer valued fields are contained:
+ * <ul>
+ * <li> format - usually PCM, ALAW or ULAW.
+ * <li> numChannels - 1 for mono, 2 for stereo.
+ * <li> sampleRate - usually 8000, 11025, 16000, 22050, or 44100 hz.
+ * <li> bitsPerSample - usually 16 for PCM, 8 for ALAW, or 8 for ULAW.
+ * <li> numBytes - size of audio data after this header, in bytes.
+ * </ul>
+ * @hide pending API council approval
+ */
public class WaveHeader {

    // follows WAVE format in http://ccrma.stanford.edu/courses/422/projects/WaveFormat

    private static final String TAG = "WaveHeader";

    // Fixed size of the canonical 44-byte RIFF/WAVE header.
    private static final int HEADER_LENGTH = 44;

    /** Indicates PCM format. */
    public static final short FORMAT_PCM = 1;
    /** Indicates ALAW format. */
    public static final short FORMAT_ALAW = 6;
    /** Indicates ULAW format. */
    public static final short FORMAT_ULAW = 7;

    private short mFormat;
    private short mNumChannels;
    private int mSampleRate;
    private short mBitsPerSample;
    private int mNumBytes;

    /**
     * Construct a WaveHeader, with all fields defaulting to zero.
     */
    public WaveHeader() {
    }

    /**
     * Construct a WaveHeader, with fields initialized.
     * @param format format of audio data,
     * one of {@link #FORMAT_PCM}, {@link #FORMAT_ULAW}, or {@link #FORMAT_ALAW}.
     * @param numChannels 1 for mono, 2 for stereo.
     * @param sampleRate typically 8000, 11025, 16000, 22050, or 44100 hz.
     * @param bitsPerSample usually 16 for PCM, 8 for ULAW or 8 for ALAW.
     * @param numBytes size of audio data after this header, in bytes.
     */
    public WaveHeader(short format, short numChannels, int sampleRate, short bitsPerSample, int numBytes) {
        mFormat = format;
        mSampleRate = sampleRate;
        mNumChannels = numChannels;
        mBitsPerSample = bitsPerSample;
        mNumBytes = numBytes;
    }

    /**
     * Get the format field.
     * @return format field,
     * one of {@link #FORMAT_PCM}, {@link #FORMAT_ULAW}, or {@link #FORMAT_ALAW}.
     */
    public short getFormat() {
        return mFormat;
    }

    /**
     * Set the format field.
     * @param format
     * one of {@link #FORMAT_PCM}, {@link #FORMAT_ULAW}, or {@link #FORMAT_ALAW}.
     * @return reference to this WaveHeader instance.
     */
    public WaveHeader setFormat(short format) {
        mFormat = format;
        return this;
    }

    /**
     * Get the number of channels.
     * @return number of channels, 1 for mono, 2 for stereo.
     */
    public short getNumChannels() {
        return mNumChannels;
    }

    /**
     * Set the number of channels.
     * @param numChannels 1 for mono, 2 for stereo.
     * @return reference to this WaveHeader instance.
     */
    public WaveHeader setNumChannels(short numChannels) {
        mNumChannels = numChannels;
        return this;
    }

    /**
     * Get the sample rate.
     * @return sample rate, typically 8000, 11025, 16000, 22050, or 44100 hz.
     */
    public int getSampleRate() {
        return mSampleRate;
    }

    /**
     * Set the sample rate.
     * @param sampleRate sample rate, typically 8000, 11025, 16000, 22050, or 44100 hz.
     * @return reference to this WaveHeader instance.
     */
    public WaveHeader setSampleRate(int sampleRate) {
        mSampleRate = sampleRate;
        return this;
    }

    /**
     * Get the number of bits per sample.
     * @return number of bits per sample,
     * usually 16 for PCM, 8 for ULAW or 8 for ALAW.
     */
    public short getBitsPerSample() {
        return mBitsPerSample;
    }

    /**
     * Set the number of bits per sample.
     * @param bitsPerSample number of bits per sample,
     * usually 16 for PCM, 8 for ULAW or 8 for ALAW.
     * @return reference to this WaveHeader instance.
     */
    public WaveHeader setBitsPerSample(short bitsPerSample) {
        mBitsPerSample = bitsPerSample;
        return this;
    }

    /**
     * Get the size of audio data after this header, in bytes.
     * @return size of audio data after this header, in bytes.
     */
    public int getNumBytes() {
        return mNumBytes;
    }

    /**
     * Set the size of audio data after this header, in bytes.
     * @param numBytes size of audio data after this header, in bytes.
     * @return reference to this WaveHeader instance.
     */
    public WaveHeader setNumBytes(int numBytes) {
        mNumBytes = numBytes;
        return this;
    }

    /**
     * Read and initialize a WaveHeader.
     * @param in {@link java.io.InputStream} to read from.
     * @return number of bytes consumed.
     * @throws IOException if the stream is malformed, inconsistent, or truncated.
     */
    public int read(InputStream in) throws IOException {
        /* RIFF header */
        readId(in, "RIFF");
        // RIFF chunk size; not stored -- the data chunk length below is authoritative.
        int numBytes = readInt(in) - 36;
        readId(in, "WAVE");

        /* fmt chunk */
        readId(in, "fmt ");
        if (16 != readInt(in)) throw new IOException("fmt chunk length not 16");
        mFormat = readShort(in);
        mNumChannels = readShort(in);
        mSampleRate = readInt(in);
        int byteRate = readInt(in);
        short blockAlign = readShort(in);
        mBitsPerSample = readShort(in);
        // cross-check the redundant fmt fields against the primary ones
        if (byteRate != mNumChannels * mSampleRate * mBitsPerSample / 8) {
            throw new IOException("fmt.ByteRate field inconsistent");
        }
        if (blockAlign != mNumChannels * mBitsPerSample / 8) {
            throw new IOException("fmt.BlockAlign field inconsistent");
        }

        /* data chunk */
        readId(in, "data");
        mNumBytes = readInt(in);

        return HEADER_LENGTH;
    }

    // Compare the next id.length() bytes of the stream against the expected tag.
    private static void readId(InputStream in, String id) throws IOException {
        for (int i = 0; i < id.length(); i++) {
            if (id.charAt(i) != in.read()) throw new IOException( id + " tag not present");
        }
    }

    // Read one byte, failing loudly on EOF.
    // BUGFIX: readInt/readShort previously folded the -1 EOF sentinel into
    // garbage field values on truncated input instead of raising an error.
    private static int readByte(InputStream in) throws IOException {
        int b = in.read();
        if (b == -1) throw new IOException("unexpected end of stream");
        return b;
    }

    // Read a little-endian 32 bit integer.
    private static int readInt(InputStream in) throws IOException {
        return readByte(in) | (readByte(in) << 8) | (readByte(in) << 16) | (readByte(in) << 24);
    }

    // Read a little-endian 16 bit integer.
    private static short readShort(InputStream in) throws IOException {
        return (short)(readByte(in) | (readByte(in) << 8));
    }

    /**
     * Write a WAVE file header.
     * @param out {@link java.io.OutputStream} to receive the header.
     * @return number of bytes written.
     * @throws IOException
     */
    public int write(OutputStream out) throws IOException {
        /* RIFF header */
        writeId(out, "RIFF");
        writeInt(out, 36 + mNumBytes);
        writeId(out, "WAVE");

        /* fmt chunk */
        writeId(out, "fmt ");
        writeInt(out, 16);
        writeShort(out, mFormat);
        writeShort(out, mNumChannels);
        writeInt(out, mSampleRate);
        writeInt(out, mNumChannels * mSampleRate * mBitsPerSample / 8);
        writeShort(out, (short)(mNumChannels * mBitsPerSample / 8));
        writeShort(out, mBitsPerSample);

        /* data chunk */
        writeId(out, "data");
        writeInt(out, mNumBytes);

        return HEADER_LENGTH;
    }

    private static void writeId(OutputStream out, String id) throws IOException {
        for (int i = 0; i < id.length(); i++) out.write(id.charAt(i));
    }

    private static void writeInt(OutputStream out, int val) throws IOException {
        out.write(val >> 0);
        out.write(val >> 8);
        out.write(val >> 16);
        out.write(val >> 24);
    }

    private static void writeShort(OutputStream out, short val) throws IOException {
        out.write(val >> 0);
        out.write(val >> 8);
    }

    @Override
    public String toString() {
        return String.format(
                "WaveHeader format=%d numChannels=%d sampleRate=%d bitsPerSample=%d numBytes=%d",
                mFormat, mNumChannels, mSampleRate, mBitsPerSample, mNumBytes);
    }

}
diff --git a/core/java/android/speech/srec/package.html b/core/java/android/speech/srec/package.html
new file mode 100644
index 0000000..9a99df8
--- /dev/null
+++ b/core/java/android/speech/srec/package.html
@@ -0,0 +1,6 @@
+<HTML>
+<BODY>
+Simple, synchronous SREC speech recognition API.
+@hide
+</BODY>
+</HTML>