diff options
| author | The Android Open Source Project <initial-contribution@android.com> | 2009-03-03 19:31:44 -0800 | 
|---|---|---|
| committer | The Android Open Source Project <initial-contribution@android.com> | 2009-03-03 19:31:44 -0800 | 
| commit | 9066cfe9886ac131c34d59ed0e2d287b0e3c0087 (patch) | |
| tree | d88beb88001f2482911e3d28e43833b50e4b4e97 /core/java/android/speech | |
| parent | d83a98f4ce9cfa908f5c54bbd70f03eec07e7553 (diff) | |
| download | frameworks_base-9066cfe9886ac131c34d59ed0e2d287b0e3c0087.zip frameworks_base-9066cfe9886ac131c34d59ed0e2d287b0e3c0087.tar.gz frameworks_base-9066cfe9886ac131c34d59ed0e2d287b0e3c0087.tar.bz2 | |
auto import from //depot/cupcake/@135843
Diffstat (limited to 'core/java/android/speech')
| -rw-r--r-- | core/java/android/speech/RecognizerIntent.java | 157 | ||||
| -rw-r--r-- | core/java/android/speech/srec/MicrophoneInputStream.java | 110 | ||||
| -rw-r--r-- | core/java/android/speech/srec/Recognizer.java | 719 | ||||
| -rw-r--r-- | core/java/android/speech/srec/UlawEncoderInputStream.java | 186 | ||||
| -rw-r--r-- | core/java/android/speech/srec/WaveHeader.java | 274 | ||||
| -rw-r--r-- | core/java/android/speech/srec/package.html | 6 | 
6 files changed, 1452 insertions, 0 deletions
| diff --git a/core/java/android/speech/RecognizerIntent.java b/core/java/android/speech/RecognizerIntent.java new file mode 100644 index 0000000..987e763 --- /dev/null +++ b/core/java/android/speech/RecognizerIntent.java @@ -0,0 +1,157 @@ +/* + * Copyright (C) 2008 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + *      http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package android.speech; + +import android.app.Activity; +import android.content.ActivityNotFoundException; +import android.content.Intent; + +/** + * Constants for supporting speech recognition through starting an {@link Intent} + */ +public class RecognizerIntent { +    private RecognizerIntent() { +        // Not for instantiating. +    } + +    /** +     * Starts an activity that will prompt the user for speech and sends it through a +     * speech recognizer.  The results will be returned via activity results, or forwarded +     * via a PendingIntent if one is provided. 
+     *  +     * <p>Required extras: +     * <ul> +     *   <li>{@link #EXTRA_LANGUAGE_MODEL} +     * </ul> +     *  +     * <p>Optional extras: +     * <ul> +     *   <li>{@link #EXTRA_PROMPT} +     *   <li>{@link #EXTRA_LANGUAGE} +     *   <li>{@link #EXTRA_MAX_RESULTS} +     *   <li>{@link #EXTRA_RESULTS_PENDINGINTENT} +     *   <li>{@link #EXTRA_RESULTS_PENDINGINTENT_BUNDLE} +     * </ul> +     *  +     * <p> Result extras: +     * <ul> +     *   <li>{@link #EXTRA_RESULTS} +     * </ul> +     *  +     * <p>NOTE: There may not be any applications installed to handle this action, so you should +     * make sure to catch {@link ActivityNotFoundException}. +     */ +    public static final String ACTION_RECOGNIZE_SPEECH = "android.speech.action.RECOGNIZE_SPEECH"; + +    /** +     * Starts an activity that will prompt the user for speech, sends it through a +     * speech recognizer, and invokes and displays a web search result. +     *  +     * <p>Required extras: +     * <ul> +     *   <li>{@link #EXTRA_LANGUAGE_MODEL} +     * </ul> +     *  +     * <p>Optional extras: +     * <ul> +     *   <li>{@link #EXTRA_PROMPT} +     *   <li>{@link #EXTRA_LANGUAGE} +     *   <li>{@link #EXTRA_MAX_RESULTS} +     * </ul> +     *  +     * <p> Result extras: +     * <ul> +     *   <li>{@link #EXTRA_RESULTS} +     * </ul> +     *  +     * <p>NOTE: There may not be any applications installed to handle this action, so you should +     * make sure to catch {@link ActivityNotFoundException}. +     */ +    public static final String ACTION_WEB_SEARCH = "android.speech.action.WEB_SEARCH"; + +    /** +     * Informs the recognizer which speech model to prefer when performing +     * {@link #ACTION_RECOGNIZE_SPEECH}. The recognizer uses this +     * information to fine tune the results. This extra is required. Activities implementing +     * {@link #ACTION_RECOGNIZE_SPEECH} may interpret the values as they see fit. 
+     *  +     *  @see #LANGUAGE_MODEL_FREE_FORM +     *  @see #LANGUAGE_MODEL_WEB_SEARCH +     */ +    public static final String EXTRA_LANGUAGE_MODEL = "android.speech.extra.LANGUAGE_MODEL"; + +    /**  +     * Use a language model based on free-form speech recognition.  This is a value to use for  +     * {@link #EXTRA_LANGUAGE_MODEL}.  +     * @see #EXTRA_LANGUAGE_MODEL +     */ +    public static final String LANGUAGE_MODEL_FREE_FORM = "free_form"; +    /**  +     * Use a language model based on web search terms.  This is a value to use for  +     * {@link #EXTRA_LANGUAGE_MODEL}.  +     * @see #EXTRA_LANGUAGE_MODEL +     */ +    public static final String LANGUAGE_MODEL_WEB_SEARCH = "web_search"; + +    /** Optional text prompt to show to the user when asking them to speak. */ +    public static final String EXTRA_PROMPT = "android.speech.extra.PROMPT"; + +    /** +     * Optional language override to inform the recognizer that it should expect speech in +     * a language different than the one set in the {@link java.util.Locale#getDefault()}.  +     */ +    public static final String EXTRA_LANGUAGE = "android.speech.extra.LANGUAGE"; + +    /**  +     * Optional limit on the maximum number of results to return. If omitted the recognizer +     * will choose how many results to return. Must be an integer. +     */ +    public static final String EXTRA_MAX_RESULTS = "android.speech.extra.MAX_RESULTS"; +     +    /** +     * When the intent is {@link #ACTION_RECOGNIZE_SPEECH}, the speech input activity will +     * return results to you via the activity results mechanism.  Alternatively, if you use this +     * extra to supply a PendingIntent, the results will be added to its bundle and the  +     * PendingIntent will be sent to its target. 
+     */ +    public static final String EXTRA_RESULTS_PENDINGINTENT =  +            "android.speech.extra.RESULTS_PENDINGINTENT"; +    /** +     * If you use {@link #EXTRA_RESULTS_PENDINGINTENT} to supply a forwarding intent, you can +     * also use this extra to supply additional extras for the final intent.  The search results +     * will be added to this bundle, and the combined bundle will be sent to the target. +     */ +    public static final String EXTRA_RESULTS_PENDINGINTENT_BUNDLE =  +            "android.speech.extra.RESULTS_PENDINGINTENT_BUNDLE"; + +    /** Result code returned when no matches are found for the given speech */ +    public static final int RESULT_NO_MATCH = Activity.RESULT_FIRST_USER; +    /** Result code returned when there is a generic client error */ +    public static final int RESULT_CLIENT_ERROR = Activity.RESULT_FIRST_USER + 1; +    /** Result code returned when the recognition server returns an error */ +    public static final int RESULT_SERVER_ERROR = Activity.RESULT_FIRST_USER + 2; +    /** Result code returned when a network error was encountered */ +    public static final int RESULT_NETWORK_ERROR = Activity.RESULT_FIRST_USER + 3; +    /** Result code returned when an audio error was encountered */ +    public static final int RESULT_AUDIO_ERROR = Activity.RESULT_FIRST_USER + 4; + +    /** +     * An ArrayList<String> of the potential results when performing +     * {@link #ACTION_RECOGNIZE_SPEECH}. Only present when {@link Activity#RESULT_OK} is returned. 
+     */ +    public static final String EXTRA_RESULTS = "android.speech.extra.RESULTS"; +} diff --git a/core/java/android/speech/srec/MicrophoneInputStream.java b/core/java/android/speech/srec/MicrophoneInputStream.java new file mode 100644 index 0000000..fab77a9 --- /dev/null +++ b/core/java/android/speech/srec/MicrophoneInputStream.java @@ -0,0 +1,110 @@ +/*---------------------------------------------------------------------------* + *  MicrophoneInputStream.java                                               * + *                                                                           * + *  Copyright 2007 Nuance Communciations, Inc.                               * + *                                                                           * + *  Licensed under the Apache License, Version 2.0 (the 'License');          * + *  you may not use this file except in compliance with the License.         * + *                                                                           * + *  You may obtain a copy of the License at                                  * + *      http://www.apache.org/licenses/LICENSE-2.0                           * + *                                                                           * + *  Unless required by applicable law or agreed to in writing, software      * + *  distributed under the License is distributed on an 'AS IS' BASIS,        * + *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * + *  See the License for the specific language governing permissions and      * + *  limitations under the License.                                           
* + *                                                                           * + *---------------------------------------------------------------------------*/ + + +package android.speech.srec; + +import java.io.IOException; +import java.io.InputStream; +import java.lang.IllegalStateException; + + +/** + * PCM input stream from the microphone, 16 bits per sample. + */ +public final class MicrophoneInputStream extends InputStream { +    static { +        System.loadLibrary("srec_jni"); +    } +     +    private final static String TAG = "MicrophoneInputStream"; +    private int mAudioRecord = 0; +    private byte[] mOneByte = new byte[1]; +     +    /** +     * MicrophoneInputStream constructor. +     * @param sampleRate sample rate of the microphone, typically 11025 or 8000. +     * @param fifoDepth depth of the real time fifo, measured in sampleRate clock ticks. +     * This determines how long an application may delay before losing data. +     */ +    public MicrophoneInputStream(int sampleRate, int fifoDepth) throws IOException { +        mAudioRecord = AudioRecordNew(sampleRate, fifoDepth); +        if (mAudioRecord == 0) throw new IOException("AudioRecord constructor failed - busy?"); +        int status = AudioRecordStart(mAudioRecord); +        if (status != 0) { +            close(); +            throw new IOException("AudioRecord start failed: " + status); +        } +    } + +    @Override +    public int read() throws IOException { +        if (mAudioRecord == 0) throw new IllegalStateException("not open"); +        int rtn = AudioRecordRead(mAudioRecord, mOneByte, 0, 1); +        return rtn == 1 ? 
((int)mOneByte[0] & 0xff) : -1; +    } + +    @Override +    public int read(byte[] b) throws IOException { +        if (mAudioRecord == 0) throw new IllegalStateException("not open"); +        return AudioRecordRead(mAudioRecord, b, 0, b.length); +    } +     +    @Override +    public int read(byte[] b, int offset, int length) throws IOException { +        if (mAudioRecord == 0) throw new IllegalStateException("not open"); +        // TODO: should we force all reads to be a multiple of the sample size? +        return AudioRecordRead(mAudioRecord, b, offset, length); +    } +     +    /** +     * Closes this stream. +     */ +    @Override +    public void close() throws IOException { +        if (mAudioRecord != 0) { +            try { +                AudioRecordStop(mAudioRecord); +            } finally { +                try { +                    AudioRecordDelete(mAudioRecord); +                } finally { +                    mAudioRecord = 0; +                } +            } +        } +    } +     +    @Override +    protected void finalize() throws Throwable { +        if (mAudioRecord != 0) { +            close(); +            throw new IOException("someone forgot to close MicrophoneInputStream"); +        } +    } +     +    // +    // AudioRecord JNI interface +    // +    private static native int AudioRecordNew(int sampleRate, int fifoDepth); +    private static native int AudioRecordStart(int audioRecord); +    private static native int AudioRecordRead(int audioRecord, byte[] b, int offset, int length) throws IOException; +    private static native void AudioRecordStop(int audioRecord) throws IOException; +    private static native void AudioRecordDelete(int audioRecord) throws IOException; +} diff --git a/core/java/android/speech/srec/Recognizer.java b/core/java/android/speech/srec/Recognizer.java new file mode 100644 index 0000000..a03a36a --- /dev/null +++ b/core/java/android/speech/srec/Recognizer.java @@ -0,0 +1,719 @@ +/* + * 
--------------------------------------------------------------------------- + * Recognizer.java + *  + * Copyright 2007 Nuance Communciations, Inc. + *  + * Licensed under the Apache License, Version 2.0 (the 'License'); you may not + * use this file except in compliance with the License. + *  + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + *  + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an 'AS IS' BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + *  + * --------------------------------------------------------------------------- + */ + + +package android.speech.srec; + +import android.util.Config; +import android.util.Log; + +import java.io.File; +import java.io.InputStream; +import java.io.IOException; +import java.util.Locale; + +/** + * Simple, synchronous speech recognizer, using the Nuance SREC package. + * Usage proceeds as follows: + *  + * <ul> + * <li>Create a <code>Recognizer</code>. + * <li>Create a <code>Recognizer.Grammar</code>. + * <li>Setup the <code>Recognizer.Grammar</code>. + * <li>Reset the <code>Recognizer.Grammar</code> slots, if needed. + * <li>Fill the <code>Recognizer.Grammar</code> slots, if needed. + * <li>Compile the <code>Recognizer.Grammar</code>, if needed. + * <li>Save the filled <code>Recognizer.Grammar</code>, if needed. + * <li>Start the <code>Recognizer</code>. + * <li>Loop over <code>advance</code> and <code>putAudio</code> until recognition complete. + * <li>Fetch and process results, or notify of failure. + * <li>Stop the <code>Recognizer</code>. + * <li>Destroy the <code>Recognizer</code>. 
+ * </ul> + *  + * <p>Below is example code</p> + *  + * <pre class="prettyprint"> + *  + * // create and start audio input + * InputStream audio = new MicrophoneInputStream(11025, 11025*5); + * // create a Recognizer + * String cdir = Recognizer.getConfigDir(null); + * Recognizer recognizer = new Recognizer(cdir + "/baseline11k.par"); + * // create and load a Grammar + * Recognizer.Grammar grammar = recognizer.new Grammar(cdir + "/grammars/VoiceDialer.g2g"); + * // setup the Grammar to work with the Recognizer + * grammar.setupRecognizer(); + * // fill the Grammar slots with names and save, if required + * grammar.resetAllSlots(); + * for (String name : names) grammar.addWordToSlot("@Names", name, null, 1, "V=1"); + * grammar.compile(); + * grammar.save(".../foo.g2g"); + * // start the Recognizer + * recognizer.start(); + * // loop over Recognizer events + * while (true) { + *     switch (recognizer.advance()) { + *     case Recognizer.EVENT_INCOMPLETE: + *     case Recognizer.EVENT_STARTED: + *     case Recognizer.EVENT_START_OF_VOICING: + *     case Recognizer.EVENT_END_OF_VOICING: + *         // let the Recognizer continue to run + *         continue; + *     case Recognizer.EVENT_RECOGNITION_RESULT: + *         // success, so fetch results here! 
+ *         for (int i = 0; i < recognizer.getResultCount(); i++) { + *             String result = recognizer.getResult(i, Recognizer.KEY_LITERAL); + *         } + *         break; + *     case Recognizer.EVENT_NEED_MORE_AUDIO: + *         // put more audio in the Recognizer + *         recognizer.putAudio(audio); + *         continue; + *     default: + *         notifyFailure(); + *         break; + *     } + *     break; + * } + * // stop the Recognizer + * recognizer.stop(); + * // destroy the Recognizer + * recognizer.destroy(); + * // stop the audio device + * audio.close(); + *  + * </pre> + */ +public final class Recognizer { +    static { +        System.loadLibrary("srec_jni"); +    } + +    private static String TAG = "Recognizer"; +     +    /** +     * Result key corresponding to confidence score. +     */ +    public static final String KEY_CONFIDENCE = "conf"; +     +    /** +     * Result key corresponding to literal text. +     */ +    public static final String KEY_LITERAL = "literal"; +     +    /** +     * Result key corresponding to semantic meaning text. +     */ +    public static final String KEY_MEANING = "meaning"; + +    // handle to SR_Vocabulary object +    private int mVocabulary = 0; +     +    // handle to SR_Recognizer object +    private int mRecognizer = 0; +     +    // Grammar currently associated with Recognizer via SR_GrammarSetupRecognizer +    private Grammar mActiveGrammar = null; +     +    /** +     * Get the pathname of the SREC configuration directory corresponding to the +     * language indicated by the Locale. +     * This directory contains dictionaries, speech models, +     * configuration files, and other data needed by the Recognizer. +     * @param locale <code>Locale</code> corresponding to the desired language, +     * or null for default, currently <code>Locale.US</code>. +     * @return Pathname of the configuration directory. 
+     */ +    public static String getConfigDir(Locale locale) { +        if (locale == null) locale = Locale.US; +        String dir = "/system/usr/srec/config/" + +                locale.toString().replace('_', '.').toLowerCase(); +        if ((new File(dir)).isDirectory()) return dir; +        return null; +    } + +    /** +     * Create an instance of a SREC speech recognizer. +     *  +     * @param configFile pathname of the baseline*.par configuration file, +     * which in turn contains references to dictionaries, speech models, +     * and other data needed to configure and operate the recognizer. +     * A separate config file is needed for each audio sample rate. +     * Two files, baseline11k.par and baseline8k.par, which correspond to +     * 11025 and 8000 hz, are present in the directory indicated by +     * {@link #getConfigDir}. +     * @throws IOException +     */ +    public Recognizer(String configFile) throws IOException { +        PMemInit(); +        SR_SessionCreate(configFile); +        mRecognizer = SR_RecognizerCreate(); +        SR_RecognizerSetup(mRecognizer); +        mVocabulary = SR_VocabularyLoad(); +    } + +    /** +     * Represents a grammar loaded into the Recognizer. +     */ +    public class Grammar { +        private int mGrammar = 0; + +        /** +         * Create a <code>Grammar</code> instance. +         * @param g2gFileName pathname of g2g file. +         */ +        public Grammar(String g2gFileName) throws IOException { +            mGrammar = SR_GrammarLoad(g2gFileName); +            SR_GrammarSetupVocabulary(mGrammar, mVocabulary); +        } + +        /** +         * Reset all slots. +         */ +        public void resetAllSlots() { +            SR_GrammarResetAllSlots(mGrammar); +        } + +        /** +         * Add a word to a slot. +         *  +         * @param slot slot name. +         * @param word word to insert. +         * @param pron pronunciation, or null to derive from word. 
+         * @param weight weight to give the word.  One is normal, 50 is low. +         * @param tag semantic meaning tag string. +         */ +        public void addWordToSlot(String slot, String word, String pron, int weight, String tag) { +            SR_GrammarAddWordToSlot(mGrammar, slot, word, pron, weight, tag);  +        } + +        /** +         * Compile all slots. +         */ +        public void compile() { +            SR_GrammarCompile(mGrammar); +        } + +        /** +         * Setup <code>Grammar</code> with <code>Recognizer</code>. +         */ +        public void setupRecognizer() { +            SR_GrammarSetupRecognizer(mGrammar, mRecognizer); +            mActiveGrammar = this; +        } + +        /** +         * Save <code>Grammar</code> to g2g file. +         *  +         * @param g2gFileName +         * @throws IOException +         */ +        public void save(String g2gFileName) throws IOException { +            SR_GrammarSave(mGrammar, g2gFileName); +        } + +        /** +         * Release resources associated with this <code>Grammar</code>. +         */ +        public void destroy() { +            // TODO: need to do cleanup and disassociation with Recognizer +            if (mGrammar != 0) { +                SR_GrammarDestroy(mGrammar); +                mGrammar = 0; +            } +        } + +        /** +         * Clean up resources. +         */ +        protected void finalize() { +            if (mGrammar != 0) { +                destroy(); +                throw new IllegalStateException("someone forgot to destroy Grammar"); +            } +        } +    } + +    /** +     * Start recognition +     */ +    public void start() { +        // TODO: shouldn't be here? +        SR_RecognizerActivateRule(mRecognizer, mActiveGrammar.mGrammar, "trash", 1); +        SR_RecognizerStart(mRecognizer); +    } +     +    /** +     * Process some audio and return the current status. 
+     * @return recognition event, one of: +     * <ul> +     * <li><code>EVENT_INVALID</code> +     * <li><code>EVENT_NO_MATCH</code> +     * <li><code>EVENT_INCOMPLETE</code> +     * <li><code>EVENT_STARTED</code> +     * <li><code>EVENT_STOPPED</code> +     * <li><code>EVENT_START_OF_VOICING</code> +     * <li><code>EVENT_END_OF_VOICING</code> +     * <li><code>EVENT_SPOKE_TOO_SOON</code> +     * <li><code>EVENT_RECOGNITION_RESULT</code> +     * <li><code>EVENT_START_OF_UTTERANCE_TIMEOUT</code> +     * <li><code>EVENT_RECOGNITION_TIMEOUT</code> +     * <li><code>EVENT_NEED_MORE_AUDIO</code> +     * <li><code>EVENT_MAX_SPEECH</code> +     * </ul> +     */ +    public int advance() { +        return SR_RecognizerAdvance(mRecognizer); +    } +     +    /** +     * Put audio samples into the <code>Recognizer</code>. +     * @param buf holds the audio samples. +     * @param offset offset of the first sample. +     * @param length number of bytes containing samples. +     * @param isLast indicates no more audio data, normally false. +     * @return number of bytes accepted. +     */ +    public int putAudio(byte[] buf, int offset, int length, boolean isLast) { +        return SR_RecognizerPutAudio(mRecognizer, buf, offset, length, isLast); +    } +     +    /** +     * Read audio samples from an <code>InputStream</code> and put them in the +     * <code>Recognizer</code>. +     * @param audio <code>InputStream</code> containing PCM audio samples. 
+     */ +    public void putAudio(InputStream audio) throws IOException { +        // make sure the audio buffer is allocated +        if (mPutAudioBuffer == null) mPutAudioBuffer = new byte[512]; +        // read some data +        int nbytes = audio.read(mPutAudioBuffer); +        // eof, so signal Recognizer +        if (nbytes == -1) { +            SR_RecognizerPutAudio(mRecognizer, mPutAudioBuffer, 0, 0, true); +        } +        // put it into the Recognizer +        else if (nbytes != SR_RecognizerPutAudio(mRecognizer, mPutAudioBuffer, 0, nbytes, false)) { +            throw new IOException("SR_RecognizerPutAudio failed nbytes=" + nbytes); +        } +    } +     +    // audio buffer for putAudio(InputStream) +    private byte[] mPutAudioBuffer = null; + +    /** +     * Get the number of recognition results.  Must be called after +     * <code>EVENT_RECOGNITION_RESULT</code> is returned by +     * <code>advance</code>, but before <code>stop</code>. +     *  +     * @return number of results in nbest list. +     */ +    public int getResultCount() { +        return SR_RecognizerResultGetSize(mRecognizer); +    } + +    /** +     * Get a set of keys for the result.  Must be called after +     * <code>EVENT_RECOGNITION_RESULT</code> is returned by +     * <code>advance</code>, but before <code>stop</code>. +     *  +     * @param index index of result. +     * @return array of keys. +     */ +    public String[] getResultKeys(int index) { +        return SR_RecognizerResultGetKeyList(mRecognizer, index); +    } + +    /** +     * Get a result value.  Must be called after +     * <code>EVENT_RECOGNITION_RESULT</code> is returned by +     * <code>advance</code>, but before <code>stop</code>. +     *  +     * @param index index of the result. +     * @param key key of the result.  
This is typically one of +     * <code>KEY_CONFIDENCE</code>, <code>KEY_LITERAL</code>, or +     * <code>KEY_MEANING</code>, but the user can also define their own keys +     * in a grxml file, or in the <code>tag</code> slot of +     * <code>Grammar.addWordToSlot</code>. +     * @return the result. +     */ +    public String getResult(int index, String key) { +        return SR_RecognizerResultGetValue(mRecognizer, index, key); +    } + +    /** +     * Stop the <code>Recognizer</code>. +     */ +    public void stop() { +        SR_RecognizerStop(mRecognizer); +        SR_RecognizerDeactivateRule(mRecognizer, mActiveGrammar.mGrammar, "trash"); +    } +     +    /** +     * Reset the acoustic state vector to its default value. +     *  +     * @hide +     */ +    public void resetAcousticState() { +        SR_AcousticStateReset(mRecognizer); +    } +     +    /** +     * Set the acoustic state vector. +     * @param state String containing the acoustic state vector. +     *  +     * @hide +     */ +    public void setAcousticState(String state) { +        SR_AcousticStateSet(mRecognizer, state); +    } +     +    /** +     * Get the acoustic state vector. +     * @return String containing the acoustic state vector. +     *  +     * @hide +     */ +    public String getAcousticState() { +        return SR_AcousticStateGet(mRecognizer); +    } + +    /** +     * Clean up resources. 
+     */ +    public void destroy() { +        try { +            if (mVocabulary != 0) SR_VocabularyDestroy(mVocabulary); +        } finally { +            mVocabulary = 0; +            try { +                if (mRecognizer != 0) SR_RecognizerUnsetup(mRecognizer); +            } finally { +                try { +                    if (mRecognizer != 0) SR_RecognizerDestroy(mRecognizer); +                } finally { +                    mRecognizer = 0; +                    try { +                        SR_SessionDestroy(); +                    } finally { +                        PMemShutdown(); +                    } +                } +            } +        } +    } + +    /** +     * Clean up resources. +     */ +    protected void finalize() throws Throwable { +        if (mVocabulary != 0 || mRecognizer != 0) { +            destroy(); +            throw new IllegalStateException("someone forgot to destroy Recognizer"); +        } +    } +     +    /* an example session captured, for reference +    void doall() { +        if (PMemInit ( ) +           || lhs_audioinOpen ( WAVE_MAPPER, SREC_TEST_DEFAULT_AUDIO_FREQUENCY, &audio_in_handle ) +           || srec_test_init_application_data ( &applicationData, argc, argv ) +           || SR_SessionCreate ( "/system/usr/srec/config/en.us/baseline11k.par" ) +           || SR_RecognizerCreate ( &applicationData.recognizer ) +           || SR_RecognizerSetup ( applicationData.recognizer) +           || ESR_SessionGetLCHAR ( L("cmdline.vocabulary"), filename, &flen ) +           || SR_VocabularyLoad ( filename, &applicationData.vocabulary ) +           || SR_VocabularyGetLanguage ( applicationData.vocabulary, &applicationData.locale ) +           || (applicationData.nametag = NULL) +           || SR_NametagsCreate ( &applicationData.nametags ) +           || (LSTRCPY ( applicationData.grammars [0].grammar_path, "/system/usr/srec/config/en.us/grammars/VoiceDialer.g2g" ), 0) +           || (LSTRCPY ( 
applicationData.grammars [0].grammarID, "BothTags" ), 0) +           || (LSTRCPY ( applicationData.grammars [0].ruleName, "trash" ), 0) +           || (applicationData.grammars [0].is_ve_grammar = ESR_FALSE, 0) +           || SR_GrammarLoad (applicationData.grammars [0].grammar_path, &applicationData.grammars [applicationData.grammarCount].grammar ) +           || SR_GrammarSetupVocabulary ( applicationData.grammars [0].grammar, applicationData.vocabulary ) +           || SR_GrammarSetupRecognizer( applicationData.grammars [0].grammar, applicationData.recognizer ) +           || SR_GrammarSetDispatchFunction ( applicationData.grammars [0].grammar, L("myDSMCallback"), NULL, myDSMCallback ) +           || (applicationData.grammarCount++, 0) +           || SR_RecognizerActivateRule ( applicationData.recognizer, applicationData.grammars [0].grammar, +                           applicationData.grammars [0].ruleName, 1 ) +           || (applicationData.active_grammar_num = 0, 0) +           || lhs_audioinStart ( audio_in_handle ) +           || SR_RecognizerStart ( applicationData.recognizer ) +           || strl ( applicationData.grammars [0].grammar, &applicationData, audio_in_handle, &recognition_count ) +           || SR_RecognizerStop ( applicationData.recognizer ) +           || lhs_audioinStop ( audio_in_handle ) +           || SR_RecognizerDeactivateRule ( applicationData.recognizer, applicationData.grammars [0].grammar, applicationData.grammars [0].ruleName ) +           || (applicationData.active_grammar_num = -1, 0) +           || SR_GrammarDestroy ( applicationData.grammars [0].grammar ) +           || (applicationData.grammarCount--, 0) +           || SR_NametagsDestroy ( applicationData.nametags ) +           || (applicationData.nametags = NULL, 0) +           || SR_VocabularyDestroy ( applicationData.vocabulary ) +           || (applicationData.vocabulary = NULL) +           || SR_RecognizerUnsetup ( applicationData.recognizer) // releases acoustic models 
+           || SR_RecognizerDestroy ( applicationData.recognizer ) +           || (applicationData.recognizer = NULL) +           || SR_SessionDestroy ( ) +           || srec_test_shutdown_application_data ( &applicationData ) +           || lhs_audioinClose ( &audio_in_handle ) +           || PMemShutdown ( ) +    } +    */ + + +    // +    // PMem native methods +    // +    private static native void PMemInit(); +    private static native void PMemShutdown(); + + +    // +    // SR_Session native methods +    // +    private static native void SR_SessionCreate(String filename); +    private static native void SR_SessionDestroy(); + + +    // +    // SR_Recognizer native methods +    // +     +    /** +     * Reserved value. +     */ +    public final static int EVENT_INVALID = 0; +     +    /** +     * <code>Recognizer</code> could not find a match for the utterance. +     */ +    public final static int EVENT_NO_MATCH = 1; +     +    /** +     * <code>Recognizer</code> processed one frame of audio. +     */ +    public final static int EVENT_INCOMPLETE = 2; +     +    /** +     * <code>Recognizer</code> has just been started. +     */ +    public final static int EVENT_STARTED = 3; +     +    /** +     * <code>Recognizer</code> is stopped. +     */ +    public final static int EVENT_STOPPED = 4; +     +    /** +     * Beginning of speech detected. +     */ +    public final static int EVENT_START_OF_VOICING = 5; +     +    /** +     * End of speech detected. +     */ +    public final static int EVENT_END_OF_VOICING = 6; +     +    /** +     * Beginning of utterance occurred too soon. +     */ +    public final static int EVENT_SPOKE_TOO_SOON = 7; +     +    /** +     * Recognition match detected. +     */ +    public final static int EVENT_RECOGNITION_RESULT = 8; +     +    /** +     * Timeout occurred before beginning of utterance. 
+     */ +    public final static int EVENT_START_OF_UTTERANCE_TIMEOUT = 9; +     +    /** +     * Timeout occured before speech recognition could complete. +     */ +    public final static int EVENT_RECOGNITION_TIMEOUT = 10; +     +    /** +     * Not enough samples to process one frame. +     */ +    public final static int EVENT_NEED_MORE_AUDIO = 11; +     +    /** +     * More audio encountered than is allowed by 'swirec_max_speech_duration'. +     */ +    public final static int EVENT_MAX_SPEECH = 12; + +    /** +     * Produce a displayable string from an <code>advance</code> event. +     * @param event +     * @return String representing the event. +     */ +    public static String eventToString(int event) { +        switch (event) { +            case EVENT_INVALID: +                return "EVENT_INVALID"; +            case EVENT_NO_MATCH: +                return "EVENT_NO_MATCH"; +            case EVENT_INCOMPLETE: +                return "EVENT_INCOMPLETE"; +            case EVENT_STARTED: +                return "EVENT_STARTED"; +            case EVENT_STOPPED: +                return "EVENT_STOPPED"; +            case EVENT_START_OF_VOICING: +                return "EVENT_START_OF_VOICING"; +            case EVENT_END_OF_VOICING: +                return "EVENT_END_OF_VOICING"; +            case EVENT_SPOKE_TOO_SOON: +                return "EVENT_SPOKE_TOO_SOON"; +            case EVENT_RECOGNITION_RESULT: +                return "EVENT_RECOGNITION_RESULT"; +            case EVENT_START_OF_UTTERANCE_TIMEOUT: +                return "EVENT_START_OF_UTTERANCE_TIMEOUT"; +            case EVENT_RECOGNITION_TIMEOUT: +                return "EVENT_RECOGNITION_TIMEOUT"; +            case EVENT_NEED_MORE_AUDIO: +                return "EVENT_NEED_MORE_AUDIO"; +            case EVENT_MAX_SPEECH: +                return "EVENT_MAX_SPEECH"; +        } +        return "EVENT_" + event; +    } + +    // +    // SR_Recognizer methods +    // +    private static 
native void SR_RecognizerStart(int recognizer); +    private static native void SR_RecognizerStop(int recognizer); +    private static native int SR_RecognizerCreate(); +    private static native void SR_RecognizerDestroy(int recognizer); +    private static native void SR_RecognizerSetup(int recognizer); +    private static native void SR_RecognizerUnsetup(int recognizer); +    private static native boolean SR_RecognizerIsSetup(int recognizer); +    private static native String SR_RecognizerGetParameter(int recognizer, String key); +    private static native int SR_RecognizerGetSize_tParameter(int recognizer, String key); +    private static native boolean SR_RecognizerGetBoolParameter(int recognizer, String key); +    private static native void SR_RecognizerSetParameter(int recognizer, String key, String value); +    private static native void SR_RecognizerSetSize_tParameter(int recognizer, +            String key, int value); +    private static native void SR_RecognizerSetBoolParameter(int recognizer, String key, +            boolean value); +    private static native void SR_RecognizerSetupRule(int recognizer, int grammar, +            String ruleName); +    private static native boolean SR_RecognizerHasSetupRules(int recognizer); +    private static native void SR_RecognizerActivateRule(int recognizer, int grammar, +            String ruleName, int weight); +    private static native void SR_RecognizerDeactivateRule(int recognizer, int grammar, +            String ruleName); +    private static native void SR_RecognizerDeactivateAllRules(int recognizer); +    private static native boolean SR_RecognizerIsActiveRule(int recognizer, int grammar, +            String ruleName); +    private static native boolean SR_RecognizerCheckGrammarConsistency(int recognizer, +            int grammar); +    private static native int SR_RecognizerPutAudio(int recognizer, byte[] buffer, int offset, +            int length, boolean isLast); +    private static native int 
SR_RecognizerAdvance(int recognizer); +    // private static native void SR_RecognizerLoadUtterance(int recognizer, +    //         const LCHAR* filename); +    // private static native void SR_RecognizerLoadWaveFile(int recognizer, +    //         const LCHAR* filename); +    // private static native void SR_RecognizerSetLockFunction(int recognizer, +    //         SR_RecognizerLockFunction function, void* data); +    private static native boolean SR_RecognizerIsSignalClipping(int recognizer); +    private static native boolean SR_RecognizerIsSignalDCOffset(int recognizer); +    private static native boolean SR_RecognizerIsSignalNoisy(int recognizer); +    private static native boolean SR_RecognizerIsSignalTooQuiet(int recognizer); +    private static native boolean SR_RecognizerIsSignalTooFewSamples(int recognizer); +    private static native boolean SR_RecognizerIsSignalTooManySamples(int recognizer); +    // private static native void SR_Recognizer_Change_Sample_Rate (size_t new_sample_rate); +     +     +    // +    // SR_AcousticState native methods +    // +    private static native void SR_AcousticStateReset(int recognizer); +    private static native void SR_AcousticStateSet(int recognizer, String state); +    private static native String SR_AcousticStateGet(int recognizer); + + +    // +    // SR_Grammar native methods +    // +    private static native void SR_GrammarCompile(int grammar); +    private static native void SR_GrammarAddWordToSlot(int grammar, String slot, +            String word, String pronunciation, int weight, String tag); +    private static native void SR_GrammarResetAllSlots(int grammar); +    // private static native void SR_GrammarAddNametagToSlot(int grammar, String slot, +    // const struct SR_Nametag_t* nametag, int weight, String tag); +    private static native void SR_GrammarSetupVocabulary(int grammar, int vocabulary); +    // private static native void SR_GrammarSetupModels(int grammar, SR_AcousticModels* models); +    
private static native void SR_GrammarSetupRecognizer(int grammar, int recognizer); +    private static native void SR_GrammarUnsetupRecognizer(int grammar); +    // private static native void SR_GrammarGetModels(int grammar,SR_AcousticModels** models); +    private static native int SR_GrammarCreate(); +    private static native void SR_GrammarDestroy(int grammar); +    private static native int SR_GrammarLoad(String filename); +    private static native void SR_GrammarSave(int grammar, String filename); +    // private static native void SR_GrammarSetDispatchFunction(int grammar, +    //         const LCHAR* name, void* userData, SR_GrammarDispatchFunction function); +    // private static native void SR_GrammarSetParameter(int grammar, const +    //         LCHAR* key, void* value); +    // private static native void SR_GrammarSetSize_tParameter(int grammar, +    //         const LCHAR* key, size_t value); +    // private static native void SR_GrammarGetParameter(int grammar, const +    //         LCHAR* key, void** value); +    // private static native void SR_GrammarGetSize_tParameter(int grammar, +    //         const LCHAR* key, size_t* value); +    // private static native void SR_GrammarCheckParse(int grammar, const LCHAR* +    //         transcription, SR_SemanticResult** result, size_t* resultCount); +    private static native void SR_GrammarAllowOnly(int grammar, String transcription); +    private static native void SR_GrammarAllowAll(int grammar); + + +    // +    // SR_Vocabulary native methods +    // +    // private static native int SR_VocabularyCreate(); +    private static native int SR_VocabularyLoad(); +    // private static native void SR_VocabularySave(SR_Vocabulary* self, +    //         const LCHAR* filename); +    // private static native void SR_VocabularyAddWord(SR_Vocabulary* self, +    //         const LCHAR* word); +    // private static native void SR_VocabularyGetLanguage(SR_Vocabulary* self, +    //         ESR_Locale* locale); +   
 private static native void SR_VocabularyDestroy(int vocabulary); +    private static native String SR_VocabularyGetPronunciation(int vocabulary, String word); + + +    // +    // SR_RecognizerResult native methods +    // +    private static native byte[] SR_RecognizerResultGetWaveform(int recognizer); +    private static native int SR_RecognizerResultGetSize(int recognizer); +    private static native int SR_RecognizerResultGetKeyCount(int recognizer, int nbest); +    private static native String[] SR_RecognizerResultGetKeyList(int recognizer, int nbest); +    private static native String SR_RecognizerResultGetValue(int recognizer, +            int nbest, String key); +    // private static native void SR_RecognizerResultGetLocale(int recognizer, ESR_Locale* locale); +} diff --git a/core/java/android/speech/srec/UlawEncoderInputStream.java b/core/java/android/speech/srec/UlawEncoderInputStream.java new file mode 100644 index 0000000..132fe027 --- /dev/null +++ b/core/java/android/speech/srec/UlawEncoderInputStream.java @@ -0,0 +1,186 @@ +/* + * --------------------------------------------------------------------------- + * UlawEncoderInputStream.java + * + * Copyright 2008 Nuance Communciations, Inc. + * + * Licensed under the Apache License, Version 2.0 (the 'License'); you may not + * use this file except in compliance with the License. + * + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an 'AS IS' BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
/**
 * InputStream which transforms 16 bit pcm data to ulaw data.
 *
 * <p>The wrapped stream must supply little-endian signed 16 bit samples; every two
 * input bytes produce one ulaw output byte.
 *
 * @hide pending API council approval
 */
public final class UlawEncoderInputStream extends InputStream {
    private final static String TAG = "UlawEncoderInputStream";

    private final static int MAX_ULAW = 8192;
    private final static int SCALE_BITS = 16;

    private InputStream mIn;

    private int mMax = 0;

    private final byte[] mBuf = new byte[1024];
    private int mBufCount = 0; // should be 0 or 1

    private final byte[] mOneByte = new byte[1];


    /**
     * Encode 16 bit pcm samples as ulaw bytes.
     * @param pcmBuf array containing little-endian 16 bit pcm data.
     * @param pcmOffset offset of the first pcm byte in <code>pcmBuf</code>.
     * @param ulawBuf array receiving one ulaw byte per sample.
     * @param ulawOffset offset of the first ulaw byte in <code>ulawBuf</code>.
     * @param length number of samples to encode (not number of input bytes).
     * @param max pcm value corresponding to maximum ulaw value; values
     * <code>&lt;= 0</code> select the native ulaw range (no scaling).
     */
    public static void encode(byte[] pcmBuf, int pcmOffset,
            byte[] ulawBuf, int ulawOffset, int length, int max) {

        // from  'ulaw' in wikipedia
        // +8191 to +8159                          0x80
        // +8158 to +4063 in 16 intervals of 256   0x80 + interval number
        // +4062 to +2015 in 16 intervals of 128   0x90 + interval number
        // +2014 to  +991 in 16 intervals of  64   0xA0 + interval number
        //  +990 to  +479 in 16 intervals of  32   0xB0 + interval number
        //  +478 to  +223 in 16 intervals of  16   0xC0 + interval number
        //  +222 to   +95 in 16 intervals of   8   0xD0 + interval number
        //   +94 to   +31 in 16 intervals of   4   0xE0 + interval number
        //   +30 to    +1 in 15 intervals of   2   0xF0 + interval number
        //     0                                   0xFF

        //    -1                                   0x7F
        //   -31 to    -2 in 15 intervals of   2   0x70 + interval number
        //   -95 to   -32 in 16 intervals of   4   0x60 + interval number
        //  -223 to   -96 in 16 intervals of   8   0x50 + interval number
        //  -479 to  -224 in 16 intervals of  16   0x40 + interval number
        //  -991 to  -480 in 16 intervals of  32   0x30 + interval number
        // -2015 to  -992 in 16 intervals of  64   0x20 + interval number
        // -4063 to -2016 in 16 intervals of 128   0x10 + interval number
        // -8159 to -4064 in 16 intervals of 256   0x00 + interval number
        // -8192 to -8160                          0x00

        // set scale factors
        if (max <= 0) max = MAX_ULAW;

        // Fixed-point scale factor: MAX_ULAW / max, with SCALE_BITS fractional bits.
        int coef = MAX_ULAW * (1 << SCALE_BITS) / max;

        for (int i = 0; i < length; i++) {
            int pcm = (0xff & pcmBuf[pcmOffset++]) + (pcmBuf[pcmOffset++] << 8);
            // Multiply in 64 bits: with a small 'max' the product of a loud sample and
            // 'coef' does not fit in an int, and wrapping would flip the sign instead of
            // saturating at the largest ulaw code.
            pcm = (int) (((long) pcm * coef) >> SCALE_BITS);

            int ulaw;
            if (pcm >= 0) {
                ulaw = pcm <= 0 ? 0xff :
                        pcm <=   30 ? 0xf0 + ((  30 - pcm) >> 1) :
                        pcm <=   94 ? 0xe0 + ((  94 - pcm) >> 2) :
                        pcm <=  222 ? 0xd0 + (( 222 - pcm) >> 3) :
                        pcm <=  478 ? 0xc0 + (( 478 - pcm) >> 4) :
                        pcm <=  990 ? 0xb0 + (( 990 - pcm) >> 5) :
                        pcm <= 2014 ? 0xa0 + ((2014 - pcm) >> 6) :
                        pcm <= 4062 ? 0x90 + ((4062 - pcm) >> 7) :
                        pcm <= 8158 ? 0x80 + ((8158 - pcm) >> 8) :
                        0x80;
            } else {
                ulaw = -1 <= pcm ? 0x7f :
                          -31 <= pcm ? 0x70 + ((pcm -   -31) >> 1) :
                          -95 <= pcm ? 0x60 + ((pcm -   -95) >> 2) :
                         -223 <= pcm ? 0x50 + ((pcm -  -223) >> 3) :
                         -479 <= pcm ? 0x40 + ((pcm -  -479) >> 4) :
                         -991 <= pcm ? 0x30 + ((pcm -  -991) >> 5) :
                        -2015 <= pcm ? 0x20 + ((pcm - -2015) >> 6) :
                        -4063 <= pcm ? 0x10 + ((pcm - -4063) >> 7) :
                        -8159 <= pcm ? 0x00 + ((pcm - -8159) >> 8) :
                        0x00;
            }
            ulawBuf[ulawOffset++] = (byte)ulaw;
        }
    }

    /**
     * Compute the maximum of the absolute value of the pcm samples.
     * The return value can be used to set ulaw encoder scaling.
     * @param pcmBuf array containing 16 bit pcm data.
     * @param offset offset of start of 16 bit pcm data.
     * @param length number of pcm samples (not number of input bytes)
     * @return maximum abs of pcm data values
     */
    public static int maxAbsPcm(byte[] pcmBuf, int offset, int length) {
        int max = 0;
        for (int i = 0; i < length; i++) {
            int pcm = (0xff & pcmBuf[offset++]) + (pcmBuf[offset++] << 8);
            if (pcm < 0) pcm = -pcm;
            if (pcm > max) max = pcm;
        }
        return max;
    }

    /**
     * Create an InputStream which takes 16 bit pcm data and produces ulaw data.
     * @param in InputStream containing 16 bit pcm data.
     * @param max pcm value corresponding to maximum ulaw value.
     */
    public UlawEncoderInputStream(InputStream in, int max) {
        mIn = in;
        mMax = max;
    }

    /**
     * Read up to <code>length</code> ulaw bytes.
     * @param buf destination array.
     * @param offset offset in <code>buf</code> of the first byte written.
     * @param length maximum number of ulaw bytes to produce.
     * @return number of ulaw bytes produced, 0 if <code>length</code> is 0, or -1 once
     * the wrapped stream ends before another complete sample is available.
     * @throws IOException if the wrapped stream fails.
     * @throws IllegalStateException if this stream has been closed.
     */
    @Override
    public int read(byte[] buf, int offset, int length) throws IOException {
        if (mIn == null) throw new IllegalStateException("not open");

        // A zero-length request must return 0 (InputStream contract). Without this guard
        // the fill loop below would ask the wrapped stream for 0 bytes forever.
        if (length == 0) return 0;

        // return at least one byte, but try to fill 'length'
        while (mBufCount < 2) {
            // Widen before doubling so a huge 'length' cannot overflow to a negative count.
            int want = (int) Math.min((long) length * 2, mBuf.length - mBufCount);
            int n = mIn.read(mBuf, mBufCount, want);
            if (n == -1) return -1;
            mBufCount += n;
        }

        // compand data
        int n = Math.min(mBufCount / 2, length);
        encode(mBuf, 0, buf, offset, n, mMax);

        // move data to bottom of mBuf
        mBufCount -= n * 2;
        System.arraycopy(mBuf, n * 2, mBuf, 0, mBufCount);

        return n;
    }

    @Override
    public int read(byte[] buf) throws IOException {
        return read(buf, 0, buf.length);
    }

    @Override
    public int read() throws IOException {
        int n = read(mOneByte, 0, 1);
        if (n == -1) return -1;
        return 0xff & (int)mOneByte[0];
    }

    /**
     * Close the wrapped stream. Calling this more than once has no further effect.
     */
    @Override
    public void close() throws IOException {
        if (mIn != null) {
            InputStream in = mIn;
            mIn = null;
            in.close();
        }
    }

    /**
     * Estimate the number of ulaw bytes readable without blocking.
     * @return half the number of pcm bytes buffered here or available from the wrapped stream.
     * @throws IllegalStateException if this stream has been closed.
     */
    @Override
    public int available() throws IOException {
        // Same failure mode as read() on a closed stream, rather than a bare NPE.
        if (mIn == null) throw new IllegalStateException("not open");
        return (mIn.available() + mBufCount) / 2;
    }
}
/**
 * This class represents the header of a WAVE format audio file, which usually
 * have a .wav suffix.  The following integer valued fields are contained:
 * <ul>
 * <li> format - usually PCM, ALAW or ULAW.
 * <li> numChannels - 1 for mono, 2 for stereo.
 * <li> sampleRate - usually 8000, 11025, 16000, 22050, or 44100 hz.
 * <li> bitsPerSample - usually 16 for PCM, 8 for ALAW, or 8 for ULAW.
 * <li> numBytes - size of audio data after this header, in bytes.
 * </ul>
 * @hide pending API council approval
 */
public class WaveHeader {

    // follows WAVE format in http://ccrma.stanford.edu/courses/422/projects/WaveFormat

    private static final String TAG = "WaveHeader";

    private static final int HEADER_LENGTH = 44;

    /** Indicates PCM format. */
    public static final short FORMAT_PCM = 1;
    /** Indicates ALAW format. */
    public static final short FORMAT_ALAW = 6;
    /** Indicates ULAW format. */
    public static final short FORMAT_ULAW = 7;

    private short mFormat;
    private short mNumChannels;
    private int mSampleRate;
    private short mBitsPerSample;
    private int mNumBytes;

    /**
     * Construct a WaveHeader, with all fields defaulting to zero.
     */
    public WaveHeader() {
    }

    /**
     * Construct a WaveHeader, with fields initialized.
     * @param format format of audio data,
     * one of {@link #FORMAT_PCM}, {@link #FORMAT_ULAW}, or {@link #FORMAT_ALAW}.
     * @param numChannels 1 for mono, 2 for stereo.
     * @param sampleRate typically 8000, 11025, 16000, 22050, or 44100 hz.
     * @param bitsPerSample usually 16 for PCM, 8 for ULAW or 8 for ALAW.
     * @param numBytes size of audio data after this header, in bytes.
     */
    public WaveHeader(short format, short numChannels, int sampleRate, short bitsPerSample, int numBytes) {
        mFormat = format;
        mSampleRate = sampleRate;
        mNumChannels = numChannels;
        mBitsPerSample = bitsPerSample;
        mNumBytes = numBytes;
    }

    /**
     * Get the format field.
     * @return format field,
     * one of {@link #FORMAT_PCM}, {@link #FORMAT_ULAW}, or {@link #FORMAT_ALAW}.
     */
    public short getFormat() {
        return mFormat;
    }

    /**
     * Set the format field.
     * @param format
     * one of {@link #FORMAT_PCM}, {@link #FORMAT_ULAW}, or {@link #FORMAT_ALAW}.
     * @return reference to this WaveHeader instance.
     */
    public WaveHeader setFormat(short format) {
        mFormat = format;
        return this;
    }

    /**
     * Get the number of channels.
     * @return number of channels, 1 for mono, 2 for stereo.
     */
    public short getNumChannels() {
        return mNumChannels;
    }

    /**
     * Set the number of channels.
     * @param numChannels 1 for mono, 2 for stereo.
     * @return reference to this WaveHeader instance.
     */
    public WaveHeader setNumChannels(short numChannels) {
        mNumChannels = numChannels;
        return this;
    }

    /**
     * Get the sample rate.
     * @return sample rate, typically 8000, 11025, 16000, 22050, or 44100 hz.
     */
    public int getSampleRate() {
        return mSampleRate;
    }

    /**
     * Set the sample rate.
     * @param sampleRate sample rate, typically 8000, 11025, 16000, 22050, or 44100 hz.
     * @return reference to this WaveHeader instance.
     */
    public WaveHeader setSampleRate(int sampleRate) {
        mSampleRate = sampleRate;
        return this;
    }

    /**
     * Get the number of bits per sample.
     * @return number of bits per sample,
     * usually 16 for PCM, 8 for ULAW or 8 for ALAW.
     */
    public short getBitsPerSample() {
        return mBitsPerSample;
    }

    /**
     * Set the number of bits per sample.
     * @param bitsPerSample number of bits per sample,
     * usually 16 for PCM, 8 for ULAW or 8 for ALAW.
     * @return reference to this WaveHeader instance.
     */
    public WaveHeader setBitsPerSample(short bitsPerSample) {
        mBitsPerSample = bitsPerSample;
        return this;
    }

    /**
     * Get the size of audio data after this header, in bytes.
     * @return size of audio data after this header, in bytes.
     */
    public int getNumBytes() {
        return mNumBytes;
    }

    /**
     * Set the size of audio data after this header, in bytes.
     * @param numBytes size of audio data after this header, in bytes.
     * @return reference to this WaveHeader instance.
     */
    public WaveHeader setNumBytes(int numBytes) {
        mNumBytes = numBytes;
        return this;
    }

    /**
     * Read and initialize a WaveHeader.
     * Only the canonical 44 byte layout is accepted: a RIFF/WAVE header, a 16 byte
     * "fmt " chunk and the start of the "data" chunk, in that order.
     * @param in {@link java.io.InputStream} to read from.
     * @return number of bytes consumed.
     * @throws IOException if a tag is missing, the fmt chunk length is not 16, the
     * byte rate or block alignment disagree with the other fields, or the stream
     * ends before the header is complete.
     */
    public int read(InputStream in) throws IOException {
        /* RIFF header */
        readId(in, "RIFF");
        // RIFF chunk size: consumed to stay aligned, but not otherwise used.
        readInt(in);
        readId(in, "WAVE");

        /* fmt chunk */
        readId(in, "fmt ");
        if (16 != readInt(in)) throw new IOException("fmt chunk length not 16");
        mFormat = readShort(in);
        mNumChannels = readShort(in);
        mSampleRate = readInt(in);
        int byteRate = readInt(in);
        short blockAlign = readShort(in);
        mBitsPerSample = readShort(in);
        if (byteRate != mNumChannels * mSampleRate * mBitsPerSample / 8) {
            throw new IOException("fmt.ByteRate field inconsistent");
        }
        if (blockAlign != mNumChannels * mBitsPerSample / 8) {
            throw new IOException("fmt.BlockAlign field inconsistent");
        }

        /* data chunk */
        readId(in, "data");
        mNumBytes = readInt(in);

        return HEADER_LENGTH;
    }

    /** Consume the characters of <code>id</code>, failing if the stream holds anything else. */
    private static void readId(InputStream in, String id) throws IOException {
        for (int i = 0; i < id.length(); i++) {
            if (id.charAt(i) != in.read()) throw new IOException( id + " tag not present");
        }
    }

    /**
     * Read one unsigned byte, turning end of stream into an exception so that a
     * truncated header cannot be mistaken for 0xff bytes.
     */
    private static int readByte(InputStream in) throws IOException {
        int b = in.read();
        if (b < 0) {
            throw new java.io.EOFException("unexpected end of stream in WAVE header");
        }
        return b;
    }

    /** Read a little-endian 32 bit integer. */
    private static int readInt(InputStream in) throws IOException {
        return readByte(in) | (readByte(in) << 8) | (readByte(in) << 16) | (readByte(in) << 24);
    }

    /** Read a little-endian 16 bit integer. */
    private static short readShort(InputStream in) throws IOException {
        return (short)(readByte(in) | (readByte(in) << 8));
    }

    /**
     * Write a WAVE file header.
     * @param out {@link java.io.OutputStream} to receive the header.
     * @return number of bytes written.
     * @throws IOException
     */
    public int write(OutputStream out) throws IOException {
        /* RIFF header */
        writeId(out, "RIFF");
        writeInt(out, 36 + mNumBytes);
        writeId(out, "WAVE");

        /* fmt chunk */
        writeId(out, "fmt ");
        writeInt(out, 16);
        writeShort(out, mFormat);
        writeShort(out, mNumChannels);
        writeInt(out, mSampleRate);
        writeInt(out, mNumChannels * mSampleRate * mBitsPerSample / 8);
        writeShort(out, (short)(mNumChannels * mBitsPerSample / 8));
        writeShort(out, mBitsPerSample);

        /* data chunk */
        writeId(out, "data");
        writeInt(out, mNumBytes);

        return HEADER_LENGTH;
    }

    /** Write each character of <code>id</code> as a single byte. */
    private static void writeId(OutputStream out, String id) throws IOException {
        for (int i = 0; i < id.length(); i++) out.write(id.charAt(i));
    }

    /** Write a little-endian 32 bit integer; OutputStream.write keeps only the low 8 bits. */
    private static void writeInt(OutputStream out, int val) throws IOException {
        out.write(val >> 0);
        out.write(val >> 8);
        out.write(val >> 16);
        out.write(val >> 24);
    }

    /** Write a little-endian 16 bit integer. */
    private static void writeShort(OutputStream out, short val) throws IOException {
        out.write(val >> 0);
        out.write(val >> 8);
    }

    @Override
    public String toString() {
        return String.format(
                "WaveHeader format=%d numChannels=%d sampleRate=%d bitsPerSample=%d numBytes=%d",
                mFormat, mNumChannels, mSampleRate, mBitsPerSample, mNumBytes);
    }

}
