Diffstat (limited to 'tts')
-rwxr-xr-x | tts/java/android/tts/SynthProxy.java | 172
-rwxr-xr-x | tts/java/android/tts/Tts.java | 605
-rwxr-xr-x | tts/jni/Android.mk | 26
-rwxr-xr-x | tts/jni/android_tts_SynthProxy.cpp | 595
4 files changed, 1398 insertions, 0 deletions
diff --git a/tts/java/android/tts/SynthProxy.java b/tts/java/android/tts/SynthProxy.java new file mode 100755 index 0000000..4ed9754 --- /dev/null +++ b/tts/java/android/tts/SynthProxy.java @@ -0,0 +1,172 @@ +/* + * Copyright (C) 2009 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package android.tts; + +import android.util.Log; +import java.lang.ref.WeakReference; + +/** + * @hide + * + * The SpeechSynthesis class provides a high-level api to create and play + * synthesized speech. This class is used internally to talk to a native + * TTS library that implements the interface defined in + * frameworks/base/include/tts/TtsEngine.h + * + */ +@SuppressWarnings("unused") +public class SynthProxy { + + // + // External API + // + + /** + * Constructor; pass the location of the native TTS .so to use. + */ + public SynthProxy(String nativeSoLib) { + Log.e("TTS is loading", nativeSoLib); + native_setup(new WeakReference<SynthProxy>(this), nativeSoLib); + } + + /** + * Stops and clears the AudioTrack. + */ + public void stop() { + native_stop(mJniData); + } + + /** + * Synthesize speech and speak it directly using AudioTrack. + */ + public void speak(String text) { + native_speak(mJniData, text); + } + + /** + * Synthesize speech to a file. The current implementation writes a valid + * WAV file to the given path, assuming it is writable. Something like + * "/sdcard/???.wav" is recommended. 
+ */ + public void synthesizeToFile(String text, String filename) { + native_synthesizeToFile(mJniData, text, filename); + } + + // TODO add IPA methods + + /** + * Sets the language + */ + public void setLanguage(String language) { + native_setLanguage(mJniData, language); + } + + /** + * Sets the speech rate + */ + public final void setSpeechRate(int speechRate) { + native_setSpeechRate(mJniData, speechRate); + } + + + /** + * Plays the given audio buffer + */ + public void playAudioBuffer(int bufferPointer, int bufferSize) { + native_playAudioBuffer(mJniData, bufferPointer, bufferSize); + } + + /** + * Gets the currently set language + */ + public String getLanguage() { + return native_getLanguage(mJniData); + } + + /** + * Gets the currently set rate + */ + public int getRate() { + return native_getRate(mJniData); + } + + /** + * Shuts down the native synthesizer + */ + public void shutdown() { + native_shutdown(mJniData); + } + + // + // Internal + // + + protected void finalize() { + native_finalize(mJniData); + mJniData = 0; + } + + static { + System.loadLibrary("synthproxy"); + } + + private final static String TAG = "SynthProxy"; + + /** + * Accessed by native methods + */ + private int mJniData = 0; + + private native final void native_setup(Object weak_this, + String nativeSoLib); + + private native final void native_finalize(int jniData); + + private native final void native_stop(int jniData); + + private native final void native_speak(int jniData, String text); + + private native final void native_synthesizeToFile(int jniData, String text, String filename); + + private native final void native_setLanguage(int jniData, String language); + + private native final void native_setSpeechRate(int jniData, int speechRate); + + // TODO add buffer format + private native final void native_playAudioBuffer(int jniData, int bufferPointer, int bufferSize); + + private native final String native_getLanguage(int jniData); + + private native final int native_getRate(int jniData); + + private native final void native_shutdown(int jniData); + + + /** + * Callback from the C layer + */ + @SuppressWarnings("unused") + private static void postNativeSpeechSynthesizedInJava(Object tts_ref, + int bufferPointer, int bufferSize) { + + Log.i("TTS plugin debug", "bufferPointer: " + bufferPointer + + " bufferSize: " + bufferSize); + + SynthProxy nativeTTS = (SynthProxy)((WeakReference)tts_ref).get(); + // TODO notify TTS service of synthesis/playback completion, + // method definition to be changed. + } +} diff --git a/tts/java/android/tts/Tts.java b/tts/java/android/tts/Tts.java new file mode 100755 index 0000000..6c8b36d --- /dev/null +++ b/tts/java/android/tts/Tts.java @@ -0,0 +1,605 @@ +/* + * Copyright (C) 2009 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +package android.tts; + +import android.content.ComponentName; +import android.content.Context; +import android.content.Intent; +import android.content.ServiceConnection; +import android.content.pm.PackageManager; +import android.content.pm.ResolveInfo; +import android.os.IBinder; +import android.os.RemoteException; +import android.util.Log; + +/** + * @hide + * + * Synthesizes speech from text. This abstracts away the complexities of using + * the TTS service such as setting up the IBinder connection and handling + * RemoteExceptions, etc. + * + * The TTS should always be safe the use; if the user does not have the + * necessary TTS apk installed, the behavior is that all calls to the TTS act as + * no-ops. + * + */ +//FIXME #TTS# review + complete javadoc +public class Tts { + + + /** + * Called when the TTS has initialized + * + * The InitListener must implement the onInit function. onInit is passed the + * version number of the TTS library that the user has installed; since this + * is called when the TTS has started, it is a good time to make sure that + * the user's TTS library is up to date. + */ + public interface OnInitListener { + public void onInit(int version); + } + + /** + * Called when the TTS has finished speaking by itself (speaking + * finished without being canceled). + * + */ + public interface OnSpeechCompletedListener { + public void onSpeechCompleted(); + } + + /** + * Connection needed for the TTS + */ + private ServiceConnection serviceConnection; + + private ITts itts = null; + private Context ctx = null; + private OnInitListener cb = null; + private int version = -1; + private boolean started = false; + private final Object startLock = new Object(); + private boolean showInstaller = false; + private ITtsCallback ittscallback; + private OnSpeechCompletedListener speechCompletedCallback = null; + + + /** + * The constructor for the TTS. + * + * @param context + * The context + * @param callback + * The InitListener that should be called when the TTS has + * initialized successfully. + * @param displayInstallMessage + * Boolean indicating whether or not an installation prompt + * should be displayed to users who do not have the TTS library. + * If this is true, a generic alert asking the user to install + * the TTS will be used. If you wish to specify the exact message + * of that prompt, please use TTS(Context context, InitListener + * callback, TTSVersionAlert alert) as the constructor instead. + */ + public Tts(Context context, OnInitListener callback, + boolean displayInstallMessage) { + showInstaller = displayInstallMessage; + ctx = context; + cb = callback; + if (dataFilesCheck()) { + initTts(); + } + } + + /** + * The constructor for the TTS. + * + * @param context + * The context + * @param callback + * The InitListener that should be called when the TTS has + * initialized successfully. + */ + public Tts(Context context, OnInitListener callback) { + // FIXME #TTS# support TtsVersionAlert + // showInstaller = true; + // versionAlert = alert; + ctx = context; + cb = callback; + if (dataFilesCheck()) { + initTts(); + } + } + + + public void setOnSpeechCompletedListener( + final OnSpeechCompletedListener listener) { + speechCompletedCallback = listener; + } + + + private boolean dataFilesCheck() { + // FIXME #TTS# config manager will be in settings + Log.i("TTS_FIXME", "FIXME in Tts: config manager will be in settings"); + // FIXME #TTS# implement checking of the correct installation of + // the data files. 
+ + return true; + } + + + private void initTts() { + started = false; + + // Initialize the TTS, run the callback after the binding is successful + serviceConnection = new ServiceConnection() { + public void onServiceConnected(ComponentName name, IBinder service) { + synchronized(startLock) { + itts = ITts.Stub.asInterface(service); + try { + ittscallback = new ITtsCallback.Stub() { + //@Override + public void markReached(String mark) + throws RemoteException { + if (speechCompletedCallback != null) { + speechCompletedCallback.onSpeechCompleted(); + } + } + }; + itts.registerCallback(ittscallback); + + } catch (RemoteException e) { + initTts(); + return; + } + + started = true; + // The callback can become null if the Android OS decides to + // restart the TTS process as well as whatever is using it. + // In such cases, do nothing - the error handling from the + // speaking calls will kick in and force a proper restart of + // the TTS. + if (cb != null) { + cb.onInit(version); + } + } + } + + public void onServiceDisconnected(ComponentName name) { + synchronized(startLock) { + itts = null; + cb = null; + started = false; + } + } + }; + + Intent intent = new Intent("android.intent.action.USE_TTS"); + intent.addCategory("android.intent.category.TTS"); + // Binding will fail only if the TTS doesn't exist; + // the TTSVersionAlert will give users a chance to install + // the needed TTS. + if (!ctx.bindService(intent, serviceConnection, + Context.BIND_AUTO_CREATE)) { + if (showInstaller) { + // FIXME #TTS# show version alert + } + } + } + + + /** + * Shuts down the TTS. It is good practice to call this in the onDestroy + * method of the Activity that is using the TTS so that the TTS is stopped + * cleanly. + */ + public void shutdown() { + try { + ctx.unbindService(serviceConnection); + } catch (IllegalArgumentException e) { + // Do nothing and fail silently since an error here indicates that + // binding never succeeded in the first place. + } + } + + + /** + * Adds a mapping between a string of text and a sound resource in a + * package. + * + * @see #TTS.speak(String text, int queueMode, String[] params) + * + * @param text + * Example: <b><code>"south_south_east"</code></b><br/> + * + * @param packagename + * Pass the packagename of the application that contains the + * resource. If the resource is in your own application (this is + * the most common case), then put the packagename of your + * application here.<br/> + * Example: <b>"com.google.marvin.compass"</b><br/> + * The packagename can be found in the AndroidManifest.xml of + * your application. + * <p> + * <code><manifest xmlns:android="..." + * package="<b>com.google.marvin.compass</b>"></code> + * </p> + * + * @param resourceId + * Example: <b><code>R.raw.south_south_east</code></b> + */ + public void addSpeech(String text, String packagename, int resourceId) { + synchronized(startLock) { + if (!started) { + return; + } + try { + itts.addSpeech(text, packagename, resourceId); + } catch (RemoteException e) { + // TTS died; restart it. + started = false; + initTts(); + } catch (NullPointerException e) { + // TTS died; restart it. + started = false; + initTts(); + } catch (IllegalStateException e) { + // TTS died; restart it. + started = false; + initTts(); + } + } + } + + + /** + * Adds a mapping between a string of text and a sound file. Using this, it + * is possible to add custom pronounciations for text. 
+ * + * @param text + * The string of text + * @param filename + * The full path to the sound file (for example: + * "/sdcard/mysounds/hello.wav") + */ + public void addSpeech(String text, String filename) { + synchronized (startLock) { + if (!started) { + return; + } + try { + itts.addSpeechFile(text, filename); + } catch (RemoteException e) { + // TTS died; restart it. + started = false; + initTts(); + } catch (NullPointerException e) { + // TTS died; restart it. + started = false; + initTts(); + } catch (IllegalStateException e) { + // TTS died; restart it. + started = false; + initTts(); + } + } + } + + + /** + * Speaks the string using the specified queuing strategy and speech + * parameters. Note that the speech parameters are not universally supported + * by all engines and will be treated as a hint. The TTS library will try to + * fulfill these parameters as much as possible, but there is no guarantee + * that the voice used will have the properties specified. + * + * @param text + * The string of text to be spoken. + * @param queueMode + * The queuing strategy to use. Use 0 for no queuing, and 1 for + * queuing. + * @param params + * The array of speech parameters to be used. Currently, only + * params[0] is defined - it is for setting the type of voice if + * the engine allows it. Possible values are "VOICE_MALE", + * "VOICE_FEMALE", and "VOICE_ROBOT". Note that right now only + * the pre-recorded voice has this support - this setting has no + * effect on eSpeak. + */ + public void speak(String text, int queueMode, String[] params) { + synchronized (startLock) { + Log.i("TTS received: ", text); + if (!started) { + return; + } + try { + itts.speak(text, queueMode, params); + } catch (RemoteException e) { + // TTS died; restart it. + started = false; + initTts(); + } catch (NullPointerException e) { + // TTS died; restart it. + started = false; + initTts(); + } catch (IllegalStateException e) { + // TTS died; restart it. + started = false; + initTts(); + } + } + } + + + /** + * Plays the earcon using the specified queueing mode and parameters. + * + * @param earcon + * The earcon that should be played + * @param queueMode + * 0 for no queue (interrupts all previous utterances), 1 for + * queued + * @param params + * An ArrayList of parameters. + */ + public void playEarcon(String earcon, int queueMode, String[] params) { + synchronized (startLock) { + if (!started) { + return; + } + try { + itts.playEarcon(earcon, queueMode, params); + } catch (RemoteException e) { + // TTS died; restart it. + started = false; + initTts(); + } catch (NullPointerException e) { + // TTS died; restart it. + started = false; + initTts(); + } catch (IllegalStateException e) { + // TTS died; restart it. + started = false; + initTts(); + } + } + } + + + /** + * Returns whether or not the TTS is busy speaking. + * + * @return Whether or not the TTS is busy speaking. + */ + public boolean isSpeaking() { + synchronized (startLock) { + if (!started) { + return false; + } + try { + return itts.isSpeaking(); + } catch (RemoteException e) { + // TTS died; restart it. + started = false; + initTts(); + } catch (NullPointerException e) { + // TTS died; restart it. + started = false; + initTts(); + } catch (IllegalStateException e) { + // TTS died; restart it. + started = false; + initTts(); + } + return false; + } + } + + + /** + * Stops speech from the TTS. 
+ */ + public void stop() { + synchronized (startLock) { + if (!started) { + return; + } + try { + itts.stop(); + } catch (RemoteException e) { + // TTS died; restart it. + started = false; + initTts(); + } catch (NullPointerException e) { + // TTS died; restart it. + started = false; + initTts(); + } catch (IllegalStateException e) { + // TTS died; restart it. + started = false; + initTts(); + } + } + } + + + /** + * Returns the version number of the TTS library that the user has + * installed. + * + * @return The version number of the TTS library that the user has + * installed. + */ + public int getVersion() { + return version; + } + + + /** + * Sets the TTS engine to be used. + * + * @param selectedEngine + * The TTS engine that should be used. + */ + public void setEngine(String engineName, String[] requestedLanguages, int strictness) { + synchronized (startLock) { + if (!started) { + return; + } + try { + itts.setEngine(engineName, requestedLanguages, strictness); + } catch (RemoteException e) { + // TTS died; restart it. + started = false; + initTts(); + } + } + } + + + /** + * Sets the speech rate for the TTS engine. + * + * Note that the speech rate is not universally supported by all engines and + * will be treated as a hint. The TTS library will try to use the specified + * speech rate, but there is no guarantee. + * + * Currently, this will change the speech rate for the espeak engine, but it + * has no effect on any pre-recorded speech. + * + * @param speechRate + * The speech rate for the TTS engine. + */ + public void setSpeechRate(int speechRate) { + synchronized (startLock) { + if (!started) { + return; + } + try { + itts.setSpeechRate(speechRate); + } catch (RemoteException e) { + // TTS died; restart it. + started = false; + initTts(); + } + } + } + + + /** + * Sets the language for the TTS engine. + * + * Note that the language is not universally supported by all engines and + * will be treated as a hint. The TTS library will try to use the specified + * language, but there is no guarantee. + * + * Currently, this will change the language for the espeak engine, but it + * has no effect on any pre-recorded speech. + * + * @param language + * The language to be used. The languages are specified by their + * IETF language tags as defined by BCP 47. This is the same + * standard used for the lang attribute in HTML. See: + * http://en.wikipedia.org/wiki/IETF_language_tag + */ + public void setLanguage(String language) { + synchronized (startLock) { + if (!started) { + return; + } + try { + itts.setLanguage(language); + } catch (RemoteException e) { + // TTS died; restart it. + started = false; + initTts(); + } + } + } + + + /** + * Speaks the given text using the specified queueing mode and parameters. + * + * @param text + * The String of text that should be synthesized + * @param params + * An ArrayList of parameters. The first element of this array + * controls the type of voice to use. + * @param filename + * The string that gives the full output filename; it should be + * something like "/sdcard/myappsounds/mysound.wav". + * @return A boolean that indicates if the synthesis succeeded + */ + public boolean synthesizeToFile(String text, String[] params, + String filename) { + synchronized (startLock) { + if (!started) { + return false; + } + try { + return itts.synthesizeToFile(text, params, filename); + } catch (RemoteException e) { + // TTS died; restart it. + started = false; + initTts(); + } catch (NullPointerException e) { + // TTS died; restart it. 
+ started = false; + initTts(); + } catch (IllegalStateException e) { + // TTS died; restart it. + started = false; + initTts(); + } + return false; + } + } + + + /** + * Displays an alert that prompts users to install the TTS engine. + * This is useful if the application expects a newer version + * of the TTS than what the user has. + */ + public void showVersionAlert() { + if (!started) { + return; + } + // FIXME #TTS# implement show version alert + } + + + /** + * Checks if the TTS service is installed or not + * + * @return A boolean that indicates whether the TTS service is installed + */ + // TODO: TTS Service itself will always be installed. Factor this out + // (may need to add another method to see if there are any working + // TTS engines on the device). + public static boolean isInstalled(Context ctx) { + PackageManager pm = ctx.getPackageManager(); + Intent intent = new Intent("android.intent.action.USE_TTS"); + intent.addCategory("android.intent.category.TTS"); + ResolveInfo info = pm.resolveService(intent, 0); + if (info == null) { + return false; + } + return true; + } + +} diff --git a/tts/jni/Android.mk b/tts/jni/Android.mk new file mode 100755 index 0000000..9abb56c --- /dev/null +++ b/tts/jni/Android.mk @@ -0,0 +1,26 @@ +LOCAL_PATH:= $(call my-dir) +include $(CLEAR_VARS) + +LOCAL_SRC_FILES:= \ + android_tts_SynthProxy.cpp + +LOCAL_C_INCLUDES += \ + $(JNI_H_INCLUDE) + +LOCAL_SHARED_LIBRARIES := \ + libandroid_runtime \ + libnativehelper \ + libmedia \ + libutils \ + libcutils \ + libdl + + +LOCAL_MODULE:= libttssynthproxy + +LOCAL_ARM_MODE := arm + +LOCAL_PRELINK_MODULE := false + +include $(BUILD_SHARED_LIBRARY) + diff --git a/tts/jni/android_tts_SynthProxy.cpp b/tts/jni/android_tts_SynthProxy.cpp new file mode 100755 index 0000000..d8f1bf3 --- /dev/null +++ b/tts/jni/android_tts_SynthProxy.cpp @@ -0,0 +1,595 @@ +/* + * Copyright (C) 2009 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#include <stdio.h> +#include <unistd.h> + +#define LOG_TAG "SynthProxy" + +#include <utils/Log.h> +#include <nativehelper/jni.h> +#include <nativehelper/JNIHelp.h> +#include <android_runtime/AndroidRuntime.h> +#include <tts/TtsEngine.h> +#include <media/AudioTrack.h> + +#include <dlfcn.h> + +#define DEFAULT_TTS_RATE 16000 +#define DEFAULT_TTS_FORMAT AudioSystem::PCM_16_BIT +#define DEFAULT_TTS_NB_CHANNELS 1 + +#define USAGEMODE_PLAY_IMMEDIATELY 0 +#define USAGEMODE_WRITE_TO_FILE 1 + +using namespace android; + +// ---------------------------------------------------------------------------- +struct fields_t { + jfieldID synthProxyFieldJniData; + jclass synthProxyClass; + jmethodID synthProxyMethodPost; +}; + +struct afterSynthData_t { + jint jniStorage; + int usageMode; + FILE* outputFile; +}; + +// ---------------------------------------------------------------------------- +static fields_t javaTTSFields; + +// ---------------------------------------------------------------------------- +class SynthProxyJniStorage { + public : + //jclass tts_class; + jobject tts_ref; + TtsEngine* mNativeSynthInterface; + AudioTrack* mAudioOut; + uint32_t mSampleRate; + AudioSystem::audio_format mAudFormat; + int mNbChannels; + + SynthProxyJniStorage() { + //tts_class = NULL; + tts_ref = NULL; + mNativeSynthInterface = NULL; + mAudioOut = NULL; + mSampleRate = DEFAULT_TTS_RATE; + mAudFormat = DEFAULT_TTS_FORMAT; + mNbChannels = DEFAULT_TTS_NB_CHANNELS; + } + + ~SynthProxyJniStorage() { + killAudio(); + if (mNativeSynthInterface) { + mNativeSynthInterface->shutdown(); + mNativeSynthInterface = NULL; + } + } + + void killAudio() { + if (mAudioOut) { + mAudioOut->stop(); + delete mAudioOut; + mAudioOut = NULL; + } + } + + void createAudioOut(uint32_t rate, AudioSystem::audio_format format, + int channel) { + mSampleRate = rate; + mAudFormat = format; + mNbChannels = channel; + + // TODO use the TTS stream type + int streamType = AudioSystem::MUSIC; + + // retrieve system properties to ensure successful creation of the + // AudioTrack object for playback + int afSampleRate; + if (AudioSystem::getOutputSamplingRate(&afSampleRate, streamType) != NO_ERROR) { + afSampleRate = 44100; + } + int afFrameCount; + if (AudioSystem::getOutputFrameCount(&afFrameCount, streamType) != NO_ERROR) { + afFrameCount = 2048; + } + uint32_t afLatency; + if (AudioSystem::getOutputLatency(&afLatency, streamType) != NO_ERROR) { + afLatency = 500; + } + uint32_t minBufCount = afLatency / ((1000 * afFrameCount)/afSampleRate); + if (minBufCount < 2) minBufCount = 2; + int minFrameCount = (afFrameCount * rate * minBufCount)/afSampleRate; + + mAudioOut = new AudioTrack(streamType, rate, format, channel, + minFrameCount > 4096 ? minFrameCount : 4096, + 0, 0, 0, 0); // not using an AudioTrack callback + + if (mAudioOut->initCheck() != NO_ERROR) { + LOGI("AudioTrack error"); + delete mAudioOut; + mAudioOut = NULL; + } else { + LOGI("AudioTrack OK"); + mAudioOut->start(); + LOGI("AudioTrack started"); + } + } +}; + + +// ---------------------------------------------------------------------------- +void prepAudioTrack(SynthProxyJniStorage* pJniData, + uint32_t rate, AudioSystem::audio_format format, int channel) +{ + // Don't bother creating a new audiotrack object if the current + // object is already set. 
+ if ( pJniData->mAudioOut && + (rate == pJniData->mSampleRate) && + (format == pJniData->mAudFormat) && + (channel == pJniData->mNbChannels) ){ + return; + } + if (pJniData->mAudioOut){ + pJniData->killAudio(); + } + pJniData->createAudioOut(rate, format, channel); +} + + +// ---------------------------------------------------------------------------- +/* + * Callback from TTS engine. + * Directly speaks using AudioTrack or write to file + */ +static void ttsSynthDoneCB(void * userdata, uint32_t rate, + AudioSystem::audio_format format, int channel, + int8_t *wav, size_t bufferSize) { + LOGI("ttsSynthDoneCallback: %d bytes", bufferSize); + + afterSynthData_t* pForAfter = (afterSynthData_t*)userdata; + + if (pForAfter->usageMode == USAGEMODE_PLAY_IMMEDIATELY){ + LOGI("Direct speech"); + + if (wav == NULL) { + LOGI("Null: speech has completed"); + } + + if (bufferSize > 0) { + SynthProxyJniStorage* pJniData = + (SynthProxyJniStorage*)(pForAfter->jniStorage); + prepAudioTrack(pJniData, rate, format, channel); + if (pJniData->mAudioOut) { + pJniData->mAudioOut->write(wav, bufferSize); + LOGI("AudioTrack wrote: %d bytes", bufferSize); + } else { + LOGI("Can't play, null audiotrack"); + } + } + } else if (pForAfter->usageMode == USAGEMODE_WRITE_TO_FILE) { + LOGI("Save to file"); + if (wav == NULL) { + LOGI("Null: speech has completed"); + } + if (bufferSize > 0){ + fwrite(wav, 1, bufferSize, pForAfter->outputFile); + } + } + // TODO update to call back into the SynthProxy class through the + // javaTTSFields.synthProxyMethodPost methode to notify + // playback has completed + + delete pForAfter; + return; +} + + +// ---------------------------------------------------------------------------- +static void +android_tts_SynthProxy_native_setup(JNIEnv *env, jobject thiz, + jobject weak_this, jstring nativeSoLib) +{ + SynthProxyJniStorage* pJniStorage = new SynthProxyJniStorage(); + + prepAudioTrack(pJniStorage, + DEFAULT_TTS_RATE, DEFAULT_TTS_FORMAT, DEFAULT_TTS_NB_CHANNELS); + + const char *nativeSoLibNativeString = + env->GetStringUTFChars(nativeSoLib, 0); + + void *engine_lib_handle = dlopen(nativeSoLibNativeString, + RTLD_NOW | RTLD_LOCAL); + if (engine_lib_handle==NULL) { + LOGI("engine_lib_handle==NULL"); + // TODO report error so the TTS can't be used + } else { + TtsEngine *(*get_TtsEngine)() = + reinterpret_cast<TtsEngine* (*)()>(dlsym(engine_lib_handle, "getTtsEngine")); + pJniStorage->mNativeSynthInterface = (*get_TtsEngine)(); + if (pJniStorage->mNativeSynthInterface) { + pJniStorage->mNativeSynthInterface->init(ttsSynthDoneCB); + } + } + + // we use a weak reference so the SynthProxy object can be garbage collected. 
+ pJniStorage->tts_ref = env->NewGlobalRef(weak_this); + + // save the JNI resources so we can use them (and free them) later + env->SetIntField(thiz, javaTTSFields.synthProxyFieldJniData, + (int)pJniStorage); + + env->ReleaseStringUTFChars(nativeSoLib, nativeSoLibNativeString); +} + + +static void +android_tts_SynthProxy_native_finalize(JNIEnv *env, jobject thiz, jint jniData) +{ + if (jniData) { + SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData; + delete pSynthData; + } +} + + +static void +android_tts_SynthProxy_setLanguage(JNIEnv *env, jobject thiz, jint jniData, + jstring language) +{ + if (jniData == 0) { + LOGE("android_tts_SynthProxy_setLanguage(): invalid JNI data"); + return; + } + + SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData; + const char *langNativeString = env->GetStringUTFChars(language, 0); + // TODO check return codes + if (pSynthData->mNativeSynthInterface) { + pSynthData->mNativeSynthInterface->setLanguage(langNativeString, + strlen(langNativeString)); + } + env->ReleaseStringUTFChars(language, langNativeString); +} + + +static void +android_tts_SynthProxy_setSpeechRate(JNIEnv *env, jobject thiz, jint jniData, + int speechRate) +{ + if (jniData == 0) { + LOGE("android_tts_SynthProxy_setSpeechRate(): invalid JNI data"); + return; + } + + int bufSize = 10; + char buffer [bufSize]; + sprintf(buffer, "%d", speechRate); + + SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData; + LOGI("setting speech rate to %d", speechRate); + // TODO check return codes + if (pSynthData->mNativeSynthInterface) { + pSynthData->mNativeSynthInterface->setProperty("rate", buffer, bufSize); + } +} + + +// TODO: Refactor this to get rid of any assumptions about sample rate, etc. +static void +android_tts_SynthProxy_synthesizeToFile(JNIEnv *env, jobject thiz, jint jniData, + jstring textJavaString, jstring filenameJavaString) +{ + if (jniData == 0) { + LOGE("android_tts_SynthProxy_synthesizeToFile(): invalid JNI data"); + return; + } + + SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData; + + const char *filenameNativeString = + env->GetStringUTFChars(filenameJavaString, 0); + const char *textNativeString = env->GetStringUTFChars(textJavaString, 0); + + afterSynthData_t* pForAfter = new (afterSynthData_t); + pForAfter->jniStorage = jniData; + pForAfter->usageMode = USAGEMODE_WRITE_TO_FILE; + + pForAfter->outputFile = fopen(filenameNativeString, "wb"); + + // Write 44 blank bytes for WAV header, then come back and fill them in + // after we've written the audio data + char header[44]; + fwrite(header, 1, 44, pForAfter->outputFile); + + unsigned int unique_identifier; + + // TODO check return codes + if (pSynthData->mNativeSynthInterface) { + pSynthData->mNativeSynthInterface->synthesizeText(textNativeString, + (void *)pForAfter); + } + + long filelen = ftell(pForAfter->outputFile); + + int samples = (((int)filelen) - 44) / 2; + header[0] = 'R'; + header[1] = 'I'; + header[2] = 'F'; + header[3] = 'F'; + ((uint32_t *)(&header[4]))[0] = filelen - 8; + header[8] = 'W'; + header[9] = 'A'; + header[10] = 'V'; + header[11] = 'E'; + + header[12] = 'f'; + header[13] = 'm'; + header[14] = 't'; + header[15] = ' '; + + ((uint32_t *)(&header[16]))[0] = 16; // size of fmt + + ((unsigned short *)(&header[20]))[0] = 1; // format + ((unsigned short *)(&header[22]))[0] = 1; // channels + ((uint32_t *)(&header[24]))[0] = 22050; // samplerate + ((uint32_t *)(&header[28]))[0] = 44100; // byterate + ((unsigned short *)(&header[32]))[0] = 2; // block align 
+ ((unsigned short *)(&header[34]))[0] = 16; // bits per sample + + header[36] = 'd'; + header[37] = 'a'; + header[38] = 't'; + header[39] = 'a'; + + ((uint32_t *)(&header[40]))[0] = samples * 2; // size of data + + // Skip back to the beginning and rewrite the header + fseek(pForAfter->outputFile, 0, SEEK_SET); + fwrite(header, 1, 44, pForAfter->outputFile); + + fflush(pForAfter->outputFile); + fclose(pForAfter->outputFile); + + env->ReleaseStringUTFChars(textJavaString, textNativeString); + env->ReleaseStringUTFChars(filenameJavaString, filenameNativeString); +} + + +static void +android_tts_SynthProxy_speak(JNIEnv *env, jobject thiz, jint jniData, + jstring textJavaString) +{ + if (jniData == 0) { + LOGE("android_tts_SynthProxy_speak(): invalid JNI data"); + return; + } + + SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData; + + if (pSynthData->mAudioOut) { + pSynthData->mAudioOut->stop(); + pSynthData->mAudioOut->start(); + } + + afterSynthData_t* pForAfter = new (afterSynthData_t); + pForAfter->jniStorage = jniData; + pForAfter->usageMode = USAGEMODE_PLAY_IMMEDIATELY; + + if (pSynthData->mNativeSynthInterface) { + const char *textNativeString = env->GetStringUTFChars(textJavaString, 0); + pSynthData->mNativeSynthInterface->synthesizeText(textNativeString, + (void *)pForAfter); + env->ReleaseStringUTFChars(textJavaString, textNativeString); + } +} + + +static void +android_tts_SynthProxy_stop(JNIEnv *env, jobject thiz, jint jniData) +{ + if (jniData == 0) { + LOGE("android_tts_SynthProxy_stop(): invalid JNI data"); + return; + } + + SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData; + + if (pSynthData->mNativeSynthInterface) { + pSynthData->mNativeSynthInterface->stop(); + } + if (pSynthData->mAudioOut) { + pSynthData->mAudioOut->stop(); + } +} + + +static void +android_tts_SynthProxy_shutdown(JNIEnv *env, jobject thiz, jint jniData) +{ + if (jniData == 0) { + LOGE("android_tts_SynthProxy_shutdown(): invalid JNI data"); + return; + } + + SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData; + if (pSynthData->mNativeSynthInterface) { + pSynthData->mNativeSynthInterface->shutdown(); + pSynthData->mNativeSynthInterface = NULL; + } +} + + +// TODO add buffer format +static void +android_tts_SynthProxy_playAudioBuffer(JNIEnv *env, jobject thiz, jint jniData, + int bufferPointer, int bufferSize) +{ + if (jniData == 0) { + LOGE("android_tts_SynthProxy_playAudioBuffer(): invalid JNI data"); + return; + } + + SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData; + short* wav = (short*) bufferPointer; + pSynthData->mAudioOut->write(wav, bufferSize); + LOGI("AudioTrack wrote: %d bytes", bufferSize); +} + + +JNIEXPORT jstring JNICALL +android_tts_SynthProxy_getLanguage(JNIEnv *env, jobject thiz, jint jniData) +{ + if (jniData == 0) { + LOGE("android_tts_SynthProxy_getLanguage(): invalid JNI data"); + return env->NewStringUTF(""); + } + + SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData; + size_t bufSize = 100; + char buf[bufSize]; + memset(buf, 0, bufSize); + // TODO check return codes + if (pSynthData->mNativeSynthInterface) { + pSynthData->mNativeSynthInterface->getLanguage(buf, &bufSize); + } + return env->NewStringUTF(buf); +} + +JNIEXPORT int JNICALL +android_tts_SynthProxy_getRate(JNIEnv *env, jobject thiz, jint jniData) +{ + if (jniData == 0) { + LOGE("android_tts_SynthProxy_getRate(): invalid JNI data"); + return 0; + } + + SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData; + size_t bufSize = 100; + + 
char buf[bufSize]; + memset(buf, 0, bufSize); + // TODO check return codes + if (pSynthData->mNativeSynthInterface) { + pSynthData->mNativeSynthInterface->getProperty("rate", buf, &bufSize); + } + return atoi(buf); +} + +// Dalvik VM type signatures +static JNINativeMethod gMethods[] = { + { "native_stop", + "(I)V", + (void*)android_tts_SynthProxy_stop + }, + { "native_speak", + "(ILjava/lang/String;)V", + (void*)android_tts_SynthProxy_speak + }, + { "native_synthesizeToFile", + "(ILjava/lang/String;Ljava/lang/String;)V", + (void*)android_tts_SynthProxy_synthesizeToFile + }, + { "native_setLanguage", + "(ILjava/lang/String;)V", + (void*)android_tts_SynthProxy_setLanguage + }, + { "native_setSpeechRate", + "(II)V", + (void*)android_tts_SynthProxy_setSpeechRate + }, + { "native_playAudioBuffer", + "(III)V", + (void*)android_tts_SynthProxy_playAudioBuffer + }, + { "native_getLanguage", + "(I)Ljava/lang/String;", + (void*)android_tts_SynthProxy_getLanguage + }, + { "native_getRate", + "(I)I", + (void*)android_tts_SynthProxy_getRate + }, + { "native_shutdown", + "(I)V", + (void*)android_tts_SynthProxy_shutdown + }, + { "native_setup", + "(Ljava/lang/Object;Ljava/lang/String;)V", + (void*)android_tts_SynthProxy_native_setup + }, + { "native_finalize", + "(I)V", + (void*)android_tts_SynthProxy_native_finalize + } +}; + +#define SP_JNIDATA_FIELD_NAME "mJniData" +#define SP_POSTSPEECHSYNTHESIZED_METHOD_NAME "postNativeSpeechSynthesizedInJava" + +// TODO: verify this is the correct path +static const char* const kClassPathName = "android/tts/SynthProxy"; + +jint JNI_OnLoad(JavaVM* vm, void* reserved) +{ + JNIEnv* env = NULL; + jint result = -1; + jclass clazz; + + if (vm->GetEnv((void**) &env, JNI_VERSION_1_4) != JNI_OK) { + LOGE("ERROR: GetEnv failed\n"); + goto bail; + } + assert(env != NULL); + + clazz = env->FindClass(kClassPathName); + if (clazz == NULL) { + LOGE("Can't find %s", kClassPathName); + goto bail; + } + + javaTTSFields.synthProxyClass = clazz; + javaTTSFields.synthProxyFieldJniData = NULL; + javaTTSFields.synthProxyMethodPost = NULL; + + javaTTSFields.synthProxyFieldJniData = env->GetFieldID(clazz, + SP_JNIDATA_FIELD_NAME, "I"); + if (javaTTSFields.synthProxyFieldJniData == NULL) { + LOGE("Can't find %s.%s field", kClassPathName, SP_JNIDATA_FIELD_NAME); + goto bail; + } + + javaTTSFields.synthProxyMethodPost = env->GetStaticMethodID(clazz, + SP_POSTSPEECHSYNTHESIZED_METHOD_NAME, "(Ljava/lang/Object;II)V"); + if (javaTTSFields.synthProxyMethodPost == NULL) { + LOGE("Can't find %s.%s method", kClassPathName, SP_POSTSPEECHSYNTHESIZED_METHOD_NAME); + goto bail; + } + + if (jniRegisterNativeMethods( + env, kClassPathName, gMethods, NELEM(gMethods)) < 0) + goto bail; + + /* success -- return valid version number */ + result = JNI_VERSION_1_4; + + bail: + return result; +} |
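
Below is a minimal usage sketch of the android.tts.Tts wrapper introduced in this change, based only on the constructor and method signatures added above. The activity name and the specific language tag, speech rate, and queue mode values are illustrative assumptions, not part of this change.

import android.app.Activity;
import android.os.Bundle;
import android.tts.Tts;

public class TtsDemoActivity extends Activity implements Tts.OnInitListener {
    private Tts mTts;

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        // Third argument asks the wrapper to prompt the user to install a TTS
        // engine if none is found (see the constructor javadoc in Tts.java).
        mTts = new Tts(this, this, true);
    }

    // Invoked once the service binding succeeds and the engine is ready.
    public void onInit(int version) {
        mTts.setLanguage("en-US");   // IETF/BCP 47 language tag (assumed value)
        mTts.setSpeechRate(140);     // treated as a hint by the engine (assumed value)
        // queueMode: 0 = speak immediately, 1 = queue behind earlier utterances
        mTts.speak("Hello, world", 0, new String[0]);
    }

    @Override
    protected void onDestroy() {
        mTts.shutdown();             // unbind from the TTS service
        super.onDestroy();
    }
}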