diff options
Diffstat (limited to 'tts')
-rwxr-xr-x | tts/java/android/tts/SynthProxy.java | 172 | ||||
-rwxr-xr-x | tts/java/android/tts/TtsService.java | 783 | ||||
-rwxr-xr-x | tts/jni/Android.mk | 34 | ||||
-rwxr-xr-x | tts/jni/android_tts_SynthProxy.cpp | 595 |
4 files changed, 1584 insertions, 0 deletions
diff --git a/tts/java/android/tts/SynthProxy.java b/tts/java/android/tts/SynthProxy.java
new file mode 100755
index 0000000..4ed9754
--- /dev/null
+++ b/tts/java/android/tts/SynthProxy.java
@@ -0,0 +1,172 @@
+/*
+ * Copyright (C) 2009 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package android.tts;
+
+import android.util.Log;
+import java.lang.ref.WeakReference;
+
+/**
+ * @hide
+ *
+ * The SynthProxy class provides a high-level api to create and play
+ * synthesized speech. This class is used internally to talk to a native
+ * TTS library that implements the interface defined in
+ * frameworks/base/include/tts/TtsEngine.h
+ *
+ */
+@SuppressWarnings("unused")
+public class SynthProxy {
+
+    //
+    // External API
+    //
+
+    /**
+     * Constructor; pass the location of the native TTS .so to use.
+     */
+    public SynthProxy(String nativeSoLib) {
+        Log.i(TAG, "TTS is loading " + nativeSoLib);
+        native_setup(new WeakReference<SynthProxy>(this), nativeSoLib);
+    }
+
+    /**
+     * Stops and clears the AudioTrack.
+     */
+    public void stop() {
+        native_stop(mJniData);
+    }
+
+    /**
+     * Synthesize speech and speak it directly using AudioTrack.
+     */
+    public void speak(String text) {
+        native_speak(mJniData, text);
+    }
+
+    /**
+     * Synthesize speech to a file. The current implementation writes a valid
+     * WAV file to the given path, assuming it is writable. Something like
+     * "/sdcard/???.wav" is recommended.
+     */
+    public void synthesizeToFile(String text, String filename) {
+        native_synthesizeToFile(mJniData, text, filename);
+    }
+
+    // TODO add IPA methods
+
+    /**
+     * Sets the language
+     */
+    public void setLanguage(String language) {
+        native_setLanguage(mJniData, language);
+    }
+
+    /**
+     * Sets the speech rate
+     */
+    public final void setSpeechRate(int speechRate) {
+        native_setSpeechRate(mJniData, speechRate);
+    }
+
+
+    /**
+     * Plays the given audio buffer
+     */
+    public void playAudioBuffer(int bufferPointer, int bufferSize) {
+        native_playAudioBuffer(mJniData, bufferPointer, bufferSize);
+    }
+
+    /**
+     * Gets the currently set language
+     */
+    public String getLanguage() {
+        return native_getLanguage(mJniData);
+    }
+
+    /**
+     * Gets the currently set rate
+     */
+    public int getRate() {
+        return native_getRate(mJniData);
+    }
+
+    /**
+     * Shuts down the native synthesizer
+     */
+    public void shutdown() {
+        native_shutdown(mJniData);
+    }
+
+    //
+    // Internal
+    //
+
+    protected void finalize() {
+        native_finalize(mJniData);
+        mJniData = 0;
+    }
+
+    static {
+        System.loadLibrary("ttssynthproxy");
+    }
+
+    private final static String TAG = "SynthProxy";
+
+    /**
+     * Accessed by native methods
+     */
+    private int mJniData = 0;
+
+    private native final void native_setup(Object weak_this,
+            String nativeSoLib);
+
+    private native final void native_finalize(int jniData);
+
+    private native final void native_stop(int jniData);
+
+    private native final void native_speak(int jniData, String text);
+
+    private native final void native_synthesizeToFile(int jniData, String text, String filename);
+
+    private native final void native_setLanguage(int jniData, String language);
+
+    private native final void native_setSpeechRate(int jniData, int speechRate);
+
+    // TODO add buffer format
+    private native final void native_playAudioBuffer(int jniData, int bufferPointer, int bufferSize);
+
+    private native final String native_getLanguage(int jniData);
+
+    private native final int native_getRate(int jniData);
+
+    private native final void native_shutdown(int jniData);
+
+
+    /**
+     * Callback from the C layer
+     */
+    @SuppressWarnings("unused")
+    private static void postNativeSpeechSynthesizedInJava(Object tts_ref,
+            int bufferPointer, int bufferSize) {
+
+        Log.i("TTS plugin debug", "bufferPointer: " + bufferPointer + " bufferSize: " + bufferSize);
+
+        SynthProxy nativeTTS = (SynthProxy)((WeakReference)tts_ref).get();
+        // TODO notify TTS service of synthesis/playback completion,
+        // method definition to be changed.
+    }
+}
diff --git a/tts/java/android/tts/TtsService.java b/tts/java/android/tts/TtsService.java
new file mode 100755
index 0000000..4b794db
--- /dev/null
+++ b/tts/java/android/tts/TtsService.java
@@ -0,0 +1,783 @@
+/*
+ * Copyright (C) 2009 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package android.tts;
+
+import android.tts.ITts.Stub;
+
+import android.app.Service;
+import android.content.Context;
+import android.content.Intent;
+import android.content.SharedPreferences;
+import android.content.pm.PackageManager;
+import android.content.pm.ResolveInfo;
+import android.content.pm.PackageManager.NameNotFoundException;
+import android.media.MediaPlayer;
+import android.media.MediaPlayer.OnCompletionListener;
+import android.net.Uri;
+import android.os.IBinder;
+import android.os.RemoteCallbackList;
+import android.os.RemoteException;
+import android.preference.PreferenceManager;
+import android.util.Log;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.concurrent.locks.ReentrantLock;
+
+/**
+ * @hide Synthesizes speech from text. This is implemented as a service so that
+ *       other applications can call the TTS without needing to bundle the TTS
+ *       in the build.
+ *
+ */
+public class TtsService extends Service implements OnCompletionListener {
+
+    private class SpeechItem {
+        public static final int SPEECH = 0;
+        public static final int EARCON = 1;
+        public static final int SILENCE = 2;
+        public String mText = null;
+        public ArrayList<String> mParams = null;
+        public int mType = SPEECH;
+        public long mDuration = 0;
+
+        public SpeechItem(String text, ArrayList<String> params, int itemType) {
+            mText = text;
+            mParams = params;
+            mType = itemType;
+        }
+
+        public SpeechItem(long silenceTime) {
+            mDuration = silenceTime;
+            mType = SILENCE;
+        }
+    }
+
+    /**
+     * Contains the information needed to access a sound resource; the name of
+     * the package that contains the resource and the resID of the resource
+     * within that package.
+     */
+    private class SoundResource {
+        public String mSourcePackageName = null;
+        public int mResId = -1;
+        public String mFilename = null;
+
+        public SoundResource(String packageName, int id) {
+            mSourcePackageName = packageName;
+            mResId = id;
+            mFilename = null;
+        }
+
+        public SoundResource(String file) {
+            mSourcePackageName = null;
+            mResId = -1;
+            mFilename = file;
+        }
+    }
+
+    private static final String ACTION = "android.intent.action.USE_TTS";
+    private static final String CATEGORY = "android.intent.category.TTS";
+    private static final String PKGNAME = "android.tts";
+
+    final RemoteCallbackList<ITtsCallback> mCallbacks = new RemoteCallbackList<ITtsCallback>();
+
+    private Boolean isSpeaking;
+    private ArrayList<SpeechItem> speechQueue;
+    private HashMap<String, SoundResource> earcons;
+    private HashMap<String, SoundResource> utterances;
+    private MediaPlayer player;
+    private TtsService self;
+
+    private SharedPreferences prefs;
+
+    private final ReentrantLock speechQueueLock = new ReentrantLock();
+    private final ReentrantLock synthesizerLock = new ReentrantLock();
+
+    // TODO support multiple SpeechSynthesis objects
+    private SynthProxy nativeSynth;
+
+    @Override
+    public void onCreate() {
+        super.onCreate();
+        Log.i("TTS", "TTS starting");
+
+
+        // TODO: Make this work when the settings are done in the main Settings
+        // app.
+        prefs = PreferenceManager.getDefaultSharedPreferences(this);
+
+        // TODO: This should be changed to work by requesting the path
+        // from the default engine.
+        nativeSynth = new SynthProxy(prefs.getString("engine_pref", ""));
+
+
+        self = this;
+        isSpeaking = false;
+
+        earcons = new HashMap<String, SoundResource>();
+        utterances = new HashMap<String, SoundResource>();
+
+        speechQueue = new ArrayList<SpeechItem>();
+        player = null;
+
+        setLanguage(prefs.getString("lang_pref", "en-rUS"));
+        setSpeechRate(Integer.parseInt(prefs.getString("rate_pref", "140")));
+    }
+
+    @Override
+    public void onDestroy() {
+        super.onDestroy();
+        // Don't hog the media player
+        cleanUpPlayer();
+
+        nativeSynth.shutdown();
+
+        // Unregister all callbacks.
+        mCallbacks.kill();
+    }
+
+    private void setSpeechRate(int rate) {
+        if (prefs.getBoolean("override_pref", false)) {
+            // This is set to the default here so that the preview in the prefs
+            // activity will show the change without a restart, even if apps are
+            // not allowed to change the defaults.
+            rate = Integer.parseInt(prefs.getString("rate_pref", "140"));
+        }
+        nativeSynth.setSpeechRate(rate);
+    }
+
+    private void setLanguage(String lang) {
+        if (prefs.getBoolean("override_pref", false)) {
+            // This is set to the default here so that the preview in the prefs
+            // activity will show the change without a restart, even if apps are
+            // not
+            // allowed to change the defaults.
+            lang = prefs.getString("lang_pref", "en-rUS");
+        }
+        nativeSynth.setLanguage(lang);
+    }
+
+    private void setEngine(String engineName, String[] requestedLanguages,
+            int strictness) {
+        // TODO: Implement engine selection code here.
+        Intent engineIntent = new Intent(
+                "android.intent.action.START_TTS_ENGINE");
+        if (engineName != null) {
+            engineIntent.addCategory("android.intent.action.tts_engine."
+                    + engineName);
+        }
+        for (int i = 0; i < requestedLanguages.length; i++) {
+            engineIntent.addCategory("android.intent.action.tts_lang."
+                    + requestedLanguages[i]);
+        }
+        ResolveInfo[] enginesArray = new ResolveInfo[0];
+        PackageManager pm = getPackageManager();
+        enginesArray = pm.queryIntentActivities(engineIntent, 0).toArray(
+                enginesArray);
+    }
+
+    private void setEngine(Intent engineIntent) {
+        // TODO: Implement engine selection code here.
+    }
+
+    private int getEngineStatus() {
+        // TODO: Proposal - add a sanity check method that
+        // TTS engine plugins must implement.
+        return 0;
+    }
+
+    /**
+     * Adds a sound resource to the TTS.
+     *
+     * @param text
+     *            The text that should be associated with the sound resource
+     * @param packageName
+     *            The name of the package which has the sound resource
+     * @param resId
+     *            The resource ID of the sound within its package
+     */
+    private void addSpeech(String text, String packageName, int resId) {
+        utterances.put(text, new SoundResource(packageName, resId));
+    }
+
+    /**
+     * Adds a sound resource to the TTS.
+     *
+     * @param text
+     *            The text that should be associated with the sound resource
+     * @param filename
+     *            The filename of the sound resource. This must be a complete
+     *            path like: (/sdcard/mysounds/mysoundbite.mp3).
+     */
+    private void addSpeech(String text, String filename) {
+        utterances.put(text, new SoundResource(filename));
+    }
+
+    /**
+     * Adds a sound resource to the TTS as an earcon.
+     *
+     * @param earcon
+     *            The text that should be associated with the sound resource
+     * @param packageName
+     *            The name of the package which has the sound resource
+     * @param resId
+     *            The resource ID of the sound within its package
+     */
+    private void addEarcon(String earcon, String packageName, int resId) {
+        earcons.put(earcon, new SoundResource(packageName, resId));
+    }
+
+    /**
+     * Adds a sound resource to the TTS as an earcon.
+     *
+     * @param earcon
+     *            The text that should be associated with the sound resource
+     * @param filename
+     *            The filename of the sound resource. This must be a complete
+     *            path like: (/sdcard/mysounds/mysoundbite.mp3).
+     */
+    private void addEarcon(String earcon, String filename) {
+        earcons.put(earcon, new SoundResource(filename));
+    }
+
+    /**
+     * Speaks the given text using the specified queueing mode and parameters.
+     *
+     * @param text
+     *            The text that should be spoken
+     * @param queueMode
+     *            0 for no queue (interrupts all previous utterances), 1 for
+     *            queued
+     * @param params
+     *            An ArrayList of parameters. This is not implemented for all
+     *            engines.
+     */
+    private void speak(String text, int queueMode, ArrayList<String> params) {
+        if (queueMode == 0) {
+            stop();
+        }
+        speechQueue.add(new SpeechItem(text, params, SpeechItem.SPEECH));
+        if (!isSpeaking) {
+            processSpeechQueue();
+        }
+    }
+
+    /**
+     * Plays the earcon using the specified queueing mode and parameters.
+     *
+     * @param earcon
+     *            The earcon that should be played
+     * @param queueMode
+     *            0 for no queue (interrupts all previous utterances), 1 for
+     *            queued
+     * @param params
+     *            An ArrayList of parameters. This is not implemented for all
+     *            engines.
+     */
+    private void playEarcon(String earcon, int queueMode,
+            ArrayList<String> params) {
+        if (queueMode == 0) {
+            stop();
+        }
+        speechQueue.add(new SpeechItem(earcon, params, SpeechItem.EARCON));
+        if (!isSpeaking) {
+            processSpeechQueue();
+        }
+    }
+
+    /**
+     * Stops all speech output and removes any utterances still in the queue.
+     */
+    private void stop() {
+        Log.i("TTS", "Stopping");
+        speechQueue.clear();
+
+        nativeSynth.stop();
+        isSpeaking = false;
+        if (player != null) {
+            try {
+                player.stop();
+            } catch (IllegalStateException e) {
+                // Do nothing, the player is already stopped.
+            }
+        }
+        Log.i("TTS", "Stopped");
+    }
+
+    public void onCompletion(MediaPlayer arg0) {
+        processSpeechQueue();
+    }
+
+    private void playSilence(long duration, int queueMode,
+            ArrayList<String> params) {
+        if (queueMode == 0) {
+            stop();
+        }
+        speechQueue.add(new SpeechItem(duration));
+        if (!isSpeaking) {
+            processSpeechQueue();
+        }
+    }
+
+    private void silence(final long duration) {
+        class SilenceThread implements Runnable {
+            public void run() {
+                try {
+                    Thread.sleep(duration);
+                } catch (InterruptedException e) {
+                    e.printStackTrace();
+                } finally {
+                    processSpeechQueue();
+                }
+            }
+        }
+        Thread slnc = (new Thread(new SilenceThread()));
+        slnc.setPriority(Thread.MIN_PRIORITY);
+        slnc.start();
+    }
+
+    private void speakInternalOnly(final String text,
+            final ArrayList<String> params) {
+        class SynthThread implements Runnable {
+            public void run() {
+                boolean synthAvailable = false;
+                try {
+                    synthAvailable = synthesizerLock.tryLock();
+                    if (!synthAvailable) {
+                        Thread.sleep(100);
+                        Thread synth = (new Thread(new SynthThread()));
+                        synth.setPriority(Thread.MIN_PRIORITY);
+                        synth.start();
+                        return;
+                    }
+                    nativeSynth.speak(text);
+                } catch (InterruptedException e) {
+                    e.printStackTrace();
+                } finally {
+                    // This check is needed because finally will always run;
+                    // even if the
+                    // method returns somewhere in the try block.
+                    if (synthAvailable) {
+                        synthesizerLock.unlock();
+                    }
+                }
+            }
+        }
+        Thread synth = (new Thread(new SynthThread()));
+        synth.setPriority(Thread.MIN_PRIORITY);
+        synth.start();
+    }
+
+    private SoundResource getSoundResource(SpeechItem speechItem) {
+        SoundResource sr = null;
+        String text = speechItem.mText;
+        if (speechItem.mType == SpeechItem.SILENCE) {
+            // Do nothing if this is just silence
+        } else if (speechItem.mType == SpeechItem.EARCON) {
+            sr = earcons.get(text);
+        } else {
+            sr = utterances.get(text);
+        }
+        return sr;
+    }
+
+    private void dispatchSpeechCompletedCallbacks(String mark) {
+        Log.i("TTS callback", "dispatch started");
+        // Broadcast to all clients the new value.
+        final int N = mCallbacks.beginBroadcast();
+        for (int i = 0; i < N; i++) {
+            try {
+                mCallbacks.getBroadcastItem(i).markReached(mark);
+            } catch (RemoteException e) {
+                // The RemoteCallbackList will take care of removing
+                // the dead object for us.
+            }
+        }
+        mCallbacks.finishBroadcast();
+        Log.i("TTS callback", "dispatch completed to " + N);
+    }
+
+    private void processSpeechQueue() {
+        boolean speechQueueAvailable = false;
+        try {
+            speechQueueAvailable = speechQueueLock.tryLock();
+            if (!speechQueueAvailable) {
+                return;
+            }
+            if (speechQueue.size() < 1) {
+                isSpeaking = false;
+                // Dispatch a completion here as this is the
+                // only place where speech completes normally.
+                // Nothing left to say in the queue is a special case
+                // that is always a "mark" - associated text is null.
+                dispatchSpeechCompletedCallbacks("");
+                return;
+            }
+
+            SpeechItem currentSpeechItem = speechQueue.get(0);
+            isSpeaking = true;
+            SoundResource sr = getSoundResource(currentSpeechItem);
+            // Synth speech as needed - synthesizer should call
+            // processSpeechQueue to continue running the queue
+            Log.i("TTS processing: ", String.valueOf(currentSpeechItem.mText));
+            if (sr == null) {
+                if (currentSpeechItem.mType == SpeechItem.SPEECH) {
+                    // TODO: Split text up into smaller chunks before accepting
+                    // them
+                    // for processing.
+                    speakInternalOnly(currentSpeechItem.mText,
+                            currentSpeechItem.mParams);
+                } else {
+                    // This is either silence or an earcon that was missing
+                    silence(currentSpeechItem.mDuration);
+                }
+            } else {
+                cleanUpPlayer();
+                if (PKGNAME.equals(sr.mSourcePackageName)) {
+                    // Utterance is part of the TTS library
+                    player = MediaPlayer.create(this, sr.mResId);
+                } else if (sr.mSourcePackageName != null) {
+                    // Utterance is part of the app calling the library
+                    Context ctx;
+                    try {
+                        ctx = this.createPackageContext(sr.mSourcePackageName,
+                                0);
+                    } catch (NameNotFoundException e) {
+                        e.printStackTrace();
+                        speechQueue.remove(0); // Remove it from the queue and
+                        // move on
+                        isSpeaking = false;
+                        return;
+                    }
+                    player = MediaPlayer.create(ctx, sr.mResId);
+                } else {
+                    // Utterance is coming from a file
+                    player = MediaPlayer.create(this, Uri.parse(sr.mFilename));
+                }
+
+                // Check if Media Server is dead; if it is, clear the queue and
+                // give up for now - hopefully, it will recover itself.
+                if (player == null) {
+                    speechQueue.clear();
+                    isSpeaking = false;
+                    return;
+                }
+                player.setOnCompletionListener(this);
+                try {
+                    player.start();
+                } catch (IllegalStateException e) {
+                    speechQueue.clear();
+                    isSpeaking = false;
+                    cleanUpPlayer();
+                    return;
+                }
+            }
+            if (speechQueue.size() > 0) {
+                speechQueue.remove(0);
+            }
+        } finally {
+            // This check is needed because finally will always run; even if the
+            // method returns somewhere in the try block.
+            if (speechQueueAvailable) {
+                speechQueueLock.unlock();
+            }
+        }
+    }
+
+    private void cleanUpPlayer() {
+        if (player != null) {
+            player.release();
+            player = null;
+        }
+    }
+
+    /**
+     * Synthesizes the given text using the specified queuing mode and
+     * parameters.
+     *
+     * @param text
+     *            The String of text that should be synthesized
+     * @param params
+     *            An ArrayList of parameters. The first element of this array
+     *            controls the type of voice to use.
+     * @param filename
+     *            The string that gives the full output filename; it should be
+     *            something like "/sdcard/myappsounds/mysound.wav".
+     * @return A boolean that indicates if the synthesis succeeded
+     */
+    private boolean synthesizeToFile(String text, ArrayList<String> params,
+            String filename, boolean calledFromApi) {
+        // Only stop everything if this is a call made by an outside app trying
+        // to
+        // use the API. Do NOT stop if this is a call from within the service as
+        // clearing the speech queue here would be a mistake.
+        if (calledFromApi) {
+            stop();
+        }
+        Log.i("TTS", "Synthesizing to " + filename);
+        boolean synthAvailable = false;
+        try {
+            synthAvailable = synthesizerLock.tryLock();
+            if (!synthAvailable) {
+                return false;
+            }
+            // Don't allow a filename that is too long
+            // TODO use platform constant
+            if (filename.length() > 250) {
+                return false;
+            }
+            nativeSynth.synthesizeToFile(text, filename);
+        } finally {
+            // This check is needed because finally will always run; even if the
+            // method returns somewhere in the try block.
+            if (synthAvailable) {
+                synthesizerLock.unlock();
+            }
+        }
+        Log.i("TTS", "Completed synthesis for " + filename);
+        return true;
+    }
+
+    @Override
+    public IBinder onBind(Intent intent) {
+        // hasCategory() is used instead of iterating getCategories() because
+        // getCategories() returns null for an intent without categories,
+        // which would make a for-each loop over it throw a
+        // NullPointerException.
+        if (ACTION.equals(intent.getAction()) && intent.hasCategory(CATEGORY)) {
+            return mBinder;
+        }
+        return null;
+    }
+
+    private final ITts.Stub mBinder = new Stub() {
+
+        public void registerCallback(ITtsCallback cb) {
+            if (cb != null)
+                mCallbacks.register(cb);
+        }
+
+        public void unregisterCallback(ITtsCallback cb) {
+            if (cb != null)
+                mCallbacks.unregister(cb);
+        }
+
+        /**
+         * Gives a hint about the type of engine that is preferred.
+         *
+         * @param engineName
+         *            The TTS engine that should be used
+         */
+        public void setEngine(String engineName, String[] supportedLanguages,
+                int strictness) {
+            self.setEngine(engineName, supportedLanguages, strictness);
+        }
+
+        /**
+         * Specifies exactly what the engine has to support. Will always be
+         * considered "strict"; can be used for implementing
+         * optional/experimental features that are not supported by all engines.
+         *
+         * @param engineIntent
+         *            An intent that specifies exactly what the engine has to
+         *            support.
+         */
+        public void setEngineWithIntent(Intent engineIntent) {
+            self.setEngine(engineIntent);
+        }
+
+        /**
+         * Speaks the given text using the specified queueing mode and
+         * parameters.
+         *
+         * @param text
+         *            The text that should be spoken
+         * @param queueMode
+         *            0 for no queue (interrupts all previous utterances), 1 for
+         *            queued
+         * @param params
+         *            An ArrayList of parameters. The first element of this
+         *            array controls the type of voice to use.
+         */
+        public void speak(String text, int queueMode, String[] params) {
+            ArrayList<String> speakingParams = new ArrayList<String>();
+            if (params != null) {
+                speakingParams = new ArrayList<String>(Arrays.asList(params));
+            }
+            self.speak(text, queueMode, speakingParams);
+        }
+
+        /**
+         * Plays the earcon using the specified queueing mode and parameters.
+         *
+         * @param earcon
+         *            The earcon that should be played
+         * @param queueMode
+         *            0 for no queue (interrupts all previous utterances), 1 for
+         *            queued
+         * @param params
+         *            An ArrayList of parameters.
+         */
+        public void playEarcon(String earcon, int queueMode, String[] params) {
+            ArrayList<String> speakingParams = new ArrayList<String>();
+            if (params != null) {
+                speakingParams = new ArrayList<String>(Arrays.asList(params));
+            }
+            self.playEarcon(earcon, queueMode, speakingParams);
+        }
+
+        /**
+         * Plays the silence using the specified queueing mode and parameters.
+         *
+         * @param duration
+         *            The duration of the silence that should be played
+         * @param queueMode
+         *            0 for no queue (interrupts all previous utterances), 1 for
+         *            queued
+         * @param params
+         *            An ArrayList of parameters.
+         */
+        public void playSilence(long duration, int queueMode, String[] params) {
+            ArrayList<String> speakingParams = new ArrayList<String>();
+            if (params != null) {
+                speakingParams = new ArrayList<String>(Arrays.asList(params));
+            }
+            self.playSilence(duration, queueMode, speakingParams);
+        }
+
+
+        /**
+         * Stops all speech output and removes any utterances still in the
+         * queue.
+         */
+        public void stop() {
+            self.stop();
+        }
+
+        /**
+         * Returns whether or not the TTS is speaking.
+         *
+         * @return Boolean to indicate whether or not the TTS is speaking
+         */
+        public boolean isSpeaking() {
+            return (self.isSpeaking && (speechQueue.size() < 1));
+        }
+
+        /**
+         * Adds a sound resource to the TTS.
+         *
+         * @param text
+         *            The text that should be associated with the sound resource
+         * @param packageName
+         *            The name of the package which has the sound resource
+         * @param resId
+         *            The resource ID of the sound within its package
+         */
+        public void addSpeech(String text, String packageName, int resId) {
+            self.addSpeech(text, packageName, resId);
+        }
+
+        /**
+         * Adds a sound resource to the TTS.
+         *
+         * @param text
+         *            The text that should be associated with the sound resource
+         * @param filename
+         *            The filename of the sound resource. This must be a
+         *            complete path like: (/sdcard/mysounds/mysoundbite.mp3).
+         */
+        public void addSpeechFile(String text, String filename) {
+            self.addSpeech(text, filename);
+        }
+
+        /**
+         * Adds a sound resource to the TTS as an earcon.
+         *
+         * @param earcon
+         *            The text that should be associated with the sound resource
+         * @param packageName
+         *            The name of the package which has the sound resource
+         * @param resId
+         *            The resource ID of the sound within its package
+         */
+        public void addEarcon(String earcon, String packageName, int resId) {
+            self.addEarcon(earcon, packageName, resId);
+        }
+
+        /**
+         * Adds a sound resource to the TTS as an earcon.
+         *
+         * @param earcon
+         *            The text that should be associated with the sound resource
+         * @param filename
+         *            The filename of the sound resource. This must be a
+         *            complete path like: (/sdcard/mysounds/mysoundbite.mp3).
+         */
+        public void addEarconFile(String earcon, String filename) {
+            self.addEarcon(earcon, filename);
+        }
+
+        /**
+         * Sets the speech rate for the TTS. Note that this will only have an
+         * effect on synthesized speech; it will not affect pre-recorded speech.
+         *
+         * @param speechRate
+         *            The speech rate that should be used
+         */
+        public void setSpeechRate(int speechRate) {
+            self.setSpeechRate(speechRate);
+        }
+
+        /**
+         * Sets the language for the TTS. Note that this will only have an
+         * effect on synthesized speech; it will not affect pre-recorded speech.
+         *
+         * @param language
+         *            The language to be used. The languages are specified by
+         *            their IETF language tags as defined by BCP 47. This is the
+         *            same standard used for the lang attribute in HTML. See:
+         *            http://en.wikipedia.org/wiki/IETF_language_tag
+         */
+        public void setLanguage(String language) {
+            self.setLanguage(language);
+        }
+
+        /**
+         * Synthesizes the given text to a file using the specified
+         * parameters.
+         *
+         * @param text
+         *            The String of text that should be synthesized
+         * @param params
+         *            An ArrayList of parameters. The first element of this
+         *            array controls the type of voice to use.
+         * @param filename
+         *            The string that gives the full output filename; it should
+         *            be something like "/sdcard/myappsounds/mysound.wav".
+         * @return A boolean that indicates if the synthesis succeeded
+         */
+        public boolean synthesizeToFile(String text, String[] params,
+                String filename) {
+            ArrayList<String> speakingParams = new ArrayList<String>();
+            if (params != null) {
+                speakingParams = new ArrayList<String>(Arrays.asList(params));
+            }
+            return self.synthesizeToFile(text, speakingParams, filename, true);
+        }
+    };
+
+}
diff --git a/tts/jni/Android.mk b/tts/jni/Android.mk
new file mode 100755
index 0000000..bb76583
--- /dev/null
+++ b/tts/jni/Android.mk
@@ -0,0 +1,34 @@
+LOCAL_PATH:= $(call my-dir)
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES:= \
+	android_tts_SynthProxy.cpp
+
+LOCAL_C_INCLUDES += \
+	$(JNI_H_INCLUDE)
+
+LOCAL_SHARED_LIBRARIES := \
+	libandroid_runtime \
+	libnativehelper \
+	libmedia \
+	libutils \
+	libcutils
+
+ifneq ($(TARGET_SIMULATOR),true)
+LOCAL_SHARED_LIBRARIES += \
+	libdl
+endif
+
+ifeq ($(TARGET_OS)-$(TARGET_SIMULATOR),linux-true)
+LOCAL_LDLIBS += -ldl
+endif
+
+
+LOCAL_MODULE:= libttssynthproxy
+
+LOCAL_ARM_MODE := arm
+
+LOCAL_PRELINK_MODULE := false
+
+include $(BUILD_SHARED_LIBRARY)
+
diff --git
a/tts/jni/android_tts_SynthProxy.cpp b/tts/jni/android_tts_SynthProxy.cpp
new file mode 100755
index 0000000..d8f1bf3
--- /dev/null
+++ b/tts/jni/android_tts_SynthProxy.cpp
@@ -0,0 +1,595 @@
+/*
+ * Copyright (C) 2009 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include <stdio.h>
+#include <unistd.h>
+
+#define LOG_TAG "SynthProxy"
+
+#include <utils/Log.h>
+#include <nativehelper/jni.h>
+#include <nativehelper/JNIHelp.h>
+#include <android_runtime/AndroidRuntime.h>
+#include <tts/TtsEngine.h>
+#include <media/AudioTrack.h>
+
+#include <dlfcn.h>
+
+#define DEFAULT_TTS_RATE        16000
+#define DEFAULT_TTS_FORMAT      AudioSystem::PCM_16_BIT
+#define DEFAULT_TTS_NB_CHANNELS 1
+
+#define USAGEMODE_PLAY_IMMEDIATELY 0
+#define USAGEMODE_WRITE_TO_FILE    1
+
+using namespace android;
+
+// ----------------------------------------------------------------------------
+struct fields_t {
+    jfieldID    synthProxyFieldJniData;
+    jclass      synthProxyClass;
+    jmethodID   synthProxyMethodPost;
+};
+
+struct afterSynthData_t {
+    jint jniStorage;
+    int  usageMode;
+    FILE* outputFile;
+};
+
+// ----------------------------------------------------------------------------
+static fields_t javaTTSFields;
+
+// ----------------------------------------------------------------------------
+class SynthProxyJniStorage {
+    public :
+        //jclass                    tts_class;
+        jobject                   tts_ref;
+        TtsEngine*                mNativeSynthInterface;
+        AudioTrack*               mAudioOut;
+        uint32_t                  mSampleRate;
+        AudioSystem::audio_format mAudFormat;
+        int                       mNbChannels;
+
+        SynthProxyJniStorage() {
+            //tts_class = NULL;
+            tts_ref = NULL;
+            mNativeSynthInterface = NULL;
+            mAudioOut = NULL;
+            mSampleRate = DEFAULT_TTS_RATE;
+            mAudFormat  = DEFAULT_TTS_FORMAT;
+            mNbChannels = DEFAULT_TTS_NB_CHANNELS;
+        }
+
+        ~SynthProxyJniStorage() {
+            killAudio();
+            if (mNativeSynthInterface) {
+                mNativeSynthInterface->shutdown();
+                mNativeSynthInterface = NULL;
+            }
+        }
+
+        void killAudio() {
+            if (mAudioOut) {
+                mAudioOut->stop();
+                delete mAudioOut;
+                mAudioOut = NULL;
+            }
+        }
+
+        void createAudioOut(uint32_t rate, AudioSystem::audio_format format,
+                int channel) {
+            mSampleRate = rate;
+            mAudFormat  = format;
+            mNbChannels = channel;
+
+            // TODO use the TTS stream type
+            int streamType = AudioSystem::MUSIC;
+
+            // retrieve system properties to ensure successful creation of the
+            // AudioTrack object for playback
+            int afSampleRate;
+            if (AudioSystem::getOutputSamplingRate(&afSampleRate, streamType) != NO_ERROR) {
+                afSampleRate = 44100;
+            }
+            int afFrameCount;
+            if (AudioSystem::getOutputFrameCount(&afFrameCount, streamType) != NO_ERROR) {
+                afFrameCount = 2048;
+            }
+            uint32_t afLatency;
+            if (AudioSystem::getOutputLatency(&afLatency, streamType) != NO_ERROR) {
+                afLatency = 500;
+            }
+            uint32_t minBufCount = afLatency / ((1000 * afFrameCount)/afSampleRate);
+            if (minBufCount < 2) minBufCount = 2;
+            int minFrameCount = (afFrameCount * rate * minBufCount)/afSampleRate;
+
+            mAudioOut = new AudioTrack(streamType, rate, format, channel,
+                    minFrameCount > 4096 ? minFrameCount : 4096,
+                    0, 0, 0, 0); // not using an AudioTrack callback
+
+            if (mAudioOut->initCheck() != NO_ERROR) {
+              LOGI("AudioTrack error");
+              delete mAudioOut;
+              mAudioOut = NULL;
+            } else {
+              LOGI("AudioTrack OK");
+              mAudioOut->start();
+              LOGI("AudioTrack started");
+            }
+        }
+};
+
+
+// ----------------------------------------------------------------------------
+void prepAudioTrack(SynthProxyJniStorage* pJniData,
+        uint32_t rate, AudioSystem::audio_format format, int channel)
+{
+    // Don't bother creating a new audiotrack object if the current
+    // object is already set.
+    if ( pJniData->mAudioOut &&
+         (rate == pJniData->mSampleRate) &&
+         (format == pJniData->mAudFormat) &&
+         (channel == pJniData->mNbChannels) ){
+        return;
+    }
+    if (pJniData->mAudioOut){
+        pJniData->killAudio();
+    }
+    pJniData->createAudioOut(rate, format, channel);
+}
+
+
+// ----------------------------------------------------------------------------
+/*
+ * Callback from TTS engine.
+ * Directly speaks using AudioTrack or write to file
+ */
+static void ttsSynthDoneCB(void * userdata, uint32_t rate,
+                           AudioSystem::audio_format format, int channel,
+                           int8_t *wav, size_t bufferSize) {
+    LOGI("ttsSynthDoneCallback: %d bytes", bufferSize);
+
+    afterSynthData_t* pForAfter = (afterSynthData_t*)userdata;
+
+    if (pForAfter->usageMode == USAGEMODE_PLAY_IMMEDIATELY){
+        LOGI("Direct speech");
+
+        if (wav == NULL) {
+            LOGI("Null: speech has completed");
+        }
+
+        if ((wav != NULL) && (bufferSize > 0)) {
+            SynthProxyJniStorage* pJniData =
+                    (SynthProxyJniStorage*)(pForAfter->jniStorage);
+            prepAudioTrack(pJniData, rate, format, channel);
+            if (pJniData->mAudioOut) {
+                pJniData->mAudioOut->write(wav, bufferSize);
+                LOGI("AudioTrack wrote: %d bytes", bufferSize);
+            } else {
+                LOGI("Can't play, null audiotrack");
+            }
+        }
+    } else  if (pForAfter->usageMode == USAGEMODE_WRITE_TO_FILE) {
+        LOGI("Save to file");
+        if (wav == NULL) {
+            LOGI("Null: speech has completed");
+        }
+        if ((wav != NULL) && (bufferSize > 0) && (pForAfter->outputFile != NULL)){
+            fwrite(wav, 1, bufferSize, pForAfter->outputFile);
+        }
+    }
+    // TODO update to call back into the SynthProxy class through the
+    //      javaTTSFields.synthProxyMethodPost method to notify
+    //      playback has completed
+
+    delete pForAfter;
+    return;
+}
+
+
+// ----------------------------------------------------------------------------
+static void
+android_tts_SynthProxy_native_setup(JNIEnv *env, jobject thiz,
+        jobject weak_this, jstring nativeSoLib)
+{
+    SynthProxyJniStorage* pJniStorage = new SynthProxyJniStorage();
+
+    prepAudioTrack(pJniStorage,
+            DEFAULT_TTS_RATE, DEFAULT_TTS_FORMAT, DEFAULT_TTS_NB_CHANNELS);
+
+    const char *nativeSoLibNativeString =
+            env->GetStringUTFChars(nativeSoLib, 0);
+
+    void *engine_lib_handle = dlopen(nativeSoLibNativeString,
+            RTLD_NOW | RTLD_LOCAL);
+    if (engine_lib_handle==NULL) {
+       LOGI("engine_lib_handle==NULL");
+       // TODO report error so the TTS can't be used
+    } else {
+        TtsEngine *(*get_TtsEngine)() =
+            reinterpret_cast<TtsEngine* (*)()>(dlsym(engine_lib_handle, "getTtsEngine"));
+        pJniStorage->mNativeSynthInterface = (get_TtsEngine != NULL) ? (*get_TtsEngine)() : NULL;
+        if (pJniStorage->mNativeSynthInterface) {
+            pJniStorage->mNativeSynthInterface->init(ttsSynthDoneCB);
+        }
+    }
+
+    // we use a weak reference so the SynthProxy object can be garbage collected.
+    pJniStorage->tts_ref = env->NewGlobalRef(weak_this);
+
+    // save the JNI resources so we can use them (and free them) later
+    env->SetIntField(thiz, javaTTSFields.synthProxyFieldJniData,
+            (int)pJniStorage);
+
+    env->ReleaseStringUTFChars(nativeSoLib, nativeSoLibNativeString);
+}
+
+
+static void
+android_tts_SynthProxy_native_finalize(JNIEnv *env, jobject thiz, jint jniData)
+{
+    if (jniData) {
+        SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData;
+        delete pSynthData;
+    }
+}
+
+
+static void
+android_tts_SynthProxy_setLanguage(JNIEnv *env, jobject thiz, jint jniData,
+        jstring language)
+{
+    if (jniData == 0) {
+        LOGE("android_tts_SynthProxy_setLanguage(): invalid JNI data");
+        return;
+    }
+
+    SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData;
+    const char *langNativeString = env->GetStringUTFChars(language, 0);
+    // TODO check return codes
+    if (pSynthData->mNativeSynthInterface) {
+        pSynthData->mNativeSynthInterface->setLanguage(langNativeString,
+                strlen(langNativeString));
+    }
+    env->ReleaseStringUTFChars(language, langNativeString);
+}
+
+
+static void
+android_tts_SynthProxy_setSpeechRate(JNIEnv *env, jobject thiz, jint jniData,
+        int speechRate)
+{
+    if (jniData == 0) {
+        LOGE("android_tts_SynthProxy_setSpeechRate(): invalid JNI data");
+        return;
+    }
+
+    const int bufSize = 12; // fits "-2147483648" plus the terminating NUL
+    char buffer [bufSize];
+    snprintf(buffer, bufSize, "%d", speechRate);
+
+    SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData;
+    LOGI("setting speech rate to %d", speechRate);
+    // TODO check return codes
+    if (pSynthData->mNativeSynthInterface) {
+        pSynthData->mNativeSynthInterface->setProperty("rate", buffer, bufSize);
+    }
+}
+
+
+// TODO: Refactor this to get rid of any assumptions about sample rate, etc.
+static void +android_tts_SynthProxy_synthesizeToFile(JNIEnv *env, jobject thiz, jint jniData, + jstring textJavaString, jstring filenameJavaString) +{ + if (jniData == 0) { + LOGE("android_tts_SynthProxy_synthesizeToFile(): invalid JNI data"); + return; + } + + SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData; + + const char *filenameNativeString = + env->GetStringUTFChars(filenameJavaString, 0); + const char *textNativeString = env->GetStringUTFChars(textJavaString, 0); + + afterSynthData_t* pForAfter = new (afterSynthData_t); + pForAfter->jniStorage = jniData; + pForAfter->usageMode = USAGEMODE_WRITE_TO_FILE; + + pForAfter->outputFile = fopen(filenameNativeString, "wb"); + + // Write 44 blank bytes for WAV header, then come back and fill them in + // after we've written the audio data + char header[44]; + fwrite(header, 1, 44, pForAfter->outputFile); + + unsigned int unique_identifier; + + // TODO check return codes + if (pSynthData->mNativeSynthInterface) { + pSynthData->mNativeSynthInterface->synthesizeText(textNativeString, + (void *)pForAfter); + } + + long filelen = ftell(pForAfter->outputFile); + + int samples = (((int)filelen) - 44) / 2; + header[0] = 'R'; + header[1] = 'I'; + header[2] = 'F'; + header[3] = 'F'; + ((uint32_t *)(&header[4]))[0] = filelen - 8; + header[8] = 'W'; + header[9] = 'A'; + header[10] = 'V'; + header[11] = 'E'; + + header[12] = 'f'; + header[13] = 'm'; + header[14] = 't'; + header[15] = ' '; + + ((uint32_t *)(&header[16]))[0] = 16; // size of fmt + + ((unsigned short *)(&header[20]))[0] = 1; // format + ((unsigned short *)(&header[22]))[0] = 1; // channels + ((uint32_t *)(&header[24]))[0] = 22050; // samplerate + ((uint32_t *)(&header[28]))[0] = 44100; // byterate + ((unsigned short *)(&header[32]))[0] = 2; // block align + ((unsigned short *)(&header[34]))[0] = 16; // bits per sample + + header[36] = 'd'; + header[37] = 'a'; + header[38] = 't'; + header[39] = 'a'; + + ((uint32_t *)(&header[40]))[0] = samples * 2; 
// size of data + + // Skip back to the beginning and rewrite the header + fseek(pForAfter->outputFile, 0, SEEK_SET); + fwrite(header, 1, 44, pForAfter->outputFile); + + fflush(pForAfter->outputFile); + fclose(pForAfter->outputFile); + + env->ReleaseStringUTFChars(textJavaString, textNativeString); + env->ReleaseStringUTFChars(filenameJavaString, filenameNativeString); +} + + +static void +android_tts_SynthProxy_speak(JNIEnv *env, jobject thiz, jint jniData, + jstring textJavaString) +{ + if (jniData == 0) { + LOGE("android_tts_SynthProxy_speak(): invalid JNI data"); + return; + } + + SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData; + + if (pSynthData->mAudioOut) { + pSynthData->mAudioOut->stop(); + pSynthData->mAudioOut->start(); + } + + afterSynthData_t* pForAfter = new (afterSynthData_t); + pForAfter->jniStorage = jniData; + pForAfter->usageMode = USAGEMODE_PLAY_IMMEDIATELY; + + if (pSynthData->mNativeSynthInterface) { + const char *textNativeString = env->GetStringUTFChars(textJavaString, 0); + pSynthData->mNativeSynthInterface->synthesizeText(textNativeString, + (void *)pForAfter); + env->ReleaseStringUTFChars(textJavaString, textNativeString); + } +} + + +static void +android_tts_SynthProxy_stop(JNIEnv *env, jobject thiz, jint jniData) +{ + if (jniData == 0) { + LOGE("android_tts_SynthProxy_stop(): invalid JNI data"); + return; + } + + SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData; + + if (pSynthData->mNativeSynthInterface) { + pSynthData->mNativeSynthInterface->stop(); + } + if (pSynthData->mAudioOut) { + pSynthData->mAudioOut->stop(); + } +} + + +static void +android_tts_SynthProxy_shutdown(JNIEnv *env, jobject thiz, jint jniData) +{ + if (jniData == 0) { + LOGE("android_tts_SynthProxy_shutdown(): invalid JNI data"); + return; + } + + SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData; + if (pSynthData->mNativeSynthInterface) { + pSynthData->mNativeSynthInterface->shutdown(); + 
pSynthData->mNativeSynthInterface = NULL; + } +} + + +// TODO add buffer format +static void +android_tts_SynthProxy_playAudioBuffer(JNIEnv *env, jobject thiz, jint jniData, + int bufferPointer, int bufferSize) +{ + if (jniData == 0) { + LOGE("android_tts_SynthProxy_playAudioBuffer(): invalid JNI data"); + return; + } + + SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData; + short* wav = (short*) bufferPointer; + pSynthData->mAudioOut->write(wav, bufferSize); + LOGI("AudioTrack wrote: %d bytes", bufferSize); +} + + +JNIEXPORT jstring JNICALL +android_tts_SynthProxy_getLanguage(JNIEnv *env, jobject thiz, jint jniData) +{ + if (jniData == 0) { + LOGE("android_tts_SynthProxy_getLanguage(): invalid JNI data"); + return env->NewStringUTF(""); + } + + SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData; + size_t bufSize = 100; + char buf[bufSize]; + memset(buf, 0, bufSize); + // TODO check return codes + if (pSynthData->mNativeSynthInterface) { + pSynthData->mNativeSynthInterface->getLanguage(buf, &bufSize); + } + return env->NewStringUTF(buf); +} + +JNIEXPORT int JNICALL +android_tts_SynthProxy_getRate(JNIEnv *env, jobject thiz, jint jniData) +{ + if (jniData == 0) { + LOGE("android_tts_SynthProxy_getRate(): invalid JNI data"); + return 0; + } + + SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData; + size_t bufSize = 100; + + char buf[bufSize]; + memset(buf, 0, bufSize); + // TODO check return codes + if (pSynthData->mNativeSynthInterface) { + pSynthData->mNativeSynthInterface->getProperty("rate", buf, &bufSize); + } + return atoi(buf); +} + +// Dalvik VM type signatures +static JNINativeMethod gMethods[] = { + { "native_stop", + "(I)V", + (void*)android_tts_SynthProxy_stop + }, + { "native_speak", + "(ILjava/lang/String;)V", + (void*)android_tts_SynthProxy_speak + }, + { "native_synthesizeToFile", + "(ILjava/lang/String;Ljava/lang/String;)V", + (void*)android_tts_SynthProxy_synthesizeToFile + }, + { "native_setLanguage", + 
"(ILjava/lang/String;)V", + (void*)android_tts_SynthProxy_setLanguage + }, + { "native_setSpeechRate", + "(II)V", + (void*)android_tts_SynthProxy_setSpeechRate + }, + { "native_playAudioBuffer", + "(III)V", + (void*)android_tts_SynthProxy_playAudioBuffer + }, + { "native_getLanguage", + "(I)Ljava/lang/String;", + (void*)android_tts_SynthProxy_getLanguage + }, + { "native_getRate", + "(I)I", + (void*)android_tts_SynthProxy_getRate + }, + { "native_shutdown", + "(I)V", + (void*)android_tts_SynthProxy_shutdown + }, + { "native_setup", + "(Ljava/lang/Object;Ljava/lang/String;)V", + (void*)android_tts_SynthProxy_native_setup + }, + { "native_finalize", + "(I)V", + (void*)android_tts_SynthProxy_native_finalize + } +}; + +#define SP_JNIDATA_FIELD_NAME "mJniData" +#define SP_POSTSPEECHSYNTHESIZED_METHOD_NAME "postNativeSpeechSynthesizedInJava" + +// TODO: verify this is the correct path +static const char* const kClassPathName = "android/tts/SynthProxy"; + +jint JNI_OnLoad(JavaVM* vm, void* reserved) +{ + JNIEnv* env = NULL; + jint result = -1; + jclass clazz; + + if (vm->GetEnv((void**) &env, JNI_VERSION_1_4) != JNI_OK) { + LOGE("ERROR: GetEnv failed\n"); + goto bail; + } + assert(env != NULL); + + clazz = env->FindClass(kClassPathName); + if (clazz == NULL) { + LOGE("Can't find %s", kClassPathName); + goto bail; + } + + javaTTSFields.synthProxyClass = clazz; + javaTTSFields.synthProxyFieldJniData = NULL; + javaTTSFields.synthProxyMethodPost = NULL; + + javaTTSFields.synthProxyFieldJniData = env->GetFieldID(clazz, + SP_JNIDATA_FIELD_NAME, "I"); + if (javaTTSFields.synthProxyFieldJniData == NULL) { + LOGE("Can't find %s.%s field", kClassPathName, SP_JNIDATA_FIELD_NAME); + goto bail; + } + + javaTTSFields.synthProxyMethodPost = env->GetStaticMethodID(clazz, + SP_POSTSPEECHSYNTHESIZED_METHOD_NAME, "(Ljava/lang/Object;II)V"); + if (javaTTSFields.synthProxyMethodPost == NULL) { + LOGE("Can't find %s.%s method", kClassPathName, SP_POSTSPEECHSYNTHESIZED_METHOD_NAME); + goto 
bail; + } + + if (jniRegisterNativeMethods( + env, kClassPathName, gMethods, NELEM(gMethods)) < 0) + goto bail; + + /* success -- return valid version number */ + result = JNI_VERSION_1_4; + + bail: + return result; +} |