| author | Przemyslaw Szczepaniak <pszczepaniak@google.com> | 2013-06-14 12:02:53 +0100 |
|---|---|---|
| committer | Przemyslaw Szczepaniak <pszczepaniak@google.com> | 2013-10-24 13:42:15 +0100 |
| commit | 90d15d2371ad85f22254be6985455aa2baa5d15d (patch) | |
| tree | e5a1670b39094633e6a9afad65d9bc53c58b7abf /core/java | |
| parent | b9ca1a9a4803afcbef8c19f2bbc110e9f83b7f94 (diff) | |
| download | frameworks_base-90d15d2371ad85f22254be6985455aa2baa5d15d.zip frameworks_base-90d15d2371ad85f22254be6985455aa2baa5d15d.tar.gz frameworks_base-90d15d2371ad85f22254be6985455aa2baa5d15d.tar.bz2 | |
Add new TTS API.
Much of the existing API is reused (mostly on the service side). The new
API provides better support for network-based synthesis, defines explicit
synthesis fallback behavior, has better error handling, allows multiple
voices per locale, and supports feature enumeration and custom extensions
at the voice level, among other improvements.

If a service does not provide a V2 implementation, a basic adapter from
the V2 interface to the existing V1 implementation is provided.
Bug: 8259486
Change-Id: I797b7fc054f11c9b0d930f4dea582e57596ea127
Diffstat (limited to 'core/java')
25 files changed, 3390 insertions, 436 deletions
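For orientation before the diff itself, here is a minimal sketch of how a client might use the voice-selection helpers this change adds (RequestConfig, RequestConfigHelper, LanguageMatcher, all visible in the hunks below). TextToSpeechClient.getEngineStatus() is referenced in the new javadoc, but the queueSpeak() call and its signature are assumptions about the V2 client surface that is not shown in this core/java excerpt.

```java
// Hypothetical client-side usage of the classes added in this change.
import java.util.Locale;

import android.speech.tts.RequestConfig;
import android.speech.tts.RequestConfigHelper;
import android.speech.tts.TextToSpeechClient;

class V2UsageSketch {
    void speakWithBestVoice(TextToSpeechClient client, String text) {
        TextToSpeechClient.EngineStatus status = client.getEngineStatus();

        // Prefer the highest-quality embedded voice matching the device locale.
        RequestConfig config = RequestConfigHelper.highestQuality(
                status, true /* hasToBeEmbedded */,
                new RequestConfigHelper.LanguageMatcher(Locale.getDefault()));
        if (config == null) {
            // No embedded voice; accept a network voice for the default locale.
            config = RequestConfigHelper.highestQuality(status, false /* hasToBeEmbedded */);
        }
        if (config == null) {
            return; // Engine reports no usable voice at all.
        }

        // Adjust playback volume without changing the selected voice.
        config = RequestConfig.Builder.newBuilder(config)
                .setAudioParam(TextToSpeechClient.Params.AUDIO_PARAM_VOLUME, 0.8f)
                .build();

        // Assumed V2 entry point and signature (utterance id + config + callback);
        // the actual client method is not part of the diff shown here.
        client.queueSpeak(text, "utterance-1", config, null /* callback */);
    }
}
```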
diff --git a/core/java/android/speech/tts/AbstractEventLogger.java b/core/java/android/speech/tts/AbstractEventLogger.java new file mode 100644 index 0000000..37f8656 --- /dev/null +++ b/core/java/android/speech/tts/AbstractEventLogger.java @@ -0,0 +1,124 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package android.speech.tts; + +import android.os.SystemClock; + +/** + * Base class for storing data about a given speech synthesis request to the + * event logs. The data that is logged depends on actual implementation. Note + * that {@link AbstractEventLogger#onAudioDataWritten()} and + * {@link AbstractEventLogger#onEngineComplete()} must be called from a single + * thread (usually the audio playback thread}. + */ +abstract class AbstractEventLogger { + protected final String mServiceApp; + protected final int mCallerUid; + protected final int mCallerPid; + protected final long mReceivedTime; + protected long mPlaybackStartTime = -1; + + private volatile long mRequestProcessingStartTime = -1; + private volatile long mEngineStartTime = -1; + private volatile long mEngineCompleteTime = -1; + + private boolean mLogWritten = false; + + AbstractEventLogger(int callerUid, int callerPid, String serviceApp) { + mCallerUid = callerUid; + mCallerPid = callerPid; + mServiceApp = serviceApp; + mReceivedTime = SystemClock.elapsedRealtime(); + } + + /** + * Notifies the logger that this request has been selected from + * the processing queue for processing. Engine latency / total time + * is measured from this baseline. + */ + public void onRequestProcessingStart() { + mRequestProcessingStartTime = SystemClock.elapsedRealtime(); + } + + /** + * Notifies the logger that a chunk of data has been received from + * the engine. Might be called multiple times. + */ + public void onEngineDataReceived() { + if (mEngineStartTime == -1) { + mEngineStartTime = SystemClock.elapsedRealtime(); + } + } + + /** + * Notifies the logger that the engine has finished processing data. + * Will be called exactly once. + */ + public void onEngineComplete() { + mEngineCompleteTime = SystemClock.elapsedRealtime(); + } + + /** + * Notifies the logger that audio playback has started for some section + * of the synthesis. This is normally some amount of time after the engine + * has synthesized data and varies depending on utterances and + * other audio currently in the queue. + */ + public void onAudioDataWritten() { + // For now, keep track of only the first chunk of audio + // that was played. + if (mPlaybackStartTime == -1) { + mPlaybackStartTime = SystemClock.elapsedRealtime(); + } + } + + /** + * Notifies the logger that the current synthesis has completed. + * All available data is not logged. 
+ */ + public void onCompleted(int statusCode) { + if (mLogWritten) { + return; + } else { + mLogWritten = true; + } + + long completionTime = SystemClock.elapsedRealtime(); + + // We don't report latency for stopped syntheses because their overall + // total time spent will be inaccurate (will not correlate with + // the length of the utterance). + + // onAudioDataWritten() should normally always be called, and hence mPlaybackStartTime + // should be set, if an error does not occur. + if (statusCode != TextToSpeechClient.Status.SUCCESS + || mPlaybackStartTime == -1 || mEngineCompleteTime == -1) { + logFailure(statusCode); + return; + } + + final long audioLatency = mPlaybackStartTime - mReceivedTime; + final long engineLatency = mEngineStartTime - mRequestProcessingStartTime; + final long engineTotal = mEngineCompleteTime - mRequestProcessingStartTime; + logSuccess(audioLatency, engineLatency, engineTotal); + } + + protected abstract void logFailure(int statusCode); + protected abstract void logSuccess(long audioLatency, long engineLatency, + long engineTotal); + + +} diff --git a/core/java/android/speech/tts/AbstractSynthesisCallback.java b/core/java/android/speech/tts/AbstractSynthesisCallback.java index c7a4af0..91e119b 100644 --- a/core/java/android/speech/tts/AbstractSynthesisCallback.java +++ b/core/java/android/speech/tts/AbstractSynthesisCallback.java @@ -15,15 +15,28 @@ */ package android.speech.tts; + /** * Defines additional methods the synthesis callback must implement that * are private to the TTS service implementation. + * + * All of these class methods (with the exception of {@link #stop()}) can be only called on the + * synthesis thread, while inside + * {@link TextToSpeechService#onSynthesizeText} or {@link TextToSpeechService#onSynthesizeTextV2}. + * {@link #stop()} is the exception, it may be called from multiple threads. */ abstract class AbstractSynthesisCallback implements SynthesisCallback { + /** If true, request comes from V2 TTS interface */ + protected final boolean mClientIsUsingV2; + /** - * Checks whether the synthesis request completed successfully. + * Constructor. + * @param clientIsUsingV2 If true, this callback will be used inside + * {@link TextToSpeechService#onSynthesizeTextV2} method. */ - abstract boolean isDone(); + AbstractSynthesisCallback(boolean clientIsUsingV2) { + mClientIsUsingV2 = clientIsUsingV2; + } /** * Aborts the speech request. @@ -31,4 +44,16 @@ abstract class AbstractSynthesisCallback implements SynthesisCallback { * Can be called from multiple threads. */ abstract void stop(); + + /** + * Get status code for a "stop". + * + * V2 Clients will receive special status, V1 clients will receive standard error. + * + * This method should only be called on the synthesis thread, + * while in {@link TextToSpeechService#onSynthesizeText}. + */ + int errorCodeOnStop() { + return mClientIsUsingV2 ? 
TextToSpeechClient.Status.STOPPED : TextToSpeech.ERROR; + } } diff --git a/core/java/android/speech/tts/AudioPlaybackHandler.java b/core/java/android/speech/tts/AudioPlaybackHandler.java index d63f605..dcf49b0 100644 --- a/core/java/android/speech/tts/AudioPlaybackHandler.java +++ b/core/java/android/speech/tts/AudioPlaybackHandler.java @@ -43,7 +43,7 @@ class AudioPlaybackHandler { return; } - item.stop(false); + item.stop(TextToSpeechClient.Status.STOPPED); } public void enqueue(PlaybackQueueItem item) { diff --git a/core/java/android/speech/tts/AudioPlaybackQueueItem.java b/core/java/android/speech/tts/AudioPlaybackQueueItem.java index 1a1fda8..c514639 100644 --- a/core/java/android/speech/tts/AudioPlaybackQueueItem.java +++ b/core/java/android/speech/tts/AudioPlaybackQueueItem.java @@ -53,7 +53,7 @@ class AudioPlaybackQueueItem extends PlaybackQueueItem { dispatcher.dispatchOnStart(); mPlayer = MediaPlayer.create(mContext, mUri); if (mPlayer == null) { - dispatcher.dispatchOnError(); + dispatcher.dispatchOnError(TextToSpeechClient.Status.ERROR_OUTPUT); return; } @@ -83,9 +83,9 @@ class AudioPlaybackQueueItem extends PlaybackQueueItem { } if (mFinished) { - dispatcher.dispatchOnDone(); + dispatcher.dispatchOnSuccess(); } else { - dispatcher.dispatchOnError(); + dispatcher.dispatchOnStop(); } } @@ -99,7 +99,7 @@ class AudioPlaybackQueueItem extends PlaybackQueueItem { } @Override - void stop(boolean isError) { + void stop(int errorCode) { mDone.open(); } } diff --git a/core/java/android/speech/tts/EventLogTags.logtags b/core/java/android/speech/tts/EventLogTags.logtags index f8654ad..e209a28 100644 --- a/core/java/android/speech/tts/EventLogTags.logtags +++ b/core/java/android/speech/tts/EventLogTags.logtags @@ -4,3 +4,6 @@ option java_package android.speech.tts; 76001 tts_speak_success (engine|3),(caller_uid|1),(caller_pid|1),(length|1),(locale|3),(rate|1),(pitch|1),(engine_latency|2|3),(engine_total|2|3),(audio_latency|2|3) 76002 tts_speak_failure (engine|3),(caller_uid|1),(caller_pid|1),(length|1),(locale|3),(rate|1),(pitch|1) + +76003 tts_v2_speak_success (engine|3),(caller_uid|1),(caller_pid|1),(length|1),(request_config|3),(engine_latency|2|3),(engine_total|2|3),(audio_latency|2|3) +76004 tts_v2_speak_failure (engine|3),(caller_uid|1),(caller_pid|1),(length|1),(request_config|3), (statusCode|1) diff --git a/core/java/android/speech/tts/EventLogger.java b/core/java/android/speech/tts/EventLogger.java deleted file mode 100644 index 82ed4dd..0000000 --- a/core/java/android/speech/tts/EventLogger.java +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Copyright (C) 2011 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ -package android.speech.tts; - -import android.os.SystemClock; -import android.text.TextUtils; -import android.util.Log; - -/** - * Writes data about a given speech synthesis request to the event logs. 
- * The data that is logged includes the calling app, length of the utterance, - * speech rate / pitch and the latency and overall time taken. - * - * Note that {@link EventLogger#onStopped()} and {@link EventLogger#onError()} - * might be called from any thread, but on {@link EventLogger#onAudioDataWritten()} and - * {@link EventLogger#onComplete()} must be called from a single thread - * (usually the audio playback thread} - */ -class EventLogger { - private final SynthesisRequest mRequest; - private final String mServiceApp; - private final int mCallerUid; - private final int mCallerPid; - private final long mReceivedTime; - private long mPlaybackStartTime = -1; - private volatile long mRequestProcessingStartTime = -1; - private volatile long mEngineStartTime = -1; - private volatile long mEngineCompleteTime = -1; - - private volatile boolean mError = false; - private volatile boolean mStopped = false; - private boolean mLogWritten = false; - - EventLogger(SynthesisRequest request, int callerUid, int callerPid, String serviceApp) { - mRequest = request; - mCallerUid = callerUid; - mCallerPid = callerPid; - mServiceApp = serviceApp; - mReceivedTime = SystemClock.elapsedRealtime(); - } - - /** - * Notifies the logger that this request has been selected from - * the processing queue for processing. Engine latency / total time - * is measured from this baseline. - */ - public void onRequestProcessingStart() { - mRequestProcessingStartTime = SystemClock.elapsedRealtime(); - } - - /** - * Notifies the logger that a chunk of data has been received from - * the engine. Might be called multiple times. - */ - public void onEngineDataReceived() { - if (mEngineStartTime == -1) { - mEngineStartTime = SystemClock.elapsedRealtime(); - } - } - - /** - * Notifies the logger that the engine has finished processing data. - * Will be called exactly once. - */ - public void onEngineComplete() { - mEngineCompleteTime = SystemClock.elapsedRealtime(); - } - - /** - * Notifies the logger that audio playback has started for some section - * of the synthesis. This is normally some amount of time after the engine - * has synthesized data and varies depending on utterances and - * other audio currently in the queue. - */ - public void onAudioDataWritten() { - // For now, keep track of only the first chunk of audio - // that was played. - if (mPlaybackStartTime == -1) { - mPlaybackStartTime = SystemClock.elapsedRealtime(); - } - } - - /** - * Notifies the logger that the current synthesis was stopped. - * Latency numbers are not reported for stopped syntheses. - */ - public void onStopped() { - mStopped = false; - } - - /** - * Notifies the logger that the current synthesis resulted in - * an error. This is logged using {@link EventLogTags#writeTtsSpeakFailure}. - */ - public void onError() { - mError = true; - } - - /** - * Notifies the logger that the current synthesis has completed. - * All available data is not logged. - */ - public void onWriteData() { - if (mLogWritten) { - return; - } else { - mLogWritten = true; - } - - long completionTime = SystemClock.elapsedRealtime(); - // onAudioDataWritten() should normally always be called if an - // error does not occur. 
- if (mError || mPlaybackStartTime == -1 || mEngineCompleteTime == -1) { - EventLogTags.writeTtsSpeakFailure(mServiceApp, mCallerUid, mCallerPid, - getUtteranceLength(), getLocaleString(), - mRequest.getSpeechRate(), mRequest.getPitch()); - return; - } - - // We don't report stopped syntheses because their overall - // total time spent will be innacurate (will not correlate with - // the length of the utterance). - if (mStopped) { - return; - } - - final long audioLatency = mPlaybackStartTime - mReceivedTime; - final long engineLatency = mEngineStartTime - mRequestProcessingStartTime; - final long engineTotal = mEngineCompleteTime - mRequestProcessingStartTime; - - EventLogTags.writeTtsSpeakSuccess(mServiceApp, mCallerUid, mCallerPid, - getUtteranceLength(), getLocaleString(), - mRequest.getSpeechRate(), mRequest.getPitch(), - engineLatency, engineTotal, audioLatency); - } - - /** - * @return the length of the utterance for the given synthesis, 0 - * if the utterance was {@code null}. - */ - private int getUtteranceLength() { - final String utterance = mRequest.getText(); - return utterance == null ? 0 : utterance.length(); - } - - /** - * Returns a formatted locale string from the synthesis params of the - * form lang-country-variant. - */ - private String getLocaleString() { - StringBuilder sb = new StringBuilder(mRequest.getLanguage()); - if (!TextUtils.isEmpty(mRequest.getCountry())) { - sb.append('-'); - sb.append(mRequest.getCountry()); - - if (!TextUtils.isEmpty(mRequest.getVariant())) { - sb.append('-'); - sb.append(mRequest.getVariant()); - } - } - - return sb.toString(); - } - -} diff --git a/core/java/android/speech/tts/EventLoggerV1.java b/core/java/android/speech/tts/EventLoggerV1.java new file mode 100644 index 0000000..f484347 --- /dev/null +++ b/core/java/android/speech/tts/EventLoggerV1.java @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package android.speech.tts; + +import android.text.TextUtils; + +/** + * Writes data about a given speech synthesis request for V1 API to the event + * logs. The data that is logged includes the calling app, length of the + * utterance, speech rate / pitch, the latency, and overall time taken. + */ +class EventLoggerV1 extends AbstractEventLogger { + private final SynthesisRequest mRequest; + + EventLoggerV1(SynthesisRequest request, int callerUid, int callerPid, String serviceApp) { + super(callerUid, callerPid, serviceApp); + mRequest = request; + } + + @Override + protected void logFailure(int statusCode) { + // We don't report stopped syntheses because their overall + // total time spent will be inaccurate (will not correlate with + // the length of the utterance). 
+ if (statusCode != TextToSpeechClient.Status.STOPPED) { + EventLogTags.writeTtsSpeakFailure(mServiceApp, mCallerUid, mCallerPid, + getUtteranceLength(), getLocaleString(), + mRequest.getSpeechRate(), mRequest.getPitch()); + } + } + + @Override + protected void logSuccess(long audioLatency, long engineLatency, long engineTotal) { + EventLogTags.writeTtsSpeakSuccess(mServiceApp, mCallerUid, mCallerPid, + getUtteranceLength(), getLocaleString(), + mRequest.getSpeechRate(), mRequest.getPitch(), + engineLatency, engineTotal, audioLatency); + } + + /** + * @return the length of the utterance for the given synthesis, 0 + * if the utterance was {@code null}. + */ + private int getUtteranceLength() { + final String utterance = mRequest.getText(); + return utterance == null ? 0 : utterance.length(); + } + + /** + * Returns a formatted locale string from the synthesis params of the + * form lang-country-variant. + */ + private String getLocaleString() { + StringBuilder sb = new StringBuilder(mRequest.getLanguage()); + if (!TextUtils.isEmpty(mRequest.getCountry())) { + sb.append('-'); + sb.append(mRequest.getCountry()); + + if (!TextUtils.isEmpty(mRequest.getVariant())) { + sb.append('-'); + sb.append(mRequest.getVariant()); + } + } + + return sb.toString(); + } +} diff --git a/core/java/android/speech/tts/EventLoggerV2.java b/core/java/android/speech/tts/EventLoggerV2.java new file mode 100644 index 0000000..ca0e49c --- /dev/null +++ b/core/java/android/speech/tts/EventLoggerV2.java @@ -0,0 +1,73 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package android.speech.tts; + + + +/** + * Writes data about a given speech synthesis request for V2 API to the event logs. + * The data that is logged includes the calling app, length of the utterance, + * synthesis request configuration and the latency and overall time taken. + */ +class EventLoggerV2 extends AbstractEventLogger { + private final SynthesisRequestV2 mRequest; + + EventLoggerV2(SynthesisRequestV2 request, int callerUid, int callerPid, String serviceApp) { + super(callerUid, callerPid, serviceApp); + mRequest = request; + } + + @Override + protected void logFailure(int statusCode) { + // We don't report stopped syntheses because their overall + // total time spent will be inaccurate (will not correlate with + // the length of the utterance). + if (statusCode != TextToSpeechClient.Status.STOPPED) { + EventLogTags.writeTtsV2SpeakFailure(mServiceApp, + mCallerUid, mCallerPid, getUtteranceLength(), getRequestConfigString(), statusCode); + } + } + + @Override + protected void logSuccess(long audioLatency, long engineLatency, long engineTotal) { + EventLogTags.writeTtsV2SpeakSuccess(mServiceApp, + mCallerUid, mCallerPid, getUtteranceLength(), getRequestConfigString(), + engineLatency, engineTotal, audioLatency); + } + + /** + * @return the length of the utterance for the given synthesis, 0 + * if the utterance was {@code null}. 
+ */ + private int getUtteranceLength() { + final String utterance = mRequest.getText(); + return utterance == null ? 0 : utterance.length(); + } + + /** + * Returns a string representation of the synthesis request configuration. + */ + private String getRequestConfigString() { + // Ensure the bundles are unparceled. + mRequest.getVoiceParams().size(); + mRequest.getAudioParams().size(); + + return new StringBuilder(64).append("VoiceId: ").append(mRequest.getVoiceId()) + .append(" ,VoiceParams: ").append(mRequest.getVoiceParams()) + .append(" ,SystemParams: ").append(mRequest.getAudioParams()) + .append("]").toString(); + } +} diff --git a/core/java/android/speech/tts/FileSynthesisCallback.java b/core/java/android/speech/tts/FileSynthesisCallback.java index ab8f82f..859606a 100644 --- a/core/java/android/speech/tts/FileSynthesisCallback.java +++ b/core/java/android/speech/tts/FileSynthesisCallback.java @@ -17,6 +17,7 @@ package android.speech.tts; import android.media.AudioFormat; import android.os.FileUtils; +import android.speech.tts.TextToSpeechService.UtteranceProgressDispatcher; import android.util.Log; import java.io.File; @@ -48,19 +49,39 @@ class FileSynthesisCallback extends AbstractSynthesisCallback { private FileChannel mFileChannel; + private final UtteranceProgressDispatcher mDispatcher; + private final Object mCallerIdentity; + private boolean mStarted = false; - private boolean mStopped = false; private boolean mDone = false; - FileSynthesisCallback(FileChannel fileChannel) { + /** Status code of synthesis */ + protected int mStatusCode; + + FileSynthesisCallback(FileChannel fileChannel, UtteranceProgressDispatcher dispatcher, + Object callerIdentity, boolean clientIsUsingV2) { + super(clientIsUsingV2); mFileChannel = fileChannel; + mDispatcher = dispatcher; + mCallerIdentity = callerIdentity; + mStatusCode = TextToSpeechClient.Status.SUCCESS; } @Override void stop() { synchronized (mStateLock) { - mStopped = true; + if (mDone) { + return; + } + if (mStatusCode == TextToSpeechClient.Status.STOPPED) { + return; + } + + mStatusCode = TextToSpeechClient.Status.STOPPED; cleanUp(); + if (mDispatcher != null) { + mDispatcher.dispatchOnStop(); + } } } @@ -75,14 +96,8 @@ class FileSynthesisCallback extends AbstractSynthesisCallback { * Must be called while holding the monitor on {@link #mStateLock}. */ private void closeFile() { - try { - if (mFileChannel != null) { - mFileChannel.close(); - mFileChannel = null; - } - } catch (IOException ex) { - Log.e(TAG, "Failed to close output file descriptor", ex); - } + // File will be closed by the SpeechItem in the speech service. 
+ mFileChannel = null; } @Override @@ -91,38 +106,46 @@ class FileSynthesisCallback extends AbstractSynthesisCallback { } @Override - boolean isDone() { - return mDone; - } - - @Override public int start(int sampleRateInHz, int audioFormat, int channelCount) { if (DBG) { Log.d(TAG, "FileSynthesisRequest.start(" + sampleRateInHz + "," + audioFormat + "," + channelCount + ")"); } + FileChannel fileChannel = null; synchronized (mStateLock) { - if (mStopped) { + if (mStatusCode == TextToSpeechClient.Status.STOPPED) { if (DBG) Log.d(TAG, "Request has been aborted."); + return errorCodeOnStop(); + } + if (mStatusCode != TextToSpeechClient.Status.SUCCESS) { + if (DBG) Log.d(TAG, "Error was raised"); return TextToSpeech.ERROR; } if (mStarted) { - cleanUp(); - throw new IllegalArgumentException("FileSynthesisRequest.start() called twice"); + Log.e(TAG, "Start called twice"); + return TextToSpeech.ERROR; } mStarted = true; mSampleRateInHz = sampleRateInHz; mAudioFormat = audioFormat; mChannelCount = channelCount; - try { - mFileChannel.write(ByteBuffer.allocate(WAV_HEADER_LENGTH)); + if (mDispatcher != null) { + mDispatcher.dispatchOnStart(); + } + fileChannel = mFileChannel; + } + + try { + fileChannel.write(ByteBuffer.allocate(WAV_HEADER_LENGTH)); return TextToSpeech.SUCCESS; - } catch (IOException ex) { - Log.e(TAG, "Failed to write wav header to output file descriptor" + ex); + } catch (IOException ex) { + Log.e(TAG, "Failed to write wav header to output file descriptor", ex); + synchronized (mStateLock) { cleanUp(); - return TextToSpeech.ERROR; + mStatusCode = TextToSpeechClient.Status.ERROR_OUTPUT; } + return TextToSpeech.ERROR; } } @@ -132,66 +155,128 @@ class FileSynthesisCallback extends AbstractSynthesisCallback { Log.d(TAG, "FileSynthesisRequest.audioAvailable(" + buffer + "," + offset + "," + length + ")"); } + FileChannel fileChannel = null; synchronized (mStateLock) { - if (mStopped) { + if (mStatusCode == TextToSpeechClient.Status.STOPPED) { if (DBG) Log.d(TAG, "Request has been aborted."); + return errorCodeOnStop(); + } + if (mStatusCode != TextToSpeechClient.Status.SUCCESS) { + if (DBG) Log.d(TAG, "Error was raised"); return TextToSpeech.ERROR; } if (mFileChannel == null) { Log.e(TAG, "File not open"); + mStatusCode = TextToSpeechClient.Status.ERROR_OUTPUT; return TextToSpeech.ERROR; } - try { - mFileChannel.write(ByteBuffer.wrap(buffer, offset, length)); - return TextToSpeech.SUCCESS; - } catch (IOException ex) { - Log.e(TAG, "Failed to write to output file descriptor", ex); - cleanUp(); + if (!mStarted) { + Log.e(TAG, "Start method was not called"); return TextToSpeech.ERROR; } + fileChannel = mFileChannel; + } + + try { + fileChannel.write(ByteBuffer.wrap(buffer, offset, length)); + return TextToSpeech.SUCCESS; + } catch (IOException ex) { + Log.e(TAG, "Failed to write to output file descriptor", ex); + synchronized (mStateLock) { + cleanUp(); + mStatusCode = TextToSpeechClient.Status.ERROR_OUTPUT; + } + return TextToSpeech.ERROR; } } @Override public int done() { if (DBG) Log.d(TAG, "FileSynthesisRequest.done()"); + FileChannel fileChannel = null; + + int sampleRateInHz = 0; + int audioFormat = 0; + int channelCount = 0; + synchronized (mStateLock) { if (mDone) { - if (DBG) Log.d(TAG, "Duplicate call to done()"); - // This preserves existing behaviour. Earlier, if done was called twice - // we'd return ERROR because mFile == null and we'd add to logspam. + Log.w(TAG, "Duplicate call to done()"); + // This is not an error that would prevent synthesis. 
Hence no + // setStatusCode is set. return TextToSpeech.ERROR; } - if (mStopped) { + if (mStatusCode == TextToSpeechClient.Status.STOPPED) { if (DBG) Log.d(TAG, "Request has been aborted."); + return errorCodeOnStop(); + } + if (mDispatcher != null && mStatusCode != TextToSpeechClient.Status.SUCCESS && + mStatusCode != TextToSpeechClient.Status.STOPPED) { + mDispatcher.dispatchOnError(mStatusCode); return TextToSpeech.ERROR; } if (mFileChannel == null) { Log.e(TAG, "File not open"); return TextToSpeech.ERROR; } - try { - // Write WAV header at start of file - mFileChannel.position(0); - int dataLength = (int) (mFileChannel.size() - WAV_HEADER_LENGTH); - mFileChannel.write( - makeWavHeader(mSampleRateInHz, mAudioFormat, mChannelCount, dataLength)); + mDone = true; + fileChannel = mFileChannel; + sampleRateInHz = mSampleRateInHz; + audioFormat = mAudioFormat; + channelCount = mChannelCount; + } + + try { + // Write WAV header at start of file + fileChannel.position(0); + int dataLength = (int) (fileChannel.size() - WAV_HEADER_LENGTH); + fileChannel.write( + makeWavHeader(sampleRateInHz, audioFormat, channelCount, dataLength)); + + synchronized (mStateLock) { closeFile(); - mDone = true; + if (mDispatcher != null) { + mDispatcher.dispatchOnSuccess(); + } return TextToSpeech.SUCCESS; - } catch (IOException ex) { - Log.e(TAG, "Failed to write to output file descriptor", ex); + } + } catch (IOException ex) { + Log.e(TAG, "Failed to write to output file descriptor", ex); + synchronized (mStateLock) { cleanUp(); - return TextToSpeech.ERROR; } + return TextToSpeech.ERROR; } } @Override public void error() { + error(TextToSpeechClient.Status.ERROR_SYNTHESIS); + } + + @Override + public void error(int errorCode) { if (DBG) Log.d(TAG, "FileSynthesisRequest.error()"); synchronized (mStateLock) { + if (mDone) { + return; + } cleanUp(); + mStatusCode = errorCode; + } + } + + @Override + public boolean hasStarted() { + synchronized (mStateLock) { + return mStarted; + } + } + + @Override + public boolean hasFinished() { + synchronized (mStateLock) { + return mDone; } } @@ -225,4 +310,16 @@ class FileSynthesisCallback extends AbstractSynthesisCallback { return header; } + @Override + public int fallback() { + synchronized (mStateLock) { + if (hasStarted() || hasFinished()) { + return TextToSpeech.ERROR; + } + + mDispatcher.dispatchOnFallback(); + mStatusCode = TextToSpeechClient.Status.SUCCESS; + return TextToSpeechClient.Status.SUCCESS; + } + } } diff --git a/core/java/android/speech/tts/ITextToSpeechCallback.aidl b/core/java/android/speech/tts/ITextToSpeechCallback.aidl index f0287d4..3c808ff 100644 --- a/core/java/android/speech/tts/ITextToSpeechCallback.aidl +++ b/core/java/android/speech/tts/ITextToSpeechCallback.aidl @@ -15,13 +15,53 @@ */ package android.speech.tts; +import android.speech.tts.VoiceInfo; + /** * Interface for callbacks from TextToSpeechService * * {@hide} */ oneway interface ITextToSpeechCallback { + /** + * Tells the client that the synthesis has started. + * + * @param utteranceId Unique id identifying synthesis request. + */ void onStart(String utteranceId); - void onDone(String utteranceId); - void onError(String utteranceId); + + /** + * Tells the client that the synthesis has finished. + * + * @param utteranceId Unique id identifying synthesis request. + */ + void onSuccess(String utteranceId); + + /** + * Tells the client that the synthesis was stopped. + * + * @param utteranceId Unique id identifying synthesis request. 
+ */ + void onStop(String utteranceId); + + /** + * Tells the client that the synthesis failed, and fallback synthesis will be attempted. + * + * @param utteranceId Unique id identifying synthesis request. + */ + void onFallback(String utteranceId); + + /** + * Tells the client that the synthesis has failed. + * + * @param utteranceId Unique id identifying synthesis request. + * @param errorCode One of the values from + * {@link android.speech.tts.v2.TextToSpeechClient.Status}. + */ + void onError(String utteranceId, int errorCode); + + /** + * Inform the client that set of available voices changed. + */ + void onVoicesInfoChange(in List<VoiceInfo> voices); } diff --git a/core/java/android/speech/tts/ITextToSpeechService.aidl b/core/java/android/speech/tts/ITextToSpeechService.aidl index b7bc70c..9cf49ff 100644 --- a/core/java/android/speech/tts/ITextToSpeechService.aidl +++ b/core/java/android/speech/tts/ITextToSpeechService.aidl @@ -20,6 +20,8 @@ import android.net.Uri; import android.os.Bundle; import android.os.ParcelFileDescriptor; import android.speech.tts.ITextToSpeechCallback; +import android.speech.tts.VoiceInfo; +import android.speech.tts.SynthesisRequestV2; /** * Interface for TextToSpeech to talk to TextToSpeechService. @@ -70,9 +72,10 @@ interface ITextToSpeechService { * TextToSpeech object. * @param duration Number of milliseconds of silence to play. * @param queueMode Determines what to do to requests already in the queue. - * @param param Request parameters. + * @param utteranceId Unique id used to identify this request in callbacks. */ - int playSilence(in IBinder callingInstance, in long duration, in int queueMode, in Bundle params); + int playSilence(in IBinder callingInstance, in long duration, in int queueMode, + in String utteranceId); /** * Checks whether the service is currently playing some audio. @@ -90,7 +93,6 @@ interface ITextToSpeechService { /** * Returns the language, country and variant currently being used by the TTS engine. - * * Can be called from multiple threads. * * @return A 3-element array, containing language (ISO 3-letter code), @@ -99,7 +101,7 @@ interface ITextToSpeechService { * be empty too. */ String[] getLanguage(); - + /** * Returns a default TTS language, country and variant as set by the user. * @@ -111,7 +113,7 @@ interface ITextToSpeechService { * be empty too. */ String[] getClientDefaultLanguage(); - + /** * Checks whether the engine supports a given language. * @@ -137,7 +139,7 @@ interface ITextToSpeechService { * @param country ISO-3 country code. May be empty or null. * @param variant Language variant. May be empty or null. * @return An array of strings containing the set of features supported for - * the supplied locale. The array of strings must not contain + * the supplied locale. The array of strings must not contain * duplicates. */ String[] getFeaturesForLanguage(in String lang, in String country, in String variant); @@ -169,4 +171,44 @@ interface ITextToSpeechService { */ void setCallback(in IBinder caller, ITextToSpeechCallback cb); + /** + * Tells the engine to synthesize some speech and play it back. + * + * @param callingInstance a binder representing the identity of the calling + * TextToSpeech object. + * @param text The text to synthesize. + * @param queueMode Determines what to do to requests already in the queue. + * @param request Request parameters. + */ + int speakV2(in IBinder callingInstance, in SynthesisRequestV2 request); + + /** + * Tells the engine to synthesize some speech and write it to a file. 
+ * + * @param callingInstance a binder representing the identity of the calling + * TextToSpeech object. + * @param text The text to synthesize. + * @param fileDescriptor The file descriptor to write the synthesized audio to. Has to be + writable. + * @param request Request parameters. + */ + int synthesizeToFileDescriptorV2(in IBinder callingInstance, + in ParcelFileDescriptor fileDescriptor, in SynthesisRequestV2 request); + + /** + * Plays an existing audio resource. V2 version + * + * @param callingInstance a binder representing the identity of the calling + * TextToSpeech object. + * @param audioUri URI for the audio resource (a file or android.resource URI) + * @param utteranceId Unique identifier. + * @param audioParameters Parameters for audio playback (from {@link SynthesisRequestV2}). + */ + int playAudioV2(in IBinder callingInstance, in Uri audioUri, in String utteranceId, + in Bundle audioParameters); + + /** + * Request the list of available voices from the service. + */ + List<VoiceInfo> getVoicesInfo(); } diff --git a/core/java/android/speech/tts/PlaybackQueueItem.java b/core/java/android/speech/tts/PlaybackQueueItem.java index d0957ff..b2e323e 100644 --- a/core/java/android/speech/tts/PlaybackQueueItem.java +++ b/core/java/android/speech/tts/PlaybackQueueItem.java @@ -22,6 +22,16 @@ abstract class PlaybackQueueItem implements Runnable { return mDispatcher; } + @Override public abstract void run(); - abstract void stop(boolean isError); + + /** + * Stop the playback. + * + * @param errorCode Cause of the stop. Can be either one of the error codes from + * {@link android.speech.tts.TextToSpeechClient.Status} or + * {@link android.speech.tts.TextToSpeechClient.Status#STOPPED} + * if stopped on a client request. + */ + abstract void stop(int errorCode); } diff --git a/core/java/android/speech/tts/PlaybackSynthesisCallback.java b/core/java/android/speech/tts/PlaybackSynthesisCallback.java index c99f201..e345e89 100644 --- a/core/java/android/speech/tts/PlaybackSynthesisCallback.java +++ b/core/java/android/speech/tts/PlaybackSynthesisCallback.java @@ -55,20 +55,20 @@ class PlaybackSynthesisCallback extends AbstractSynthesisCallback { private final AudioPlaybackHandler mAudioTrackHandler; // A request "token", which will be non null after start() has been called. private SynthesisPlaybackQueueItem mItem = null; - // Whether this request has been stopped. This is useful for keeping - // track whether stop() has been called before start(). In all other cases, - // a non-null value of mItem will provide the same information. 
- private boolean mStopped = false; private volatile boolean mDone = false; + /** Status code of synthesis */ + protected int mStatusCode; + private final UtteranceProgressDispatcher mDispatcher; private final Object mCallerIdentity; - private final EventLogger mLogger; + private final AbstractEventLogger mLogger; PlaybackSynthesisCallback(int streamType, float volume, float pan, AudioPlaybackHandler audioTrackHandler, UtteranceProgressDispatcher dispatcher, - Object callerIdentity, EventLogger logger) { + Object callerIdentity, AbstractEventLogger logger, boolean clientIsUsingV2) { + super(clientIsUsingV2); mStreamType = streamType; mVolume = volume; mPan = pan; @@ -76,28 +76,25 @@ class PlaybackSynthesisCallback extends AbstractSynthesisCallback { mDispatcher = dispatcher; mCallerIdentity = callerIdentity; mLogger = logger; + mStatusCode = TextToSpeechClient.Status.SUCCESS; } @Override void stop() { - stopImpl(false); - } - - void stopImpl(boolean wasError) { if (DBG) Log.d(TAG, "stop()"); - // Note that mLogger.mError might be true too at this point. - mLogger.onStopped(); - SynthesisPlaybackQueueItem item; synchronized (mStateLock) { - if (mStopped) { + if (mDone) { + return; + } + if (mStatusCode == TextToSpeechClient.Status.STOPPED) { Log.w(TAG, "stop() called twice"); return; } item = mItem; - mStopped = true; + mStatusCode = TextToSpeechClient.Status.STOPPED; } if (item != null) { @@ -105,19 +102,15 @@ class PlaybackSynthesisCallback extends AbstractSynthesisCallback { // point it will write an additional buffer to the item - but we // won't worry about that because the audio playback queue will be cleared // soon after (see SynthHandler#stop(String). - item.stop(wasError); + item.stop(TextToSpeechClient.Status.STOPPED); } else { // This happens when stop() or error() were called before start() was. // In all other cases, mAudioTrackHandler.stop() will // result in onSynthesisDone being called, and we will // write data there. - mLogger.onWriteData(); - - if (wasError) { - // We have to dispatch the error ourselves. 
- mDispatcher.dispatchOnError(); - } + mLogger.onCompleted(TextToSpeechClient.Status.STOPPED); + mDispatcher.dispatchOnStop(); } } @@ -129,26 +122,42 @@ class PlaybackSynthesisCallback extends AbstractSynthesisCallback { } @Override - boolean isDone() { - return mDone; + public boolean hasStarted() { + synchronized (mStateLock) { + return mItem != null; + } } @Override - public int start(int sampleRateInHz, int audioFormat, int channelCount) { - if (DBG) { - Log.d(TAG, "start(" + sampleRateInHz + "," + audioFormat - + "," + channelCount + ")"); + public boolean hasFinished() { + synchronized (mStateLock) { + return mDone; } + } + + @Override + public int start(int sampleRateInHz, int audioFormat, int channelCount) { + if (DBG) Log.d(TAG, "start(" + sampleRateInHz + "," + audioFormat + "," + channelCount + + ")"); int channelConfig = BlockingAudioTrack.getChannelConfig(channelCount); - if (channelConfig == 0) { - Log.e(TAG, "Unsupported number of channels :" + channelCount); - return TextToSpeech.ERROR; - } synchronized (mStateLock) { - if (mStopped) { + if (channelConfig == 0) { + Log.e(TAG, "Unsupported number of channels :" + channelCount); + mStatusCode = TextToSpeechClient.Status.ERROR_OUTPUT; + return TextToSpeech.ERROR; + } + if (mStatusCode == TextToSpeechClient.Status.STOPPED) { if (DBG) Log.d(TAG, "stop() called before start(), returning."); + return errorCodeOnStop(); + } + if (mStatusCode != TextToSpeechClient.Status.SUCCESS) { + if (DBG) Log.d(TAG, "Error was raised"); + return TextToSpeech.ERROR; + } + if (mItem != null) { + Log.e(TAG, "Start called twice"); return TextToSpeech.ERROR; } SynthesisPlaybackQueueItem item = new SynthesisPlaybackQueueItem( @@ -161,13 +170,11 @@ class PlaybackSynthesisCallback extends AbstractSynthesisCallback { return TextToSpeech.SUCCESS; } - @Override public int audioAvailable(byte[] buffer, int offset, int length) { - if (DBG) { - Log.d(TAG, "audioAvailable(byte[" + buffer.length + "]," - + offset + "," + length + ")"); - } + if (DBG) Log.d(TAG, "audioAvailable(byte[" + buffer.length + "]," + offset + "," + length + + ")"); + if (length > getMaxBufferSize() || length <= 0) { throw new IllegalArgumentException("buffer is too large or of zero length (" + + length + " bytes)"); @@ -175,9 +182,17 @@ class PlaybackSynthesisCallback extends AbstractSynthesisCallback { SynthesisPlaybackQueueItem item = null; synchronized (mStateLock) { - if (mItem == null || mStopped) { + if (mItem == null) { + mStatusCode = TextToSpeechClient.Status.ERROR_OUTPUT; return TextToSpeech.ERROR; } + if (mStatusCode != TextToSpeechClient.Status.SUCCESS) { + if (DBG) Log.d(TAG, "Error was raised"); + return TextToSpeech.ERROR; + } + if (mStatusCode == TextToSpeechClient.Status.STOPPED) { + return errorCodeOnStop(); + } item = mItem; } @@ -190,11 +205,13 @@ class PlaybackSynthesisCallback extends AbstractSynthesisCallback { try { item.put(bufferCopy); } catch (InterruptedException ie) { - return TextToSpeech.ERROR; + synchronized (mStateLock) { + mStatusCode = TextToSpeechClient.Status.ERROR_OUTPUT; + return TextToSpeech.ERROR; + } } mLogger.onEngineDataReceived(); - return TextToSpeech.SUCCESS; } @@ -202,35 +219,74 @@ class PlaybackSynthesisCallback extends AbstractSynthesisCallback { public int done() { if (DBG) Log.d(TAG, "done()"); + int statusCode = 0; SynthesisPlaybackQueueItem item = null; synchronized (mStateLock) { if (mDone) { Log.w(TAG, "Duplicate call to done()"); + // Not an error that would prevent synthesis. 
Hence no + // setStatusCode return TextToSpeech.ERROR; } - + if (mStatusCode == TextToSpeechClient.Status.STOPPED) { + if (DBG) Log.d(TAG, "Request has been aborted."); + return errorCodeOnStop(); + } mDone = true; if (mItem == null) { + // .done() was called before .start. Treat it as successful synthesis + // for a client, despite service bad implementation. + Log.w(TAG, "done() was called before start() call"); + if (mStatusCode == TextToSpeechClient.Status.SUCCESS) { + mDispatcher.dispatchOnSuccess(); + } else { + mDispatcher.dispatchOnError(mStatusCode); + } + mLogger.onEngineComplete(); return TextToSpeech.ERROR; } item = mItem; + statusCode = mStatusCode; } - item.done(); + // Signal done or error to item + if (statusCode == TextToSpeechClient.Status.SUCCESS) { + item.done(); + } else { + item.stop(statusCode); + } mLogger.onEngineComplete(); - return TextToSpeech.SUCCESS; } @Override public void error() { + error(TextToSpeechClient.Status.ERROR_SYNTHESIS); + } + + @Override + public void error(int errorCode) { if (DBG) Log.d(TAG, "error() [will call stop]"); - // Currently, this call will not be logged if error( ) is called - // before start. - mLogger.onError(); - stopImpl(true); + synchronized (mStateLock) { + if (mDone) { + return; + } + mStatusCode = errorCode; + } } + @Override + public int fallback() { + synchronized (mStateLock) { + if (hasStarted() || hasFinished()) { + return TextToSpeech.ERROR; + } + + mDispatcher.dispatchOnFallback(); + mStatusCode = TextToSpeechClient.Status.SUCCESS; + return TextToSpeechClient.Status.SUCCESS; + } + } } diff --git a/core/java/android/speech/tts/RequestConfig.java b/core/java/android/speech/tts/RequestConfig.java new file mode 100644 index 0000000..97572dd --- /dev/null +++ b/core/java/android/speech/tts/RequestConfig.java @@ -0,0 +1,213 @@ +package android.speech.tts; + +import android.media.AudioManager; +import android.os.Bundle; + +/** + * Synthesis request configuration. + * + * This class is immutable, and can only be constructed using + * @link{RequestConfig.Builder}. + */ +public final class RequestConfig { + + /** Builder for constructing RequestConfig objects. */ + public static final class Builder { + private VoiceInfo mCurrentVoiceInfo; + private Bundle mVoiceParams; + private Bundle mAudioParams; + + Builder(VoiceInfo currentVoiceInfo, Bundle voiceParams, Bundle audioParams) { + mCurrentVoiceInfo = currentVoiceInfo; + mVoiceParams = voiceParams; + mAudioParams = audioParams; + } + + /** + * Create new RequestConfig builder. + */ + public static Builder newBuilder() { + return new Builder(null, new Bundle(), new Bundle()); + } + + /** + * Create new RequestConfig builder. + * @param prototype + * Prototype of new RequestConfig. Copies all fields of the + * prototype to the constructed object. + */ + public static Builder newBuilder(RequestConfig prototype) { + return new Builder(prototype.mCurrentVoiceInfo, + (Bundle)prototype.mVoiceParams.clone(), + (Bundle)prototype.mAudioParams.clone()); + } + + /** Set voice for request. Will reset voice parameters to the defaults. */ + public Builder setVoice(VoiceInfo voice) { + mCurrentVoiceInfo = voice; + mVoiceParams = (Bundle)voice.getParamsWithDefaults().clone(); + return this; + } + + /** + * Set request voice parameter. + * + * @param paramName + * The name of the parameter. It has to be one of the keys + * from @{link VoiceInfo.getParamsWithDefaults()} + * @param value + * Value of the parameter. 
Its type can be one of: Integer, Float, + * Boolean, String, VoiceInfo (will be set as an Integer, result of a call to + * the {@link VoiceInfo#getId()}) or byte[]. It has to be of the same type + * as the default value from @{link VoiceInfo.getParamsWithDefaults()} + * for that parameter. + * @throws IllegalArgumentException + * If paramName is not a valid parameter name or its value is of a wrong + * type. + * @throws IllegalStateException + * If no voice is set. + */ + public Builder setVoiceParam(String paramName, Object value){ + if (mCurrentVoiceInfo == null) { + throw new IllegalStateException( + "Couldn't set voice parameter, no voice is set"); + } + Object defaultValue = mCurrentVoiceInfo.getParamsWithDefaults().get(paramName); + if (defaultValue == null) { + throw new IllegalArgumentException( + "Parameter \"" + paramName + "\" is not available in set voice with id: " + + mCurrentVoiceInfo.getId()); + } + + // If it's VoiceInfo, get its id + if (value instanceof VoiceInfo) { + value = ((VoiceInfo)value).getId(); + } + + // Check type information + if (!defaultValue.getClass().equals(value.getClass())) { + throw new IllegalArgumentException( + "Parameter \"" + paramName +"\" is of different type. Value passed has " + + "type " + value.getClass().getSimpleName() + " but should have " + + "type " + defaultValue.getClass().getSimpleName()); + } + + setParam(mVoiceParams, paramName, value); + return this; + } + + /** + * Set request audio parameter. + * + * Doesn't requires a set voice. + * + * @param paramName + * Name of parameter. + * @param value + * Value of parameter. Its type can be one of: Integer, Float, Boolean, String + * or byte[]. + */ + public Builder setAudioParam(String paramName, Object value) { + setParam(mAudioParams, paramName, value); + return this; + } + + /** + * Set the {@link TextToSpeechClient.Params#AUDIO_PARAM_STREAM} audio parameter. + * + * @param streamId One of the STREAM_ constants defined in {@link AudioManager}. + */ + public void setAudioParamStream(int streamId) { + setAudioParam(TextToSpeechClient.Params.AUDIO_PARAM_STREAM, streamId); + } + + /** + * Set the {@link TextToSpeechClient.Params#AUDIO_PARAM_VOLUME} audio parameter. + * + * @param volume Float in range of 0.0 to 1.0. + */ + public void setAudioParamVolume(float volume) { + setAudioParam(TextToSpeechClient.Params.AUDIO_PARAM_VOLUME, volume); + } + + /** + * Set the {@link TextToSpeechClient.Params#AUDIO_PARAM_PAN} audio parameter. + * + * @param pan Float in range of -1.0 to +1.0. + */ + public void setAudioParamPan(float pan) { + setAudioParam(TextToSpeechClient.Params.AUDIO_PARAM_PAN, pan); + } + + private void setParam(Bundle bundle, String featureName, Object value) { + if (value instanceof String) { + bundle.putString(featureName, (String)value); + } else if(value instanceof byte[]) { + bundle.putByteArray(featureName, (byte[])value); + } else if(value instanceof Integer) { + bundle.putInt(featureName, (Integer)value); + } else if(value instanceof Float) { + bundle.putFloat(featureName, (Float)value); + } else if(value instanceof Double) { + bundle.putFloat(featureName, (Float)value); + } else if(value instanceof Boolean) { + bundle.putBoolean(featureName, (Boolean)value); + } else { + throw new IllegalArgumentException("Illegal type of object"); + } + return; + } + + /** + * Build new RequestConfig instance. 
+ */ + public RequestConfig build() { + RequestConfig config = + new RequestConfig(mCurrentVoiceInfo, mVoiceParams, mAudioParams); + return config; + } + } + + private RequestConfig(VoiceInfo voiceInfo, Bundle voiceParams, Bundle audioParams) { + mCurrentVoiceInfo = voiceInfo; + mVoiceParams = voiceParams; + mAudioParams = audioParams; + } + + /** + * Currently set voice. + */ + private final VoiceInfo mCurrentVoiceInfo; + + /** + * Voice parameters bundle. + */ + private final Bundle mVoiceParams; + + /** + * Audio parameters bundle. + */ + private final Bundle mAudioParams; + + /** + * @return Currently set request voice. + */ + public VoiceInfo getVoice() { + return mCurrentVoiceInfo; + } + + /** + * @return Request audio parameters. + */ + public Bundle getAudioParams() { + return mAudioParams; + } + + /** + * @return Request voice parameters. + */ + public Bundle getVoiceParams() { + return mVoiceParams; + } + +} diff --git a/core/java/android/speech/tts/RequestConfigHelper.java b/core/java/android/speech/tts/RequestConfigHelper.java new file mode 100644 index 0000000..b25c985 --- /dev/null +++ b/core/java/android/speech/tts/RequestConfigHelper.java @@ -0,0 +1,170 @@ +package android.speech.tts; + +import android.speech.tts.TextToSpeechClient.EngineStatus; + +import java.util.Locale; + +/** + * Set of common heuristics for selecting {@link VoiceInfo} from + * {@link TextToSpeechClient#getEngineStatus()} output. + */ +public final class RequestConfigHelper { + private RequestConfigHelper() {} + + /** + * Interface for scoring VoiceInfo object. + */ + public static interface VoiceScorer { + /** + * Score VoiceInfo. If the score is less than or equal to zero, that voice is discarded. + * If two voices have same desired primary characteristics (highest quality, lowest + * latency or others), the one with the higher score is selected. + */ + public int scoreVoice(VoiceInfo voiceInfo); + } + + /** + * Score positively voices that exactly match the locale supplied to the constructor. + */ + public static final class ExactLocaleMatcher implements VoiceScorer { + private final Locale mLocale; + + /** + * Score positively voices that exactly match the given locale + * @param locale Reference locale. If null, the default locale will be used. + */ + public ExactLocaleMatcher(Locale locale) { + if (locale == null) { + mLocale = Locale.getDefault(); + } else { + mLocale = locale; + } + } + @Override + public int scoreVoice(VoiceInfo voiceInfo) { + return mLocale.equals(voiceInfo.getLocale()) ? 1 : 0; + } + } + + /** + * Score positively voices that match exactly the given locale (score 3) + * or that share same language and country (score 2), or that share just a language (score 1). + */ + public static final class LanguageMatcher implements VoiceScorer { + private final Locale mLocale; + + /** + * Score positively voices with similar locale. + * @param locale Reference locale. If null, default will be used. 
+ */ + public LanguageMatcher(Locale locale) { + if (locale == null) { + mLocale = Locale.getDefault(); + } else { + mLocale = locale; + } + } + + @Override + public int scoreVoice(VoiceInfo voiceInfo) { + final Locale voiceLocale = voiceInfo.getLocale(); + if (mLocale.equals(voiceLocale)) { + return 3; + } else { + if (mLocale.getLanguage().equals(voiceLocale.getLanguage())) { + if (mLocale.getCountry().equals(voiceLocale.getCountry())) { + return 2; + } + return 1; + } + return 0; + } + } + } + + /** + * Get the highest quality voice from voices that score more than zero from the passed scorer. + * If there is more than one voice with the same highest quality, then this method returns one + * with the highest score. If they share same score as well, one with the lower index in the + * voices list is returned. + * + * @param engineStatus + * Voices status received from a {@link TextToSpeechClient#getEngineStatus()} call. + * @param voiceScorer + * Used to discard unsuitable voices and help settle cases where more than + * one voice has the desired characteristic. + * @param hasToBeEmbedded + * If true, require the voice to be an embedded voice (no network + * access will be required for synthesis). + */ + private static VoiceInfo getHighestQualityVoice(EngineStatus engineStatus, + VoiceScorer voiceScorer, boolean hasToBeEmbedded) { + VoiceInfo bestVoice = null; + int bestScoreMatch = 1; + int bestVoiceQuality = 0; + + for (VoiceInfo voice : engineStatus.getVoices()) { + int score = voiceScorer.scoreVoice(voice); + if (score <= 0 || hasToBeEmbedded && voice.getRequiresNetworkConnection() + || voice.getQuality() < bestVoiceQuality) { + continue; + } + + if (bestVoice == null || + voice.getQuality() > bestVoiceQuality || + score > bestScoreMatch) { + bestVoice = voice; + bestScoreMatch = score; + bestVoiceQuality = voice.getQuality(); + } + } + return bestVoice; + } + + /** + * Get highest quality voice. + * + * Highest quality voice is selected from voices that score more than zero from the passed + * scorer. If there is more than one voice with the same highest quality, then this method + * will return one with the highest score. If they share same score as well, one with the lower + * index in the voices list is returned. + + * @param engineStatus + * Voices status received from a {@link TextToSpeechClient#getEngineStatus()} call. + * @param hasToBeEmbedded + * If true, require the voice to be an embedded voice (no network + * access will be required for synthesis). + * @param voiceScorer + * Scorer is used to discard unsuitable voices and help settle cases where more than + * one voice has highest quality. + * @return RequestConfig with selected voice or null if suitable voice was not found. + */ + public static RequestConfig highestQuality(EngineStatus engineStatus, + boolean hasToBeEmbedded, VoiceScorer voiceScorer) { + VoiceInfo voice = getHighestQualityVoice(engineStatus, voiceScorer, hasToBeEmbedded); + if (voice == null) { + return null; + } + return RequestConfig.Builder.newBuilder().setVoice(voice).build(); + } + + /** + * Get highest quality voice for the default locale. + * + * Call {@link #highestQuality(EngineStatus, boolean, VoiceScorer)} with + * {@link LanguageMatcher} set to device default locale. + * + * @param engineStatus + * Voices status received from a {@link TextToSpeechClient#getEngineStatus()} call. + * @param hasToBeEmbedded + * If true, require the voice to be an embedded voice (no network + * access will be required for synthesis). 
+ * @return RequestConfig with selected voice or null if suitable voice was not found. + */ + public static RequestConfig highestQuality(EngineStatus engineStatus, + boolean hasToBeEmbedded) { + return highestQuality(engineStatus, hasToBeEmbedded, + new LanguageMatcher(Locale.getDefault())); + } + +} diff --git a/core/java/android/speech/tts/SilencePlaybackQueueItem.java b/core/java/android/speech/tts/SilencePlaybackQueueItem.java index a5e47ae..88b7c70 100644 --- a/core/java/android/speech/tts/SilencePlaybackQueueItem.java +++ b/core/java/android/speech/tts/SilencePlaybackQueueItem.java @@ -17,7 +17,6 @@ package android.speech.tts; import android.os.ConditionVariable; import android.speech.tts.TextToSpeechService.UtteranceProgressDispatcher; -import android.util.Log; class SilencePlaybackQueueItem extends PlaybackQueueItem { private final ConditionVariable mCondVar = new ConditionVariable(); @@ -32,14 +31,20 @@ class SilencePlaybackQueueItem extends PlaybackQueueItem { @Override public void run() { getDispatcher().dispatchOnStart(); + boolean wasStopped = false; if (mSilenceDurationMs > 0) { - mCondVar.block(mSilenceDurationMs); + wasStopped = mCondVar.block(mSilenceDurationMs); } - getDispatcher().dispatchOnDone(); + if (wasStopped) { + getDispatcher().dispatchOnStop(); + } else { + getDispatcher().dispatchOnSuccess(); + } + } @Override - void stop(boolean isError) { + void stop(int errorCode) { mCondVar.open(); } } diff --git a/core/java/android/speech/tts/SynthesisCallback.java b/core/java/android/speech/tts/SynthesisCallback.java index f98bb09..5c67442 100644 --- a/core/java/android/speech/tts/SynthesisCallback.java +++ b/core/java/android/speech/tts/SynthesisCallback.java @@ -26,7 +26,9 @@ package android.speech.tts; * indicate that an error has occurred, but if the call is made after a call * to {@link #done}, it might be discarded. * - * After {@link #start} been called, {@link #done} must be called regardless of errors. + * {@link #done} must be called at the end of synthesis, regardless of errors. + * + * All methods can be only called on the synthesis thread. */ public interface SynthesisCallback { /** @@ -41,13 +43,16 @@ public interface SynthesisCallback { * request. * * This method should only be called on the synthesis thread, - * while in {@link TextToSpeechService#onSynthesizeText}. + * while in {@link TextToSpeechService#onSynthesizeText} or + * {@link TextToSpeechService#onSynthesizeTextV2}. * * @param sampleRateInHz Sample rate in HZ of the generated audio. * @param audioFormat Audio format of the generated audio. Must be one of * the ENCODING_ constants defined in {@link android.media.AudioFormat}. * @param channelCount The number of channels. Must be {@code 1} or {@code 2}. - * @return {@link TextToSpeech#SUCCESS} or {@link TextToSpeech#ERROR}. + * @return {@link TextToSpeech#SUCCESS}, {@link TextToSpeech#ERROR}. + * {@link TextToSpeechClient.Status#STOPPED} is also possible if called in context of + * {@link TextToSpeechService#onSynthesizeTextV2}. */ public int start(int sampleRateInHz, int audioFormat, int channelCount); @@ -55,7 +60,8 @@ public interface SynthesisCallback { * The service should call this method when synthesized audio is ready for consumption. * * This method should only be called on the synthesis thread, - * while in {@link TextToSpeechService#onSynthesizeText}. + * while in {@link TextToSpeechService#onSynthesizeText} or + * {@link TextToSpeechService#onSynthesizeTextV2}. * * @param buffer The generated audio data. 
This method will not hold on to {@code buffer}, * so the caller is free to modify it after this method returns. @@ -63,6 +69,8 @@ public interface SynthesisCallback { * @param length The number of bytes of audio data in {@code buffer}. This must be * less than or equal to the return value of {@link #getMaxBufferSize}. * @return {@link TextToSpeech#SUCCESS} or {@link TextToSpeech#ERROR}. + * {@link TextToSpeechClient.Status#STOPPED} is also possible if called in context of + * {@link TextToSpeechService#onSynthesizeTextV2}. */ public int audioAvailable(byte[] buffer, int offset, int length); @@ -71,11 +79,14 @@ public interface SynthesisCallback { * been passed to {@link #audioAvailable}. * * This method should only be called on the synthesis thread, - * while in {@link TextToSpeechService#onSynthesizeText}. + * while in {@link TextToSpeechService#onSynthesizeText} or + * {@link TextToSpeechService#onSynthesizeTextV2}. * - * This method has to be called if {@link #start} was called. + * This method has to be called if {@link #start} and/or {@link #error} was called. * * @return {@link TextToSpeech#SUCCESS} or {@link TextToSpeech#ERROR}. + * {@link TextToSpeechClient.Status#STOPPED} is also possible if called in context of + * {@link TextToSpeechService#onSynthesizeTextV2}. */ public int done(); @@ -87,4 +98,58 @@ public interface SynthesisCallback { */ public void error(); + + /** + * The service should call this method if the speech synthesis fails. + * + * This method should only be called on the synthesis thread, + * while in {@link TextToSpeechService#onSynthesizeText} or + * {@link TextToSpeechService#onSynthesizeTextV2}. + * + * @param errorCode Error code to pass to the client. One of the ERROR_ values from + * @{link android.sppech.tts.v2.TextToSpeechClient.Status} + */ + public void error(int errorCode); + + /** + * Communicate to client that the original request can't be done and client-requested + * fallback is happening. + * + * Fallback can be requested by the client by setting + * {@link TextToSpeechClient.Params#FALLBACK_VOICE_ID} voice parameter with a id of + * the voice that is expected to be used for the fallback. + * + * This method will fail if user called {@link #start(int, int, int)} and/or + * {@link #done()}. + * + * This method should only be called on the synthesis thread, + * while in {@link TextToSpeechService#onSynthesizeTextV2}. + * + * @return {@link TextToSpeech#SUCCESS}, {@link TextToSpeech#ERROR} if client already + * called {@link #start(int, int, int)}, {@link TextToSpeechClient.Status#STOPPED} + * if stop was requested. + */ + public int fallback(); + + /** + * Check if @{link #start} was called or not. + * + * This method should only be called on the synthesis thread, + * while in {@link TextToSpeechService#onSynthesizeText} or + * {@link TextToSpeechService#onSynthesizeTextV2}. + * + * Useful for checking if a fallback from network request is possible. + */ + public boolean hasStarted(); + + /** + * Check if @{link #done} was called or not. + * + * This method should only be called on the synthesis thread, + * while in {@link TextToSpeechService#onSynthesizeText} or + * {@link TextToSpeechService#onSynthesizeTextV2}. + * + * Useful for checking if a fallback from network request is possible. + */ + public boolean hasFinished(); }
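To make the callback contract above concrete, here is a rough engine-side sketch (illustration only, not part of this patch) of a V2 synthesis loop; the 16 kHz mono output format and the synthesizeFrames() helper are assumptions.

    @Override
    protected void onSynthesizeTextV2(SynthesisRequestV2 request, VoiceInfo voice,
            SynthesisCallback callback) {
        if (callback.start(16000, AudioFormat.ENCODING_PCM_16BIT, 1)
                == TextToSpeech.SUCCESS) {
            byte[] audio = synthesizeFrames(request.getText(), voice); // hypothetical helper
            if (audio == null) {
                // Synthesis failed after start(); report the error, then finish below.
                callback.error(TextToSpeechClient.Status.ERROR_SYNTHESIS);
            } else {
                final int maxChunk = callback.getMaxBufferSize();
                for (int offset = 0; offset < audio.length; offset += maxChunk) {
                    int length = Math.min(maxChunk, audio.length - offset);
                    if (callback.audioAvailable(audio, offset, length) != TextToSpeech.SUCCESS) {
                        break; // stopped by the client or an output problem occurred
                    }
                }
            }
        }
        // done() has to follow a call to start() and/or error().
        callback.done();
    }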
\ No newline at end of file diff --git a/core/java/android/speech/tts/SynthesisPlaybackQueueItem.java b/core/java/android/speech/tts/SynthesisPlaybackQueueItem.java index e853c9e..b424356 100644 --- a/core/java/android/speech/tts/SynthesisPlaybackQueueItem.java +++ b/core/java/android/speech/tts/SynthesisPlaybackQueueItem.java @@ -57,23 +57,22 @@ final class SynthesisPlaybackQueueItem extends PlaybackQueueItem { */ private volatile boolean mStopped; private volatile boolean mDone; - private volatile boolean mIsError; + private volatile int mStatusCode; private final BlockingAudioTrack mAudioTrack; - private final EventLogger mLogger; - + private final AbstractEventLogger mLogger; SynthesisPlaybackQueueItem(int streamType, int sampleRate, int audioFormat, int channelCount, float volume, float pan, UtteranceProgressDispatcher dispatcher, - Object callerIdentity, EventLogger logger) { + Object callerIdentity, AbstractEventLogger logger) { super(dispatcher, callerIdentity); mUnconsumedBytes = 0; mStopped = false; mDone = false; - mIsError = false; + mStatusCode = TextToSpeechClient.Status.SUCCESS; mAudioTrack = new BlockingAudioTrack(streamType, sampleRate, audioFormat, channelCount, volume, pan); @@ -86,9 +85,8 @@ final class SynthesisPlaybackQueueItem extends PlaybackQueueItem { final UtteranceProgressDispatcher dispatcher = getDispatcher(); dispatcher.dispatchOnStart(); - if (!mAudioTrack.init()) { - dispatcher.dispatchOnError(); + dispatcher.dispatchOnError(TextToSpeechClient.Status.ERROR_OUTPUT); return; } @@ -112,23 +110,25 @@ final class SynthesisPlaybackQueueItem extends PlaybackQueueItem { mAudioTrack.waitAndRelease(); - if (mIsError) { - dispatcher.dispatchOnError(); + if (mStatusCode == TextToSpeechClient.Status.SUCCESS) { + dispatcher.dispatchOnSuccess(); + } else if(mStatusCode == TextToSpeechClient.Status.STOPPED) { + dispatcher.dispatchOnStop(); } else { - dispatcher.dispatchOnDone(); + dispatcher.dispatchOnError(mStatusCode); } - mLogger.onWriteData(); + mLogger.onCompleted(mStatusCode); } @Override - void stop(boolean isError) { + void stop(int statusCode) { try { mListLock.lock(); // Update our internal state. mStopped = true; - mIsError = isError; + mStatusCode = statusCode; // Wake up the audio playback thread if it was waiting on take(). // take() will return null since mStopped was true, and will then diff --git a/core/java/android/speech/tts/SynthesisRequestV2.aidl b/core/java/android/speech/tts/SynthesisRequestV2.aidl new file mode 100644 index 0000000..2ac7da6 --- /dev/null +++ b/core/java/android/speech/tts/SynthesisRequestV2.aidl @@ -0,0 +1,20 @@ +/* +** +** Copyright 2013, The Android Open Source Project +** +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at +** +** http://www.apache.org/licenses/LICENSE-2.0 +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. +*/ + +package android.speech.tts; + +parcelable SynthesisRequestV2;
\ No newline at end of file diff --git a/core/java/android/speech/tts/SynthesisRequestV2.java b/core/java/android/speech/tts/SynthesisRequestV2.java new file mode 100644 index 0000000..2fec012 --- /dev/null +++ b/core/java/android/speech/tts/SynthesisRequestV2.java @@ -0,0 +1,132 @@ +package android.speech.tts; + +import android.os.Bundle; +import android.os.Parcel; +import android.os.Parcelable; +import android.speech.tts.TextToSpeechClient.UtteranceId; + +/** + * Service-side representation of a synthesis request from a V2 API client. Contains: + * <ul> + * <li>The utterance to synthesize</li> + * <li>The id of the utterance (String, result of {@link UtteranceId#toUniqueString()}</li> + * <li>The synthesis voice ID (Integer, result of {@link VoiceInfo#getId()})</li> + * <li>Voice parameters (Bundle of parameters)</li> + * <li>Audio parameters (Bundle of parameters)</li> + * </ul> + */ +public final class SynthesisRequestV2 implements Parcelable { + /** Synthesis utterance. */ + private final String mText; + + /** Synthesis id. */ + private final String mUtteranceId; + + /** Voice ID. */ + private final int mVoiceId; + + /** Voice Parameters. */ + private final Bundle mVoiceParams; + + /** Audio Parameters. */ + private final Bundle mAudioParams; + + /** + * Parcel based constructor. + * + * @hide + */ + public SynthesisRequestV2(Parcel in) { + this.mText = in.readString(); + this.mUtteranceId = in.readString(); + this.mVoiceId = in.readInt(); + this.mVoiceParams = in.readBundle(); + this.mAudioParams = in.readBundle(); + } + + SynthesisRequestV2(String text, String utteranceId, RequestConfig rconfig) { + this.mText = text; + this.mUtteranceId = utteranceId; + this.mVoiceId = rconfig.getVoice().getId(); + this.mVoiceParams = rconfig.getVoiceParams(); + this.mAudioParams = rconfig.getAudioParams(); + } + + /** + * Write to parcel. + * + * @hide + */ + @Override + public void writeToParcel(Parcel dest, int flags) { + dest.writeString(mText); + dest.writeString(mUtteranceId); + dest.writeInt(mVoiceId); + dest.writeBundle(mVoiceParams); + dest.writeBundle(mAudioParams); + } + + /** + * @return the text which should be synthesized. + */ + public String getText() { + return mText; + } + + /** + * @return the id of the synthesis request. It's an output of a call to the + * {@link UtteranceId#toUniqueString()} method of the {@link UtteranceId} associated with + * this request. + */ + public String getUtteranceId() { + return mUtteranceId; + } + + /** + * @return the id of the voice to use for this synthesis request. Result of a call to + * the {@link VoiceInfo#getId()} method. + */ + public int getVoiceId() { + return mVoiceId; + } + + /** + * @return bundle of voice parameters. + */ + public Bundle getVoiceParams() { + return mVoiceParams; + } + + /** + * @return bundle of audio parameters. + */ + public Bundle getAudioParams() { + return mAudioParams; + } + + /** + * Parcel creators. 
+ * + * @hide + */ + public static final Parcelable.Creator<SynthesisRequestV2> CREATOR = + new Parcelable.Creator<SynthesisRequestV2>() { + @Override + public SynthesisRequestV2 createFromParcel(Parcel source) { + return new SynthesisRequestV2(source); + } + + @Override + public SynthesisRequestV2[] newArray(int size) { + return new SynthesisRequestV2[size]; + } + }; + + /** + * @hide + */ + @Override + public int describeContents() { + return 0; + } +} diff --git a/core/java/android/speech/tts/TextToSpeech.java b/core/java/android/speech/tts/TextToSpeech.java index 2752085..8fecf9e 100644 --- a/core/java/android/speech/tts/TextToSpeech.java +++ b/core/java/android/speech/tts/TextToSpeech.java @@ -54,7 +54,9 @@ import java.util.Set; * When you are done using the TextToSpeech instance, call the {@link #shutdown()} method * to release the native resources used by the TextToSpeech engine. * + * @deprecated Use @{link android.speech.tts.v2.TextToSpeechClient} instead */ +@Deprecated public class TextToSpeech { private static final String TAG = "TextToSpeech"; @@ -970,7 +972,7 @@ public class TextToSpeech { @Override public Integer run(ITextToSpeechService service) throws RemoteException { return service.playSilence(getCallerIdentity(), durationInMs, queueMode, - getParams(params)); + params == null ? null : params.get(Engine.KEY_PARAM_UTTERANCE_ID)); } }, ERROR, "playSilence"); } @@ -1443,8 +1445,17 @@ public class TextToSpeech { private boolean mEstablished; private final ITextToSpeechCallback.Stub mCallback = new ITextToSpeechCallback.Stub() { + public void onStop(String utteranceId) throws RemoteException { + // do nothing + }; + + @Override + public void onFallback(String utteranceId) throws RemoteException { + // do nothing + } + @Override - public void onDone(String utteranceId) { + public void onSuccess(String utteranceId) { UtteranceProgressListener listener = mUtteranceProgressListener; if (listener != null) { listener.onDone(utteranceId); @@ -1452,7 +1463,7 @@ public class TextToSpeech { } @Override - public void onError(String utteranceId) { + public void onError(String utteranceId, int errorCode) { UtteranceProgressListener listener = mUtteranceProgressListener; if (listener != null) { listener.onError(utteranceId); @@ -1466,6 +1477,11 @@ public class TextToSpeech { listener.onStart(utteranceId); } } + + @Override + public void onVoicesInfoChange(List<VoiceInfo> voicesInfo) throws RemoteException { + // Ignore it + } }; private class SetupConnectionAsyncTask extends AsyncTask<Void, Void, Integer> { diff --git a/core/java/android/speech/tts/TextToSpeechClient.java b/core/java/android/speech/tts/TextToSpeechClient.java new file mode 100644 index 0000000..ac60923 --- /dev/null +++ b/core/java/android/speech/tts/TextToSpeechClient.java @@ -0,0 +1,1055 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package android.speech.tts; + +import android.app.Activity; +import android.app.Application; +import android.content.ComponentName; +import android.content.Context; +import android.content.Intent; +import android.content.ServiceConnection; +import android.media.AudioManager; +import android.net.Uri; +import android.os.AsyncTask; +import android.os.IBinder; +import android.os.ParcelFileDescriptor; +import android.os.RemoteException; +import android.speech.tts.ITextToSpeechCallback; +import android.speech.tts.ITextToSpeechService; +import android.speech.tts.TextToSpeech.Engine; +import android.util.Log; +import android.util.Pair; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; + +/** + * Synthesizes speech from text for immediate playback or to create a sound + * file. + * <p> + * This is an updated version of the speech synthesis client that supersedes + * {@link android.speech.tts.TextToSpeech}. + * <p> + * A TextToSpeechClient instance can only be used to synthesize text once it has + * connected to the service. The TextToSpeechClient instance will start establishing + * the connection after a call to the {@link #connect()} method. This is usually done in + * {@link Application#onCreate()} or {@link Activity#onCreate}. When the connection + * is established, the instance will call back using the + * {@link TextToSpeechClient.ConnectionCallbacks} interface. Only after a + * successful callback is the client usable. + * <p> + * After successful connection, the list of all available voices can be obtained + * by calling the {@link TextToSpeechClient#getEngineStatus() method. The client can + * choose a voice using some custom heuristic and build a {@link RequestConfig} object + * using {@link RequestConfig.Builder}, or can use one of the common heuristics found + * in ({@link RequestConfigHelper}. + * <p> + * When you are done using the TextToSpeechClient instance, call the + * {@link #disconnect()} method to release the connection. + * <p> + * In the rare case of a change to the set of available voices, the service will call to the + * {@link ConnectionCallbacks#onEngineStatusChange} with new set of available voices as argument. + * In response, the client HAVE to recreate all {@link RequestConfig} instances in use. + */ +public final class TextToSpeechClient { + private static final String TAG = TextToSpeechClient.class.getSimpleName(); + + private final Object mLock = new Object(); + private final TtsEngines mEnginesHelper; + private final Context mContext; + + // Guarded by mLock + private Connection mServiceConnection; + private final RequestCallbacks mDefaultRequestCallbacks; + private final ConnectionCallbacks mConnectionCallbacks; + private EngineStatus mEngineStatus; + private String mRequestedEngine; + private boolean mFallbackToDefault; + private HashMap<String, Pair<UtteranceId, RequestCallbacks>> mCallbacks; + // Guarded by mLock + + /** Common voices parameters */ + public static final class Params { + private Params() {} + + /** + * Maximum allowed time for a single request attempt, in milliseconds, before synthesis + * fails (or fallback request starts, if requested using + * {@link #FALLBACK_VOICE_ID}). 
+ */ + public static final String NETWORK_TIMEOUT_MS = "networkTimeoutMs"; + + /** + * Number of network request retries that are attempted in case of failure + */ + public static final String NETWORK_RETRIES_COUNT = "networkRetriesCount"; + + /** + * Should synthesizer report sub-utterance progress on synthesis. Only applicable + * for the {@link TextToSpeechClient#queueSpeak} method. + */ + public static final String TRACK_SUBUTTERANCE_PROGRESS = "trackSubutteranceProgress"; + + /** + * If a voice exposes this parameter then it supports the fallback request feature. + * + * If it is set to a valid id of some other voice ({@link VoiceInfo#getId()}) then + * in case of request failure (due to network problems or missing data), fallback request + * will be attempted. Request will be done using the voice referenced by this parameter. + * If it is the case, the client will be informed by a callback to the {@link + * RequestCallbacks#onSynthesisFallback(UtteranceId)}. + */ + public static final String FALLBACK_VOICE_ID = "fallbackVoiceId"; + + /** + * Audio parameter for specifying a linear multiplier to the speaking speed of the voice. + * The value is a float. Values below zero decrease speed of the synthesized speech + * values above one increase it. If the value of this parameter is equal to zero, + * then it will be replaced by a settings-configurable default before it reaches + * TTS service. + */ + public static final String SPEECH_SPEED = "speechSpeed"; + + /** + * Audio parameter for controlling the pitch of the output. The Value is a positive float, + * with default of {@code 1.0}. The value is used to scale the primary frequency linearly. + * Lower values lower the tone of the synthesized voice, greater values increase it. + */ + public static final String SPEECH_PITCH = "speechPitch"; + + /** + * Audio parameter for controlling output volume. Value is a float with scale of 0 to 1 + */ + public static final String AUDIO_PARAM_VOLUME = TextToSpeech.Engine.KEY_PARAM_VOLUME; + + /** + * Audio parameter for controlling output pan. + * Value is a float ranging from -1 to +1 where -1 maps to a hard-left pan, + * 0 to center (the default behavior), and +1 to hard-right. + */ + public static final String AUDIO_PARAM_PAN = TextToSpeech.Engine.KEY_PARAM_PAN; + + /** + * Audio parameter for specifying the audio stream type to be used when speaking text + * or playing back a file. The value should be one of the STREAM_ constants + * defined in {@link AudioManager}. + */ + public static final String AUDIO_PARAM_STREAM = TextToSpeech.Engine.KEY_PARAM_STREAM; + } + + /** + * Result codes for TTS operations. + */ + public static final class Status { + private Status() {} + + /** + * Denotes a successful operation. + */ + public static final int SUCCESS = 0; + + /** + * Denotes a stop requested by a client. It's used only on the service side of the API, + * client should never expect to see this result code. + */ + public static final int STOPPED = 100; + + /** + * Denotes a generic failure. + */ + public static final int ERROR_UNKNOWN = -1; + + /** + * Denotes a failure of a TTS engine to synthesize the given input. + */ + public static final int ERROR_SYNTHESIS = 10; + + /** + * Denotes a failure of a TTS service. + */ + public static final int ERROR_SERVICE = 11; + + /** + * Denotes a failure related to the output (audio device or a file). + */ + public static final int ERROR_OUTPUT = 12; + + /** + * Denotes a failure caused by a network connectivity problems. 
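As a small illustration of how a client might react to these result codes (a hypothetical sketch, not part of this patch; the retry policy and the callback wiring are assumptions), per-request callbacks could distinguish transient from permanent failures:

    TextToSpeechClient.RequestCallbacks callbacks = new TextToSpeechClient.RequestCallbacks() {
        @Override
        public void onSynthesisFailure(TextToSpeechClient.UtteranceId utteranceId,
                int errorCode) {
            switch (errorCode) {
                case TextToSpeechClient.Status.ERROR_NETWORK:
                case TextToSpeechClient.Status.ERROR_NETWORK_TIMEOUT:
                    // Transient connectivity problem; the request may be re-queued later.
                    break;
                case TextToSpeechClient.Status.ERROR_DOWNLOADING_ADDITIONAL_DATA:
                    // Voice data may become available after a download; retry later or use
                    // a request configured with a fallback voice.
                    break;
                default:
                    // ERROR_SYNTHESIS, ERROR_SERVICE, ERROR_OUTPUT, ...: give up on this one.
                    break;
            }
        }
    };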
+ */ + public static final int ERROR_NETWORK = 13; + + /** + * Denotes a failure caused by network timeout. + */ + public static final int ERROR_NETWORK_TIMEOUT = 14; + + /** + * Denotes a failure caused by an invalid request. + */ + public static final int ERROR_INVALID_REQUEST = 15; + + /** + * Denotes a failure related to passing a non-unique utterance id. + */ + public static final int ERROR_NON_UNIQUE_UTTERANCE_ID = 16; + + /** + * Denotes a failure related to missing data. The TTS implementation may download + * the missing data, and if so, request will succeed in future. This error can only happen + * for voices with {@link VoiceInfo#FEATURE_MAY_AUTOINSTALL} feature. + * Note: the recommended way to avoid this error is to create a request with the fallback + * voice. + */ + public static final int ERROR_DOWNLOADING_ADDITIONAL_DATA = 17; + } + + /** + * Set of callbacks for the events related to the progress of a synthesis request + * through the synthesis queue. Each synthesis request is associated with a call to + * {@link #queueSpeak} or {@link #queueSynthesizeToFile}. + * + * The callbacks specified in this method will NOT be called on UI thread. + */ + public static abstract class RequestCallbacks { + /** + * Called after synthesis of utterance successfully starts. + */ + public void onSynthesisStart(UtteranceId utteranceId) {} + + /** + * Called after synthesis successfully finishes. + * @param utteranceId + * Unique identifier of synthesized utterance. + */ + public void onSynthesisSuccess(UtteranceId utteranceId) {} + + /** + * Called after synthesis was stopped in middle of synthesis process. + * @param utteranceId + * Unique identifier of synthesized utterance. + */ + public void onSynthesisStop(UtteranceId utteranceId) {} + + /** + * Called when requested synthesis failed and fallback synthesis is about to be attempted. + * + * Requires voice with available {@link TextToSpeechClient.Params#FALLBACK_VOICE_ID} + * parameter, and request with this parameter enabled. + * + * This callback will be followed by callback to the {@link #onSynthesisStart}, + * {@link #onSynthesisFailure} or {@link #onSynthesisSuccess} that depends on the + * fallback outcome. + * + * For more fallback feature reference, look at the + * {@link TextToSpeechClient.Params#FALLBACK_VOICE_ID}. + * + * @param utteranceId + * Unique identifier of synthesized utterance. + */ + public void onSynthesisFallback(UtteranceId utteranceId) {} + + /** + * Called after synthesis of utterance fails. + * + * It may be called instead or after a {@link #onSynthesisStart} callback. + * + * @param utteranceId + * Unique identifier of synthesized utterance. + * @param errorCode + * One of the values from {@link Status}. + */ + public void onSynthesisFailure(UtteranceId utteranceId, int errorCode) {} + + /** + * Called during synthesis to mark synthesis progress. + * + * Requires voice with available + * {@link TextToSpeechClient.Params#TRACK_SUBUTTERANCE_PROGRESS} parameter, and + * request with this parameter enabled. + * + * @param utteranceId + * Unique identifier of synthesized utterance. + * @param charIndex + * String index (java char offset) of recently synthesized character. + * @param msFromStart + * Miliseconds from the start of the synthesis. + */ + public void onSynthesisProgress(UtteranceId utteranceId, int charIndex, int msFromStart) {} + } + + /** + * Interface definition of callbacks that are called when the client is + * connected or disconnected from the TTS service. 
+ */ + public static interface ConnectionCallbacks { + /** + * After calling {@link TextToSpeechClient#connect()}, this method will be invoked + * asynchronously when the connect request has successfully completed. + * + * Clients are strongly encouraged to call {@link TextToSpeechClient#getEngineStatus()} + * and create {@link RequestConfig} objects used in subsequent synthesis requests. + */ + public void onConnectionSuccess(); + + /** + * After calling {@link TextToSpeechClient#connect()}, this method may be invoked + * asynchronously when the connect request has failed to complete. + * + * It may be also invoked synchronously, from the body of + * {@link TextToSpeechClient#connect()} method. + */ + public void onConnectionFailure(); + + /** + * Called when the connection to the service is lost. This can happen if there is a problem + * with the speech service (e.g. a crash or resource problem causes it to be killed by the + * system). When called, all requests have been canceled and no outstanding listeners will + * be executed. Applications should disable UI components that require the service. + */ + public void onServiceDisconnected(); + + /** + * After receiving {@link #onConnectionSuccess()} callback, this method may be invoked + * if engine status obtained from {@link TextToSpeechClient#getEngineStatus()}) changes. + * It usually means that some voices were removed, changed or added. + * + * Clients are required to recreate {@link RequestConfig} objects used in subsequent + * synthesis requests. + */ + public void onEngineStatusChange(EngineStatus newEngineStatus); + } + + /** State of voices as provided by engine and user. */ + public static final class EngineStatus { + /** All available voices. */ + private final List<VoiceInfo> mVoices; + + /** Name of the TTS engine package */ + private final String mPackageName; + + private EngineStatus(String packageName, List<VoiceInfo> voices) { + this.mVoices = Collections.unmodifiableList(voices); + this.mPackageName = packageName; + } + + /** + * Get an immutable list of all Voices exposed by the TTS engine. + */ + public List<VoiceInfo> getVoices() { + return mVoices; + } + + /** + * Get name of the TTS engine package currently in use. + */ + public String getEnginePackage() { + return mPackageName; + } + } + + /** Unique synthesis request identifier. */ + public static final class UtteranceId { + private final String mDescription; + /** + * Create new, unique UtteranceId instance. + */ + public UtteranceId() { + mDescription = null; + } + + /** + * Create new, unique UtteranceId instance. + * + * @param description Additional string, that will be appended to + * {@link #toUniqueString()} output, allowing easier identification of the utterance in + * callbacks. + */ + public UtteranceId(String description) { + mDescription = description; + } + + /** + * Returns a unique string associated with an instance of this object. + * + * If you subclass {@link UtteranceId} make sure that output of this method is + * consistent across multiple calls and unique for the instance. + * + * This string will be used to identify the synthesis request/utterance inside the + * TTS service. + */ + public String toUniqueString() { + return mDescription == null ? "UtteranceId" + System.identityHashCode(this) : + "UtteranceId" + System.identityHashCode(this) + ": " + mDescription; + } + } + + /** + * Create TextToSpeech service client. + * + * Will connect to the default TTS service. 
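For example, a hypothetical setup sketch (not part of this patch), typically run from Activity#onCreate(); the empty per-request callbacks and the use of the application context are assumptions:

    TextToSpeechClient client = new TextToSpeechClient(
            getApplicationContext(),
            new TextToSpeechClient.RequestCallbacks() {
                // Default per-request callbacks; see the Status handling sketch above.
            },
            new TextToSpeechClient.ConnectionCallbacks() {
                @Override
                public void onConnectionSuccess() {
                    // Connected: query getEngineStatus() and build RequestConfig objects here.
                }

                @Override
                public void onConnectionFailure() {
                    // Could not bind to any suitable engine; disable speech features.
                }

                @Override
                public void onServiceDisconnected() {
                    // The engine died; all pending requests have been dropped.
                }

                @Override
                public void onEngineStatusChange(TextToSpeechClient.EngineStatus newStatus) {
                    // The voice set changed: recreate any cached RequestConfig instances.
                }
            });
    client.connect();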
In order to be usable, {@link #connect()} need + * to be called first and successful connection callback need to be received. + * + * @param context + * The context this instance is running in. + * @param engine + * Package name of requested TTS engine. If it's null, then default engine will + * be selected regardless of {@code fallbackToDefaultEngine} parameter value. + * @param fallbackToDefaultEngine + * If requested engine is not available, should we fallback to the default engine? + * @param defaultRequestCallbacks + * Default request callbacks, it will be used for all synthesis requests without + * supplied RequestCallbacks instance. Can't be null. + * @param connectionCallbacks + * Callbacks for connecting and disconnecting from the service. Can't be null. + */ + public TextToSpeechClient(Context context, + String engine, boolean fallbackToDefaultEngine, + RequestCallbacks defaultRequestCallbacks, + ConnectionCallbacks connectionCallbacks) { + if (context == null) + throw new IllegalArgumentException("context can't be null"); + if (defaultRequestCallbacks == null) + throw new IllegalArgumentException("defaultRequestCallbacks can't be null"); + if (connectionCallbacks == null) + throw new IllegalArgumentException("connectionCallbacks can't be null"); + mContext = context; + mEnginesHelper = new TtsEngines(mContext); + mCallbacks = new HashMap<String, Pair<UtteranceId, RequestCallbacks>>(); + mDefaultRequestCallbacks = defaultRequestCallbacks; + mConnectionCallbacks = connectionCallbacks; + + mRequestedEngine = engine; + mFallbackToDefault = fallbackToDefaultEngine; + } + + /** + * Create TextToSpeech service client. Will connect to the default TTS + * service. In order to be usable, {@link #connect()} need to be called + * first and successful connection callback need to be received. + * + * @param context Context this instance is running in. + * @param defaultRequestCallbacks Default request callbacks, it + * will be used for all synthesis requests without supplied + * RequestCallbacks instance. Can't be null. + * @param connectionCallbacks Callbacks for connecting and disconnecting + * from the service. Can't be null. + */ + public TextToSpeechClient(Context context, RequestCallbacks defaultRequestCallbacks, + ConnectionCallbacks connectionCallbacks) { + this(context, null, true, defaultRequestCallbacks, connectionCallbacks); + } + + + private boolean initTts(String requestedEngine, boolean fallbackToDefaultEngine) { + // Step 1: Try connecting to the engine that was requested. + if (requestedEngine != null) { + if (mEnginesHelper.isEngineInstalled(requestedEngine)) { + if ((mServiceConnection = connectToEngine(requestedEngine)) != null) { + return true; + } else if (!fallbackToDefaultEngine) { + Log.w(TAG, "Couldn't connect to requested engine: " + requestedEngine); + return false; + } + } else if (!fallbackToDefaultEngine) { + Log.w(TAG, "Requested engine not installed: " + requestedEngine); + return false; + } + } + + // Step 2: Try connecting to the user's default engine. + final String defaultEngine = mEnginesHelper.getDefaultEngine(); + if (defaultEngine != null && !defaultEngine.equals(requestedEngine)) { + if ((mServiceConnection = connectToEngine(defaultEngine)) != null) { + return true; + } + } + + // Step 3: Try connecting to the highest ranked engine in the + // system. 
+ final String highestRanked = mEnginesHelper.getHighestRankedEngineName(); + if (highestRanked != null && !highestRanked.equals(requestedEngine) && + !highestRanked.equals(defaultEngine)) { + if ((mServiceConnection = connectToEngine(highestRanked)) != null) { + return true; + } + } + + Log.w(TAG, "Couldn't find working TTS engine"); + return false; + } + + private Connection connectToEngine(String engine) { + Connection connection = new Connection(engine); + Intent intent = new Intent(TextToSpeech.Engine.INTENT_ACTION_TTS_SERVICE); + intent.setPackage(engine); + boolean bound = mContext.bindService(intent, connection, Context.BIND_AUTO_CREATE); + if (!bound) { + Log.e(TAG, "Failed to bind to " + engine); + return null; + } else { + Log.i(TAG, "Successfully bound to " + engine); + return connection; + } + } + + + /** + * Connects the client to TTS service. This method returns immediately, and connects to the + * service in the background. + * + * After connection initializes successfully, {@link ConnectionCallbacks#onConnectionSuccess()} + * is called. On a failure {@link ConnectionCallbacks#onConnectionFailure} is called. + * + * Both of those callback may be called asynchronously on the main thread, + * {@link ConnectionCallbacks#onConnectionFailure} may be called synchronously, before + * this method returns. + */ + public void connect() { + synchronized (mLock) { + if (mServiceConnection != null) { + return; + } + if(!initTts(mRequestedEngine, mFallbackToDefault)) { + mConnectionCallbacks.onConnectionFailure(); + } + } + } + + /** + * Checks if the client is currently connected to the service, so that + * requests to other methods will succeed. + */ + public boolean isConnected() { + synchronized (mLock) { + return mServiceConnection != null && mServiceConnection.isEstablished(); + } + } + + /** + * Closes the connection to TextToSpeech service. No calls can be made on this object after + * calling this method. + * It is good practice to call this method in the onDestroy() method of an Activity + * so the TextToSpeech engine can be cleanly stopped. + */ + public void disconnect() { + synchronized (mLock) { + if (mServiceConnection != null) { + mServiceConnection.disconnect(); + mServiceConnection = null; + mCallbacks.clear(); + } + } + } + + /** + * Register callback. + * + * @param utteranceId Non-null UtteranceId instance. + * @param callback Non-null callbacks for the request + * @return Status.SUCCESS or error code in case of invalid arguments. + */ + private int addCallback(UtteranceId utteranceId, RequestCallbacks callback) { + synchronized (mLock) { + if (utteranceId == null || callback == null) { + return Status.ERROR_INVALID_REQUEST; + } + if (mCallbacks.put(utteranceId.toUniqueString(), + new Pair<UtteranceId, RequestCallbacks>(utteranceId, callback)) != null) { + return Status.ERROR_NON_UNIQUE_UTTERANCE_ID; + } + return Status.SUCCESS; + } + } + + /** + * Remove and return callback. + * + * @param utteranceIdStr Unique string obtained from {@link UtteranceId#toUniqueString}. + */ + private Pair<UtteranceId, RequestCallbacks> removeCallback(String utteranceIdStr) { + synchronized (mLock) { + return mCallbacks.remove(utteranceIdStr); + } + } + + /** + * Get callback and utterance id. + * + * @param utteranceIdStr Unique string obtained from {@link UtteranceId#toUniqueString}. 
+ */ + private Pair<UtteranceId, RequestCallbacks> getCallback(String utteranceIdStr) { + synchronized (mLock) { + return mCallbacks.get(utteranceIdStr); + } + } + + /** + * Remove callback and call {@link RequestCallbacks#onSynthesisFailure} with passed + * error code. + * + * @param utteranceIdStr Unique string obtained from {@link UtteranceId#toUniqueString}. + * @param errorCode argument to {@link RequestCallbacks#onSynthesisFailure} call. + */ + private void removeCallbackAndErr(String utteranceIdStr, int errorCode) { + synchronized (mLock) { + Pair<UtteranceId, RequestCallbacks> c = mCallbacks.remove(utteranceIdStr); + c.second.onSynthesisFailure(c.first, errorCode); + } + } + + /** + * Retrieve TTS engine status @{link VoicesStatus}. Requires connected client. + */ + public EngineStatus getEngineStatus() { + synchronized (mLock) { + return mEngineStatus; + } + } + + /** + * Query TTS engine about available voices and defaults. + * + * @return EngineStatus is connected or null if client is disconnected. + */ + private EngineStatus requestEngineStatus(ITextToSpeechService service) + throws RemoteException { + List<VoiceInfo> voices = service.getVoicesInfo(); + if (voices == null) { + Log.e(TAG, "Requested engine doesn't support TTS V2 API"); + return null; + } + + return new EngineStatus(mServiceConnection.getEngineName(), voices); + } + + private class Connection implements ServiceConnection { + private final String mEngineName; + + private ITextToSpeechService mService; + + private boolean mEstablished; + + private PrepareConnectionAsyncTask mSetupConnectionAsyncTask; + + public Connection(String engineName) { + this.mEngineName = engineName; + } + + private final ITextToSpeechCallback.Stub mCallback = new ITextToSpeechCallback.Stub() { + + @Override + public void onStart(String utteranceIdStr) { + synchronized (mLock) { + Pair<UtteranceId, RequestCallbacks> callbacks = getCallback(utteranceIdStr); + callbacks.second.onSynthesisStart(callbacks.first); + } + } + + public void onStop(String utteranceIdStr) { + synchronized (mLock) { + Pair<UtteranceId, RequestCallbacks> callbacks = removeCallback(utteranceIdStr); + callbacks.second.onSynthesisStop(callbacks.first); + } + } + + @Override + public void onSuccess(String utteranceIdStr) { + synchronized (mLock) { + Pair<UtteranceId, RequestCallbacks> callbacks = removeCallback(utteranceIdStr); + callbacks.second.onSynthesisSuccess(callbacks.first); + } + } + + public void onFallback(String utteranceIdStr) { + synchronized (mLock) { + Pair<UtteranceId, RequestCallbacks> callbacks = getCallback(utteranceIdStr); + callbacks.second.onSynthesisFallback(callbacks.first); + } + }; + + @Override + public void onError(String utteranceIdStr, int errorCode) { + removeCallbackAndErr(utteranceIdStr, errorCode); + } + + @Override + public void onVoicesInfoChange(List<VoiceInfo> voicesInfo) { + synchronized (mLock) { + mEngineStatus = new EngineStatus(mServiceConnection.getEngineName(), + voicesInfo); + mConnectionCallbacks.onEngineStatusChange(mEngineStatus); + } + } + }; + + private class PrepareConnectionAsyncTask extends AsyncTask<Void, Void, EngineStatus> { + + private final ComponentName mName; + + public PrepareConnectionAsyncTask(ComponentName name) { + mName = name; + } + + @Override + protected EngineStatus doInBackground(Void... 
params) { + synchronized(mLock) { + if (isCancelled()) { + return null; + } + try { + mService.setCallback(getCallerIdentity(), mCallback); + return requestEngineStatus(mService); + } catch (RemoteException re) { + Log.e(TAG, "Error setting up the TTS service"); + return null; + } + } + } + + @Override + protected void onPostExecute(EngineStatus result) { + synchronized(mLock) { + if (mSetupConnectionAsyncTask == this) { + mSetupConnectionAsyncTask = null; + } + if (result == null) { + Log.e(TAG, "Setup task failed"); + disconnect(); + mConnectionCallbacks.onConnectionFailure(); + return; + } + + mEngineStatus = result; + mEstablished = true; + } + mConnectionCallbacks.onConnectionSuccess(); + } + } + + @Override + public void onServiceConnected(ComponentName name, IBinder service) { + Log.i(TAG, "Connected to " + name); + + synchronized(mLock) { + mEstablished = false; + mService = ITextToSpeechService.Stub.asInterface(service); + startSetupConnectionTask(name); + } + } + + @Override + public void onServiceDisconnected(ComponentName name) { + Log.i(TAG, "Asked to disconnect from " + name); + + synchronized(mLock) { + stopSetupConnectionTask(); + } + mConnectionCallbacks.onServiceDisconnected(); + } + + private void startSetupConnectionTask(ComponentName name) { + stopSetupConnectionTask(); + mSetupConnectionAsyncTask = new PrepareConnectionAsyncTask(name); + mSetupConnectionAsyncTask.execute(); + } + + private boolean stopSetupConnectionTask() { + boolean result = false; + if (mSetupConnectionAsyncTask != null) { + result = mSetupConnectionAsyncTask.cancel(false); + mSetupConnectionAsyncTask = null; + } + return result; + } + + IBinder getCallerIdentity() { + return mCallback; + } + + boolean isEstablished() { + return mService != null && mEstablished; + } + + boolean runAction(Action action) { + synchronized (mLock) { + try { + action.run(mService); + return true; + } catch (Exception ex) { + Log.e(TAG, action.getName() + " failed", ex); + disconnect(); + return false; + } + } + } + + void disconnect() { + mContext.unbindService(this); + stopSetupConnectionTask(); + mService = null; + mEstablished = false; + if (mServiceConnection == this) { + mServiceConnection = null; + } + } + + String getEngineName() { + return mEngineName; + } + } + + private abstract class Action { + private final String mName; + + public Action(String name) { + mName = name; + } + + public String getName() {return mName;} + abstract void run(ITextToSpeechService service) throws RemoteException; + } + + private IBinder getCallerIdentity() { + if (mServiceConnection != null) { + return mServiceConnection.getCallerIdentity(); + } + return null; + } + + private boolean runAction(Action action) { + synchronized (mLock) { + if (mServiceConnection == null) { + return false; + } + if (!mServiceConnection.isEstablished()) { + return false; + } + mServiceConnection.runAction(action); + return true; + } + } + + private static final String ACTION_STOP_NAME = "stop"; + + /** + * Interrupts the current utterance spoken (whether played or rendered to file) and discards + * other utterances in the queue. 
+ */ + public void stop() { + runAction(new Action(ACTION_STOP_NAME) { + @Override + public void run(ITextToSpeechService service) throws RemoteException { + if (service.stop(getCallerIdentity()) != Status.SUCCESS) { + Log.e(TAG, "Stop failed"); + } + mCallbacks.clear(); + } + }); + } + + private static final String ACTION_QUEUE_SPEAK_NAME = "queueSpeak"; + + /** + * Speaks the string using the specified queuing strategy using current + * voice. This method is asynchronous, i.e. the method just adds the request + * to the queue of TTS requests and then returns. The synthesis might not + * have finished (or even started!) at the time when this method returns. + * + * @param utterance The string of text to be spoken. No longer than + * 1000 characters. + * @param utteranceId Unique identificator used to track the synthesis progress + * in {@link RequestCallbacks}. + * @param config Synthesis request configuration. Can't be null. Has to contain a + * voice. + * @param callbacks Synthesis request callbacks. If null, default request + * callbacks object will be used. + */ + public void queueSpeak(final String utterance, final UtteranceId utteranceId, + final RequestConfig config, + final RequestCallbacks callbacks) { + runAction(new Action(ACTION_QUEUE_SPEAK_NAME) { + @Override + public void run(ITextToSpeechService service) throws RemoteException { + RequestCallbacks c = mDefaultRequestCallbacks; + if (callbacks != null) { + c = callbacks; + } + int addCallbackStatus = addCallback(utteranceId, c); + if (addCallbackStatus != Status.SUCCESS) { + c.onSynthesisFailure(utteranceId, Status.ERROR_INVALID_REQUEST); + return; + } + + int queueResult = service.speakV2( + getCallerIdentity(), + new SynthesisRequestV2(utterance, utteranceId.toUniqueString(), config)); + if (queueResult != Status.SUCCESS) { + removeCallbackAndErr(utteranceId.toUniqueString(), queueResult); + } + } + }); + } + + private static final String ACTION_QUEUE_SYNTHESIZE_TO_FILE = "queueSynthesizeToFile"; + + /** + * Synthesizes the given text to a file using the specified parameters. This + * method is asynchronous, i.e. the method just adds the request to the + * queue of TTS requests and then returns. The synthesis might not have + * finished (or even started!) at the time when this method returns. + * + * @param utterance The text that should be synthesized. No longer than + * 1000 characters. + * @param utteranceId Unique identificator used to track the synthesis progress + * in {@link RequestCallbacks}. + * @param outputFile File to write the generated audio data to. + * @param config Synthesis request configuration. Can't be null. Have to contain a + * voice. + * @param callbacks Synthesis request callbacks. If null, default request + * callbacks object will be used. 
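Continuing the earlier sketches (hypothetical and not part of this patch; the connected client, the RequestConfig, and the output location are assumptions), queuing work after a successful connection might look like this:

    RequestConfig config = RequestConfigHelper.highestQuality(
            client.getEngineStatus(), false /* hasToBeEmbedded */);
    if (config != null) {
        // Speak one utterance, relying on the default RequestCallbacks.
        client.queueSpeak("Hello world", new TextToSpeechClient.UtteranceId("greeting"),
                config, null);
        // Render another utterance to a file in the application cache directory.
        client.queueSynthesizeToFile("Goodbye",
                new TextToSpeechClient.UtteranceId("farewell"),
                new File(getCacheDir(), "farewell.wav"), config, null);
    }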
+ */ + public void queueSynthesizeToFile(final String utterance, final UtteranceId utteranceId, + final File outputFile, final RequestConfig config, + final RequestCallbacks callbacks) { + runAction(new Action(ACTION_QUEUE_SYNTHESIZE_TO_FILE) { + @Override + public void run(ITextToSpeechService service) throws RemoteException { + RequestCallbacks c = mDefaultRequestCallbacks; + if (callbacks != null) { + c = callbacks; + } + int addCallbackStatus = addCallback(utteranceId, c); + if (addCallbackStatus != Status.SUCCESS) { + c.onSynthesisFailure(utteranceId, Status.ERROR_INVALID_REQUEST); + return; + } + + ParcelFileDescriptor fileDescriptor = null; + try { + if (outputFile.exists() && !outputFile.canWrite()) { + Log.e(TAG, "No permissions to write to " + outputFile); + removeCallbackAndErr(utteranceId.toUniqueString(), Status.ERROR_OUTPUT); + return; + } + fileDescriptor = ParcelFileDescriptor.open(outputFile, + ParcelFileDescriptor.MODE_WRITE_ONLY | + ParcelFileDescriptor.MODE_CREATE | + ParcelFileDescriptor.MODE_TRUNCATE); + + int queueResult = service.synthesizeToFileDescriptorV2(getCallerIdentity(), + fileDescriptor, + new SynthesisRequestV2(utterance, utteranceId.toUniqueString(), + config)); + fileDescriptor.close(); + if (queueResult != Status.SUCCESS) { + removeCallbackAndErr(utteranceId.toUniqueString(), queueResult); + } + } catch (FileNotFoundException e) { + Log.e(TAG, "Opening file " + outputFile + " failed", e); + removeCallbackAndErr(utteranceId.toUniqueString(), Status.ERROR_OUTPUT); + } catch (IOException e) { + Log.e(TAG, "Closing file " + outputFile + " failed", e); + removeCallbackAndErr(utteranceId.toUniqueString(), Status.ERROR_OUTPUT); + } + } + }); + } + + private static final String ACTION_QUEUE_SILENCE_NAME = "queueSilence"; + + /** + * Plays silence for the specified amount of time. This method is asynchronous, + * i.e. the method just adds the request to the queue of TTS requests and then + * returns. The synthesis might not have finished (or even started!) at the time + * when this method returns. + * + * @param durationInMs The duration of the silence in milliseconds. + * @param utteranceId Unique identificator used to track the synthesis progress + * in {@link RequestCallbacks}. + * @param callbacks Synthesis request callbacks. If null, default request + * callbacks object will be used. + */ + public void queueSilence(final long durationInMs, final UtteranceId utteranceId, + final RequestCallbacks callbacks) { + runAction(new Action(ACTION_QUEUE_SILENCE_NAME) { + @Override + public void run(ITextToSpeechService service) throws RemoteException { + RequestCallbacks c = mDefaultRequestCallbacks; + if (callbacks != null) { + c = callbacks; + } + int addCallbackStatus = addCallback(utteranceId, c); + if (addCallbackStatus != Status.SUCCESS) { + c.onSynthesisFailure(utteranceId, Status.ERROR_INVALID_REQUEST); + } + + int queueResult = service.playSilence(getCallerIdentity(), durationInMs, + TextToSpeech.QUEUE_ADD, utteranceId.toUniqueString()); + + if (queueResult != Status.SUCCESS) { + removeCallbackAndErr(utteranceId.toUniqueString(), queueResult); + } + } + }); + } + + + private static final String ACTION_QUEUE_AUDIO_NAME = "queueAudio"; + + /** + * Plays the audio resource using the specified parameters. + * This method is asynchronous, i.e. the method just adds the request to the queue of TTS + * requests and then returns. The synthesis might not have finished (or even started!) at the + * time when this method returns. 
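Similarly, pauses and pre-recorded prompts can share the same queue; in this hypothetical fragment (not part of this patch) the resource Uri and the reuse of the client and config from the previous sketch are assumptions:

    // Half a second of silence between utterances, tracked like any other request.
    client.queueSilence(500, new TextToSpeechClient.UtteranceId("pause"), null);
    // Play a bundled audio resource through the same queue; only the system/audio
    // parameters of the config are used for this request.
    client.queueAudio(Uri.parse("android.resource://com.example.app/raw/chime"),
            new TextToSpeechClient.UtteranceId("chime"), config, null);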
+ * + * @param audioUrl The audio resource that should be played + * @param utteranceId Unique identificator used to track synthesis progress + * in {@link RequestCallbacks}. + * @param config Synthesis request configuration. Can't be null. Doesn't have to contain a + * voice (only system parameters are used). + * @param callbacks Synthesis request callbacks. If null, default request + * callbacks object will be used. + */ + public void queueAudio(final Uri audioUrl, final UtteranceId utteranceId, + final RequestConfig config, final RequestCallbacks callbacks) { + runAction(new Action(ACTION_QUEUE_AUDIO_NAME) { + @Override + public void run(ITextToSpeechService service) throws RemoteException { + RequestCallbacks c = mDefaultRequestCallbacks; + if (callbacks != null) { + c = callbacks; + } + int addCallbackStatus = addCallback(utteranceId, c); + if (addCallbackStatus != Status.SUCCESS) { + c.onSynthesisFailure(utteranceId, Status.ERROR_INVALID_REQUEST); + } + + int queueResult = service.playAudioV2(getCallerIdentity(), audioUrl, + utteranceId.toUniqueString(), config.getVoiceParams()); + + if (queueResult != Status.SUCCESS) { + removeCallbackAndErr(utteranceId.toUniqueString(), queueResult); + } + } + }); + } +} diff --git a/core/java/android/speech/tts/TextToSpeechService.java b/core/java/android/speech/tts/TextToSpeechService.java index 575855c..b3e01ce 100644 --- a/core/java/android/speech/tts/TextToSpeechService.java +++ b/core/java/android/speech/tts/TextToSpeechService.java @@ -34,26 +34,27 @@ import android.speech.tts.TextToSpeech.Engine; import android.text.TextUtils; import android.util.Log; -import java.io.FileDescriptor; import java.io.FileOutputStream; import java.io.IOException; +import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Locale; +import java.util.Map; +import java.util.MissingResourceException; import java.util.Set; /** * Abstract base class for TTS engine implementations. The following methods - * need to be implemented. - * + * need to be implemented for V1 API ({@link TextToSpeech}) implementation. * <ul> - * <li>{@link #onIsLanguageAvailable}</li> - * <li>{@link #onLoadLanguage}</li> - * <li>{@link #onGetLanguage}</li> - * <li>{@link #onSynthesizeText}</li> - * <li>{@link #onStop}</li> + * <li>{@link #onIsLanguageAvailable}</li> + * <li>{@link #onLoadLanguage}</li> + * <li>{@link #onGetLanguage}</li> + * <li>{@link #onSynthesizeText}</li> + * <li>{@link #onStop}</li> * </ul> - * * The first three deal primarily with language management, and are used to * query the engine for it's support for a given language and indicate to it * that requests in a given language are imminent. @@ -61,22 +62,44 @@ import java.util.Set; * {@link #onSynthesizeText} is central to the engine implementation. The * implementation should synthesize text as per the request parameters and * return synthesized data via the supplied callback. This class and its helpers - * will then consume that data, which might mean queueing it for playback or writing - * it to a file or similar. All calls to this method will be on a single - * thread, which will be different from the main thread of the service. Synthesis - * must be synchronous which means the engine must NOT hold on the callback or call - * any methods on it after the method returns + * will then consume that data, which might mean queuing it for playback or writing + * it to a file or similar. 
All calls to this method will be on a single thread, + * which will be different from the main thread of the service. Synthesis must be + * synchronous which means the engine must NOT hold on to the callback or call any + * methods on it after the method returns. * - * {@link #onStop} tells the engine that it should stop all ongoing synthesis, if - * any. Any pending data from the current synthesis will be discarded. + * {@link #onStop} tells the engine that it should stop + * all ongoing synthesis, if any. Any pending data from the current synthesis + * will be discarded. * + * {@link #onGetLanguage} is not required as of JELLYBEAN_MR2 (API 18) and later, it is only + * called on earlier versions of Android. + * <p> + * In order to fully support the V2 API ({@link TextToSpeechClient}), + * these methods must be implemented: + * <ul> + * <li>{@link #onSynthesizeTextV2}</li> + * <li>{@link #checkVoicesInfo}</li> + * <li>{@link #onVoicesInfoChange}</li> + * <li>{@link #implementsV2API}</li> + * </ul> + * In addition {@link #implementsV2API} has to return true. + * <p> + * If the service does not implement these methods and {@link #implementsV2API} returns false, + * then the V2 API will be provided by converting V2 requests ({@link #onSynthesizeTextV2}) + * to V1 requests ({@link #onSynthesizeText}). On service setup, all of the available device + * locales will be fed to {@link #onIsLanguageAvailable} to check if they are supported. + * If they are, embedded and/or network voices will be created depending on the result of + * {@link #onGetFeaturesForLanguage}. + * <p> + * Note that a V2 service will still receive requests from V1 clients and has to implement all + * of the V1 API methods. */ public abstract class TextToSpeechService extends Service { private static final boolean DBG = false; private static final String TAG = "TextToSpeechService"; - private static final String SYNTH_THREAD_NAME = "SynthThread"; private SynthHandler mSynthHandler; @@ -89,6 +112,11 @@ public abstract class TextToSpeechService extends Service { private CallbackMap mCallbacks; private String mPackageName; + private final Object mVoicesInfoLock = new Object(); + + private List<VoiceInfo> mVoicesInfoList; + private Map<Integer, VoiceInfo> mVoicesInfoLookup; + @Override public void onCreate() { if (DBG) Log.d(TAG, "onCreate()"); @@ -108,6 +136,7 @@ public abstract class TextToSpeechService extends Service { mPackageName = getApplicationInfo().packageName; String[] defaultLocale = getSettingsLocale(); + // Load default language onLoadLanguage(defaultLocale[0], defaultLocale[1], defaultLocale[2]); } @@ -148,6 +177,9 @@ public abstract class TextToSpeechService extends Service { /** * Returns the language, country and variant currently being used by the TTS engine. * + * This method will be called only on Android 4.2 and before (API <= 17). In later versions + * this method is not called by the Android TTS framework. + * * Can be called on multiple threads. * * @return A 3-element array, containing language (ISO 3-letter code), @@ -191,21 +223,159 @@ public abstract class TextToSpeechService extends Service { protected abstract void onStop(); /** - * Tells the service to synthesize speech from the given text. This method should - * block until the synthesis is finished. - * - * Called on the synthesis thread. + * Tells the service to synthesize speech from the given text. This method + * should block until the synthesis is finished. Used for requests from V1 + * clients ({@link android.speech.tts.TextToSpeech}). 
Called on the synthesis + * thread. * * @param request The synthesis request. - * @param callback The callback the the engine must use to make data available for - * playback or for writing to a file. + * @param callback The callback that the engine must use to make data + * available for playback or for writing to a file. */ protected abstract void onSynthesizeText(SynthesisRequest request, SynthesisCallback callback); /** + * Check the available voices data and return immutable list of available voices. + * Output of this method will be passed to clients to allow them to configure synthesis + * requests. + * + * Can be called on multiple threads. + * + * The result of this method will be saved and served to all TTS clients. If a TTS service wants + * to update the set of available voices, it should call the {@link #forceVoicesInfoCheck()} + * method. + */ + protected List<VoiceInfo> checkVoicesInfo() { + if (implementsV2API()) { + throw new IllegalStateException("For proper V2 API implementation this method has to" + + " be implemented"); + } + + // V2 to V1 interface adapter. This allows using V2 client interface on V1-only services. + Bundle defaultParams = new Bundle(); + defaultParams.putFloat(TextToSpeechClient.Params.SPEECH_PITCH, 1.0f); + defaultParams.putFloat(TextToSpeechClient.Params.SPEECH_SPEED, -1.0f); + + // Enumerate all locales and check if they are available + ArrayList<VoiceInfo> voicesInfo = new ArrayList<VoiceInfo>(); + int id = 0; + for (Locale locale : Locale.getAvailableLocales()) { + int expectedStatus = TextToSpeech.LANG_COUNTRY_VAR_AVAILABLE; + if (locale.getVariant().isEmpty()) { + if (locale.getCountry().isEmpty()) { + expectedStatus = TextToSpeech.LANG_AVAILABLE; + } else { + expectedStatus = TextToSpeech.LANG_COUNTRY_AVAILABLE; + } + } + try { + int localeStatus = onIsLanguageAvailable(locale.getISO3Language(), + locale.getISO3Country(), locale.getVariant()); + if (localeStatus != expectedStatus) { + continue; + } + } catch (MissingResourceException e) { + // Ignore locale without iso 3 codes + continue; + } + + Set<String> features = onGetFeaturesForLanguage(locale.getISO3Language(), + locale.getISO3Country(), locale.getVariant()); + + VoiceInfo.Builder builder = new VoiceInfo.Builder(); + builder.setLatency(VoiceInfo.LATENCY_NORMAL); + builder.setQuality(VoiceInfo.QUALITY_NORMAL); + builder.setLocale(locale); + builder.setParamsWithDefaults(defaultParams); + + if (features == null || features.contains( + TextToSpeech.Engine.KEY_FEATURE_EMBEDDED_SYNTHESIS)) { + builder.setId(id++); + builder.setRequiresNetworkConnection(false); + voicesInfo.add(builder.build()); + } + + if (features != null && features.contains( + TextToSpeech.Engine.KEY_FEATURE_NETWORK_SYNTHESIS)) { + builder.setId(id++); + builder.setRequiresNetworkConnection(true); + voicesInfo.add(builder.build()); + } + } + + return voicesInfo; + } + + /** + * Tells the synthesis thread that it should reload voice data. + * There's a high probability that the underlying set of available voice data has changed. + * Called only on the synthesis thread. + */ + protected void onVoicesInfoChange() { + + } + + /** + * Tells the service to synthesize speech from the given text. This method + * should block until the synthesis is finished. Used for requests from V2 + * client {@link android.speech.tts.TextToSpeechClient}. Called on the + * synthesis thread. + * + * @param request The synthesis request. 
+ * @param callback The callback the the engine must use to make data + * available for playback or for writing to a file. + */ + protected void onSynthesizeTextV2(SynthesisRequestV2 request, + VoiceInfo selectedVoice, + SynthesisCallback callback) { + if (implementsV2API()) { + throw new IllegalStateException("For proper V2 API implementation this method has to" + + " be implemented"); + } + + // Convert to V1 params + int speechRate = (int) (request.getVoiceParams().getFloat( + TextToSpeechClient.Params.SPEECH_SPEED, 1.0f) * 100); + int speechPitch = (int) (request.getVoiceParams().getFloat( + TextToSpeechClient.Params.SPEECH_PITCH, 1.0f) * 100); + + // Provide adapter to V1 API + Bundle params = new Bundle(); + params.putString(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID, request.getUtteranceId()); + params.putInt(TextToSpeech.Engine.KEY_PARAM_PITCH, speechPitch); + params.putInt(TextToSpeech.Engine.KEY_PARAM_RATE, speechRate); + if (selectedVoice.getRequiresNetworkConnection()) { + params.putString(TextToSpeech.Engine.KEY_FEATURE_NETWORK_SYNTHESIS, "true"); + } else { + params.putString(TextToSpeech.Engine.KEY_FEATURE_EMBEDDED_SYNTHESIS, "true"); + } + + // Build V1 request + SynthesisRequest requestV1 = new SynthesisRequest(request.getText(), params); + Locale locale = selectedVoice.getLocale(); + requestV1.setLanguage(locale.getISO3Language(), locale.getISO3Country(), + locale.getVariant()); + requestV1.setSpeechRate(speechRate); + requestV1.setPitch(speechPitch); + + // Synthesize using V1 interface + onSynthesizeText(requestV1, callback); + } + + /** + * If true, this service implements proper V2 TTS API service. If it's false, + * V2 API will be provided through adapter. + */ + protected boolean implementsV2API() { + return false; + } + + /** * Queries the service for a set of features supported for a given language. * + * Can be called on multiple threads. + * * @param lang ISO-3 language code. * @param country ISO-3 country code. May be empty or null. * @param variant Language variant. May be empty or null. @@ -215,6 +385,68 @@ public abstract class TextToSpeechService extends Service { return null; } + private List<VoiceInfo> getVoicesInfo() { + synchronized (mVoicesInfoLock) { + if (mVoicesInfoList == null) { + // Get voices. Defensive copy to make sure TTS engine won't alter the list. + mVoicesInfoList = new ArrayList<VoiceInfo>(checkVoicesInfo()); + // Build lookup map + mVoicesInfoLookup = new HashMap<Integer, VoiceInfo>((int) (mVoicesInfoList.size()*1.5f)); + for (VoiceInfo voiceInfo : mVoicesInfoList) { + VoiceInfo prev = mVoicesInfoLookup.put(voiceInfo.getId(), voiceInfo); + if (prev != null) { + Log.e(TAG, "Duplicate ID (" + voiceInfo.getId() + ") of the voice "); + } + } + } + return mVoicesInfoList; + } + } + + public VoiceInfo getVoicesInfoWithId(int id) { + synchronized (mVoicesInfoLock) { + if (mVoicesInfoLookup != null) { + return mVoicesInfoLookup.get(id); + } + } + return null; + } + + /** + * Force TTS service to reevaluate the set of available languages. Will result in + * a call to {@link #checkVoicesInfo()} on the same thread, {@link #onVoicesInfoChange} + * on the synthesizer thread and callback to + * {@link TextToSpeechClient.ConnectionCallbacks#onEngineStatusChange} of all connected + * TTS clients. + * + * Use this method only if you know that set of available languages changed. + * + * Can be called on multiple threads. 
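To illustrate the service-side requirements listed above, a native V2 engine might start from the following hypothetical skeleton (not part of this patch; the single embedded US-English voice and the omitted methods are assumptions):

    public class MyTtsService extends TextToSpeechService {
        @Override
        protected boolean implementsV2API() {
            return true; // serve V2 clients directly instead of via the V1 adapter
        }

        @Override
        protected List<VoiceInfo> checkVoicesInfo() {
            // Publish one embedded US-English voice; voice IDs must be unique.
            VoiceInfo.Builder builder = new VoiceInfo.Builder();
            builder.setId(1);
            builder.setLocale(Locale.US);
            builder.setQuality(VoiceInfo.QUALITY_NORMAL);
            builder.setLatency(VoiceInfo.LATENCY_NORMAL);
            builder.setRequiresNetworkConnection(false);
            builder.setParamsWithDefaults(new Bundle()); // default voice parameters
            return Collections.singletonList(builder.build());
        }

        // onSynthesizeTextV2, onStop and the still-required V1 entry points
        // (onIsLanguageAvailable, onLoadLanguage, onSynthesizeText, ...) are omitted here.
    }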
+ */ + public void forceVoicesInfoCheck() { + synchronized (mVoicesInfoLock) { + List<VoiceInfo> old = mVoicesInfoList; + + mVoicesInfoList = null; // Force recreation of voices info list + getVoicesInfo(); + + if (mVoicesInfoList == null) { + throw new IllegalStateException("This method applies only to services " + + "supporting V2 TTS API. This services doesn't support V2 TTS API."); + } + + if (old != null) { + // Flush all existing items, and inform synthesis thread about the change. + mSynthHandler.enqueueSpeechItem(TextToSpeech.QUEUE_FLUSH, + new VoicesInfoChangeItem()); + // TODO: Handle items that may be added to queue after SynthesizerRestartItem + // but before client reconnection + // Disconnect all of them + mCallbacks.dispatchVoicesInfoChange(mVoicesInfoList); + } + } + } + private int getDefaultSpeechRate() { return getSecureSettingInt(Settings.Secure.TTS_DEFAULT_RATE, Engine.DEFAULT_RATE); } @@ -317,7 +549,8 @@ public abstract class TextToSpeechService extends Service { if (!speechItem.isValid()) { if (utterenceProgress != null) { - utterenceProgress.dispatchOnError(); + utterenceProgress.dispatchOnError( + TextToSpeechClient.Status.ERROR_INVALID_REQUEST); } return TextToSpeech.ERROR; } @@ -342,12 +575,13 @@ public abstract class TextToSpeechService extends Service { // // Note that this string is interned, so the == comparison works. msg.obj = speechItem.getCallerIdentity(); + if (sendMessage(msg)) { return TextToSpeech.SUCCESS; } else { Log.w(TAG, "SynthThread has quit"); if (utterenceProgress != null) { - utterenceProgress.dispatchOnError(); + utterenceProgress.dispatchOnError(TextToSpeechClient.Status.ERROR_SERVICE); } return TextToSpeech.ERROR; } @@ -399,9 +633,11 @@ public abstract class TextToSpeechService extends Service { } interface UtteranceProgressDispatcher { - public void dispatchOnDone(); + public void dispatchOnFallback(); + public void dispatchOnStop(); + public void dispatchOnSuccess(); public void dispatchOnStart(); - public void dispatchOnError(); + public void dispatchOnError(int errorCode); } /** @@ -409,15 +645,13 @@ public abstract class TextToSpeechService extends Service { */ private abstract class SpeechItem { private final Object mCallerIdentity; - protected final Bundle mParams; private final int mCallerUid; private final int mCallerPid; private boolean mStarted = false; private boolean mStopped = false; - public SpeechItem(Object caller, int callerUid, int callerPid, Bundle params) { + public SpeechItem(Object caller, int callerUid, int callerPid) { mCallerIdentity = caller; - mParams = params; mCallerUid = callerUid; mCallerPid = callerPid; } @@ -446,20 +680,18 @@ public abstract class TextToSpeechService extends Service { * Must not be called more than once. * * Only called on the synthesis thread. - * - * @return {@link TextToSpeech#SUCCESS} or {@link TextToSpeech#ERROR}. */ - public int play() { + public void play() { synchronized (this) { if (mStarted) { throw new IllegalStateException("play() called twice"); } mStarted = true; } - return playImpl(); + playImpl(); } - protected abstract int playImpl(); + protected abstract void playImpl(); /** * Stops the speech item. @@ -485,20 +717,37 @@ public abstract class TextToSpeechService extends Service { } /** - * An item in the synth thread queue that process utterance. + * An item in the synth thread queue that process utterance (and call back to client about + * progress). 
*/ private abstract class UtteranceSpeechItem extends SpeechItem implements UtteranceProgressDispatcher { - public UtteranceSpeechItem(Object caller, int callerUid, int callerPid, Bundle params) { - super(caller, callerUid, callerPid, params); + public UtteranceSpeechItem(Object caller, int callerUid, int callerPid) { + super(caller, callerUid, callerPid); + } + + @Override + public void dispatchOnSuccess() { + final String utteranceId = getUtteranceId(); + if (utteranceId != null) { + mCallbacks.dispatchOnSuccess(getCallerIdentity(), utteranceId); + } } @Override - public void dispatchOnDone() { + public void dispatchOnStop() { final String utteranceId = getUtteranceId(); if (utteranceId != null) { - mCallbacks.dispatchOnDone(getCallerIdentity(), utteranceId); + mCallbacks.dispatchOnStop(getCallerIdentity(), utteranceId); + } + } + + @Override + public void dispatchOnFallback() { + final String utteranceId = getUtteranceId(); + if (utteranceId != null) { + mCallbacks.dispatchOnFallback(getCallerIdentity(), utteranceId); } } @@ -511,44 +760,260 @@ public abstract class TextToSpeechService extends Service { } @Override - public void dispatchOnError() { + public void dispatchOnError(int errorCode) { final String utteranceId = getUtteranceId(); if (utteranceId != null) { - mCallbacks.dispatchOnError(getCallerIdentity(), utteranceId); + mCallbacks.dispatchOnError(getCallerIdentity(), utteranceId, errorCode); } } - public int getStreamType() { - return getIntParam(Engine.KEY_PARAM_STREAM, Engine.DEFAULT_STREAM); + abstract public String getUtteranceId(); + + String getStringParam(Bundle params, String key, String defaultValue) { + return params == null ? defaultValue : params.getString(key, defaultValue); + } + + int getIntParam(Bundle params, String key, int defaultValue) { + return params == null ? defaultValue : params.getInt(key, defaultValue); + } + + float getFloatParam(Bundle params, String key, float defaultValue) { + return params == null ? defaultValue : params.getFloat(key, defaultValue); + } + } + + /** + * UtteranceSpeechItem for V1 API speech items. V1 API speech items keep + * synthesis parameters in a single Bundle passed as parameter. This class + * allow subclasses to access them conveniently. 
+ */ + private abstract class SpeechItemV1 extends UtteranceSpeechItem { + protected final Bundle mParams; + + SpeechItemV1(Object callerIdentity, int callerUid, int callerPid, + Bundle params) { + super(callerIdentity, callerUid, callerPid); + mParams = params; + } + + boolean hasLanguage() { + return !TextUtils.isEmpty(getStringParam(mParams, Engine.KEY_PARAM_LANGUAGE, null)); } - public float getVolume() { - return getFloatParam(Engine.KEY_PARAM_VOLUME, Engine.DEFAULT_VOLUME); + int getSpeechRate() { + return getIntParam(mParams, Engine.KEY_PARAM_RATE, getDefaultSpeechRate()); } - public float getPan() { - return getFloatParam(Engine.KEY_PARAM_PAN, Engine.DEFAULT_PAN); + int getPitch() { + return getIntParam(mParams, Engine.KEY_PARAM_PITCH, Engine.DEFAULT_PITCH); } + @Override public String getUtteranceId() { - return getStringParam(Engine.KEY_PARAM_UTTERANCE_ID, null); + return getStringParam(mParams, Engine.KEY_PARAM_UTTERANCE_ID, null); + } + + int getStreamType() { + return getIntParam(mParams, Engine.KEY_PARAM_STREAM, Engine.DEFAULT_STREAM); + } + + float getVolume() { + return getFloatParam(mParams, Engine.KEY_PARAM_VOLUME, Engine.DEFAULT_VOLUME); + } + + float getPan() { + return getFloatParam(mParams, Engine.KEY_PARAM_PAN, Engine.DEFAULT_PAN); + } + } + + class SynthesisSpeechItemV2 extends UtteranceSpeechItem { + private final SynthesisRequestV2 mSynthesisRequest; + private AbstractSynthesisCallback mSynthesisCallback; + private final EventLoggerV2 mEventLogger; + + public SynthesisSpeechItemV2(Object callerIdentity, int callerUid, int callerPid, + SynthesisRequestV2 synthesisRequest) { + super(callerIdentity, callerUid, callerPid); + + mSynthesisRequest = synthesisRequest; + mEventLogger = new EventLoggerV2(synthesisRequest, callerUid, callerPid, + mPackageName); + + updateSpeechSpeedParam(synthesisRequest); + } + + private void updateSpeechSpeedParam(SynthesisRequestV2 synthesisRequest) { + Bundle voiceParams = mSynthesisRequest.getVoiceParams(); + + // Inject default speech speed if needed + if (voiceParams.containsKey(TextToSpeechClient.Params.SPEECH_SPEED)) { + if (voiceParams.getFloat(TextToSpeechClient.Params.SPEECH_SPEED) <= 0) { + voiceParams.putFloat(TextToSpeechClient.Params.SPEECH_SPEED, + getDefaultSpeechRate() / 100.0f); + } + } } - protected String getStringParam(String key, String defaultValue) { - return mParams == null ? defaultValue : mParams.getString(key, defaultValue); + @Override + public boolean isValid() { + if (mSynthesisRequest.getText() == null) { + Log.e(TAG, "null synthesis text"); + return false; + } + if (mSynthesisRequest.getText().length() >= TextToSpeech.getMaxSpeechInputLength()) { + Log.w(TAG, "Text too long: " + mSynthesisRequest.getText().length() + " chars"); + return false; + } + + return true; } - protected int getIntParam(String key, int defaultValue) { - return mParams == null ? defaultValue : mParams.getInt(key, defaultValue); + @Override + protected void playImpl() { + AbstractSynthesisCallback synthesisCallback; + if (mEventLogger != null) { + mEventLogger.onRequestProcessingStart(); + } + synchronized (this) { + // stop() might have been called before we enter this + // synchronized block. 
+ if (isStopped()) { + return; + } + mSynthesisCallback = createSynthesisCallback(); + synthesisCallback = mSynthesisCallback; + } + + // Get voice info + VoiceInfo voiceInfo = getVoicesInfoWithId(mSynthesisRequest.getVoiceId()); + if (voiceInfo != null) { + // Primary voice + TextToSpeechService.this.onSynthesizeTextV2(mSynthesisRequest, voiceInfo, + synthesisCallback); + } else { + Log.e(TAG, "Unknown voice id:" + mSynthesisRequest.getVoiceId()); + synthesisCallback.error(TextToSpeechClient.Status.ERROR_INVALID_REQUEST); + } + + // Fix for case where client called .start() & .error(), but did not called .done() + if (!synthesisCallback.hasFinished()) { + synthesisCallback.done(); + } } - protected float getFloatParam(String key, float defaultValue) { - return mParams == null ? defaultValue : mParams.getFloat(key, defaultValue); + @Override + protected void stopImpl() { + AbstractSynthesisCallback synthesisCallback; + synchronized (this) { + synthesisCallback = mSynthesisCallback; + } + if (synthesisCallback != null) { + // If the synthesis callback is null, it implies that we haven't + // entered the synchronized(this) block in playImpl which in + // turn implies that synthesis would not have started. + synthesisCallback.stop(); + TextToSpeechService.this.onStop(); + } } + protected AbstractSynthesisCallback createSynthesisCallback() { + return new PlaybackSynthesisCallback(getStreamType(), getVolume(), getPan(), + mAudioPlaybackHandler, this, getCallerIdentity(), mEventLogger, + implementsV2API()); + } + + private int getStreamType() { + return getIntParam(mSynthesisRequest.getAudioParams(), + TextToSpeechClient.Params.AUDIO_PARAM_STREAM, + Engine.DEFAULT_STREAM); + } + + private float getVolume() { + return getFloatParam(mSynthesisRequest.getAudioParams(), + TextToSpeechClient.Params.AUDIO_PARAM_VOLUME, + Engine.DEFAULT_VOLUME); + } + + private float getPan() { + return getFloatParam(mSynthesisRequest.getAudioParams(), + TextToSpeechClient.Params.AUDIO_PARAM_PAN, + Engine.DEFAULT_PAN); + } + + @Override + public String getUtteranceId() { + return mSynthesisRequest.getUtteranceId(); + } + } + + private class SynthesisToFileOutputStreamSpeechItemV2 extends SynthesisSpeechItemV2 { + private final FileOutputStream mFileOutputStream; + + public SynthesisToFileOutputStreamSpeechItemV2(Object callerIdentity, int callerUid, + int callerPid, + SynthesisRequestV2 synthesisRequest, + FileOutputStream fileOutputStream) { + super(callerIdentity, callerUid, callerPid, synthesisRequest); + mFileOutputStream = fileOutputStream; + } + + @Override + protected AbstractSynthesisCallback createSynthesisCallback() { + return new FileSynthesisCallback(mFileOutputStream.getChannel(), + this, getCallerIdentity(), implementsV2API()); + } + + @Override + protected void playImpl() { + super.playImpl(); + try { + mFileOutputStream.close(); + } catch(IOException e) { + Log.w(TAG, "Failed to close output file", e); + } + } + } + + private class AudioSpeechItemV2 extends UtteranceSpeechItem { + private final AudioPlaybackQueueItem mItem; + private final Bundle mAudioParams; + private final String mUtteranceId; + + public AudioSpeechItemV2(Object callerIdentity, int callerUid, int callerPid, + String utteranceId, Bundle audioParams, Uri uri) { + super(callerIdentity, callerUid, callerPid); + mUtteranceId = utteranceId; + mAudioParams = audioParams; + mItem = new AudioPlaybackQueueItem(this, getCallerIdentity(), + TextToSpeechService.this, uri, getStreamType()); + } + + @Override + public boolean isValid() { + return 
true; + } + + @Override + protected void playImpl() { + mAudioPlaybackHandler.enqueue(mItem); + } + + @Override + protected void stopImpl() { + // Do nothing. + } + + protected int getStreamType() { + return mAudioParams.getInt(TextToSpeechClient.Params.AUDIO_PARAM_STREAM); + } + + public String getUtteranceId() { + return mUtteranceId; + } } - class SynthesisSpeechItem extends UtteranceSpeechItem { + + class SynthesisSpeechItemV1 extends SpeechItemV1 { // Never null. private final String mText; private final SynthesisRequest mSynthesisRequest; @@ -556,10 +1021,10 @@ public abstract class TextToSpeechService extends Service { // Non null after synthesis has started, and all accesses // guarded by 'this'. private AbstractSynthesisCallback mSynthesisCallback; - private final EventLogger mEventLogger; + private final EventLoggerV1 mEventLogger; private final int mCallerUid; - public SynthesisSpeechItem(Object callerIdentity, int callerUid, int callerPid, + public SynthesisSpeechItemV1(Object callerIdentity, int callerUid, int callerPid, Bundle params, String text) { super(callerIdentity, callerUid, callerPid, params); mText = text; @@ -567,7 +1032,7 @@ public abstract class TextToSpeechService extends Service { mSynthesisRequest = new SynthesisRequest(mText, mParams); mDefaultLocale = getSettingsLocale(); setRequestParams(mSynthesisRequest); - mEventLogger = new EventLogger(mSynthesisRequest, callerUid, callerPid, + mEventLogger = new EventLoggerV1(mSynthesisRequest, callerUid, callerPid, mPackageName); } @@ -589,25 +1054,30 @@ public abstract class TextToSpeechService extends Service { } @Override - protected int playImpl() { + protected void playImpl() { AbstractSynthesisCallback synthesisCallback; mEventLogger.onRequestProcessingStart(); synchronized (this) { // stop() might have been called before we enter this // synchronized block. if (isStopped()) { - return TextToSpeech.ERROR; + return; } mSynthesisCallback = createSynthesisCallback(); synthesisCallback = mSynthesisCallback; } + TextToSpeechService.this.onSynthesizeText(mSynthesisRequest, synthesisCallback); - return synthesisCallback.isDone() ? 
TextToSpeech.SUCCESS : TextToSpeech.ERROR; + + // Fix for case where client called .start() & .error(), but did not called .done() + if (synthesisCallback.hasStarted() && !synthesisCallback.hasFinished()) { + synthesisCallback.done(); + } } protected AbstractSynthesisCallback createSynthesisCallback() { return new PlaybackSynthesisCallback(getStreamType(), getVolume(), getPan(), - mAudioPlaybackHandler, this, getCallerIdentity(), mEventLogger); + mAudioPlaybackHandler, this, getCallerIdentity(), mEventLogger, false); } private void setRequestParams(SynthesisRequest request) { @@ -632,37 +1102,25 @@ public abstract class TextToSpeechService extends Service { } } - public String getLanguage() { - return getStringParam(Engine.KEY_PARAM_LANGUAGE, mDefaultLocale[0]); - } - - private boolean hasLanguage() { - return !TextUtils.isEmpty(getStringParam(Engine.KEY_PARAM_LANGUAGE, null)); - } - private String getCountry() { if (!hasLanguage()) return mDefaultLocale[1]; - return getStringParam(Engine.KEY_PARAM_COUNTRY, ""); + return getStringParam(mParams, Engine.KEY_PARAM_COUNTRY, ""); } private String getVariant() { if (!hasLanguage()) return mDefaultLocale[2]; - return getStringParam(Engine.KEY_PARAM_VARIANT, ""); + return getStringParam(mParams, Engine.KEY_PARAM_VARIANT, ""); } - private int getSpeechRate() { - return getIntParam(Engine.KEY_PARAM_RATE, getDefaultSpeechRate()); - } - - private int getPitch() { - return getIntParam(Engine.KEY_PARAM_PITCH, Engine.DEFAULT_PITCH); + public String getLanguage() { + return getStringParam(mParams, Engine.KEY_PARAM_LANGUAGE, mDefaultLocale[0]); } } - private class SynthesisToFileOutputStreamSpeechItem extends SynthesisSpeechItem { + private class SynthesisToFileOutputStreamSpeechItemV1 extends SynthesisSpeechItemV1 { private final FileOutputStream mFileOutputStream; - public SynthesisToFileOutputStreamSpeechItem(Object callerIdentity, int callerUid, + public SynthesisToFileOutputStreamSpeechItemV1(Object callerIdentity, int callerUid, int callerPid, Bundle params, String text, FileOutputStream fileOutputStream) { super(callerIdentity, callerUid, callerPid, params, text); mFileOutputStream = fileOutputStream; @@ -670,30 +1128,26 @@ public abstract class TextToSpeechService extends Service { @Override protected AbstractSynthesisCallback createSynthesisCallback() { - return new FileSynthesisCallback(mFileOutputStream.getChannel()); + return new FileSynthesisCallback(mFileOutputStream.getChannel(), + this, getCallerIdentity(), false); } @Override - protected int playImpl() { + protected void playImpl() { dispatchOnStart(); - int status = super.playImpl(); - if (status == TextToSpeech.SUCCESS) { - dispatchOnDone(); - } else { - dispatchOnError(); - } + super.playImpl(); try { mFileOutputStream.close(); } catch(IOException e) { Log.w(TAG, "Failed to close output file", e); } - return status; } } - private class AudioSpeechItem extends UtteranceSpeechItem { + private class AudioSpeechItemV1 extends SpeechItemV1 { private final AudioPlaybackQueueItem mItem; - public AudioSpeechItem(Object callerIdentity, int callerUid, int callerPid, + + public AudioSpeechItemV1(Object callerIdentity, int callerUid, int callerPid, Bundle params, Uri uri) { super(callerIdentity, callerUid, callerPid, params); mItem = new AudioPlaybackQueueItem(this, getCallerIdentity(), @@ -706,23 +1160,29 @@ public abstract class TextToSpeechService extends Service { } @Override - protected int playImpl() { + protected void playImpl() { mAudioPlaybackHandler.enqueue(mItem); - return 
TextToSpeech.SUCCESS; } @Override protected void stopImpl() { // Do nothing. } + + @Override + public String getUtteranceId() { + return getStringParam(mParams, Engine.KEY_PARAM_UTTERANCE_ID, null); + } } private class SilenceSpeechItem extends UtteranceSpeechItem { private final long mDuration; + private final String mUtteranceId; public SilenceSpeechItem(Object callerIdentity, int callerUid, int callerPid, - Bundle params, long duration) { - super(callerIdentity, callerUid, callerPid, params); + String utteranceId, long duration) { + super(callerIdentity, callerUid, callerPid); + mUtteranceId = utteranceId; mDuration = duration; } @@ -732,26 +1192,57 @@ public abstract class TextToSpeechService extends Service { } @Override - protected int playImpl() { + protected void playImpl() { mAudioPlaybackHandler.enqueue(new SilencePlaybackQueueItem( this, getCallerIdentity(), mDuration)); - return TextToSpeech.SUCCESS; } @Override protected void stopImpl() { - // Do nothing, handled by AudioPlaybackHandler#stopForApp + + } + + @Override + public String getUtteranceId() { + return mUtteranceId; + } + } + + /** + * Call {@link TextToSpeechService#onVoicesInfoChange} on synthesis thread. + */ + private class VoicesInfoChangeItem extends SpeechItem { + public VoicesInfoChangeItem() { + super(null, 0, 0); // It's never initiated by an user + } + + @Override + public boolean isValid() { + return true; + } + + @Override + protected void playImpl() { + TextToSpeechService.this.onVoicesInfoChange(); + } + + @Override + protected void stopImpl() { + // No-op } } + /** + * Call {@link TextToSpeechService#onLoadLanguage} on synth thread. + */ private class LoadLanguageItem extends SpeechItem { private final String mLanguage; private final String mCountry; private final String mVariant; public LoadLanguageItem(Object callerIdentity, int callerUid, int callerPid, - Bundle params, String language, String country, String variant) { - super(callerIdentity, callerUid, callerPid, params); + String language, String country, String variant) { + super(callerIdentity, callerUid, callerPid); mLanguage = language; mCountry = country; mVariant = variant; @@ -763,14 +1254,8 @@ public abstract class TextToSpeechService extends Service { } @Override - protected int playImpl() { - int result = TextToSpeechService.this.onLoadLanguage(mLanguage, mCountry, mVariant); - if (result == TextToSpeech.LANG_AVAILABLE || - result == TextToSpeech.LANG_COUNTRY_AVAILABLE || - result == TextToSpeech.LANG_COUNTRY_VAR_AVAILABLE) { - return TextToSpeech.SUCCESS; - } - return TextToSpeech.ERROR; + protected void playImpl() { + TextToSpeechService.this.onLoadLanguage(mLanguage, mCountry, mVariant); } @Override @@ -800,7 +1285,7 @@ public abstract class TextToSpeechService extends Service { return TextToSpeech.ERROR; } - SpeechItem item = new SynthesisSpeechItem(caller, + SpeechItem item = new SynthesisSpeechItemV1(caller, Binder.getCallingUid(), Binder.getCallingPid(), params, text); return mSynthHandler.enqueueSpeechItem(queueMode, item); } @@ -818,7 +1303,7 @@ public abstract class TextToSpeechService extends Service { final ParcelFileDescriptor sameFileDescriptor = ParcelFileDescriptor.adoptFd( fileDescriptor.detachFd()); - SpeechItem item = new SynthesisToFileOutputStreamSpeechItem(caller, + SpeechItem item = new SynthesisToFileOutputStreamSpeechItemV1(caller, Binder.getCallingUid(), Binder.getCallingPid(), params, text, new ParcelFileDescriptor.AutoCloseOutputStream(sameFileDescriptor)); return 
mSynthHandler.enqueueSpeechItem(TextToSpeech.QUEUE_ADD, item); @@ -830,19 +1315,19 @@ public abstract class TextToSpeechService extends Service { return TextToSpeech.ERROR; } - SpeechItem item = new AudioSpeechItem(caller, + SpeechItem item = new AudioSpeechItemV1(caller, Binder.getCallingUid(), Binder.getCallingPid(), params, audioUri); return mSynthHandler.enqueueSpeechItem(queueMode, item); } @Override - public int playSilence(IBinder caller, long duration, int queueMode, Bundle params) { - if (!checkNonNull(caller, params)) { + public int playSilence(IBinder caller, long duration, int queueMode, String utteranceId) { + if (!checkNonNull(caller)) { return TextToSpeech.ERROR; } SpeechItem item = new SilenceSpeechItem(caller, - Binder.getCallingUid(), Binder.getCallingPid(), params, duration); + Binder.getCallingUid(), Binder.getCallingPid(), utteranceId, duration); return mSynthHandler.enqueueSpeechItem(queueMode, item); } @@ -912,7 +1397,7 @@ public abstract class TextToSpeechService extends Service { retVal == TextToSpeech.LANG_COUNTRY_VAR_AVAILABLE) { SpeechItem item = new LoadLanguageItem(caller, Binder.getCallingUid(), - Binder.getCallingPid(), null, lang, country, variant); + Binder.getCallingPid(), lang, country, variant); if (mSynthHandler.enqueueSpeechItem(TextToSpeech.QUEUE_ADD, item) != TextToSpeech.SUCCESS) { @@ -943,6 +1428,58 @@ public abstract class TextToSpeechService extends Service { } return true; } + + @Override + public List<VoiceInfo> getVoicesInfo() { + return TextToSpeechService.this.getVoicesInfo(); + } + + @Override + public int speakV2(IBinder callingInstance, + SynthesisRequestV2 request) { + if (!checkNonNull(callingInstance, request)) { + return TextToSpeech.ERROR; + } + + SpeechItem item = new SynthesisSpeechItemV2(callingInstance, + Binder.getCallingUid(), Binder.getCallingPid(), request); + return mSynthHandler.enqueueSpeechItem(TextToSpeech.QUEUE_ADD, item); + } + + @Override + public int synthesizeToFileDescriptorV2(IBinder callingInstance, + ParcelFileDescriptor fileDescriptor, + SynthesisRequestV2 request) { + if (!checkNonNull(callingInstance, request, fileDescriptor)) { + return TextToSpeech.ERROR; + } + + // In test env, ParcelFileDescriptor instance may be EXACTLY the same + // one that is used by client. And it will be closed by a client, thus + // preventing us from writing anything to it. 
+ final ParcelFileDescriptor sameFileDescriptor = ParcelFileDescriptor.adoptFd( + fileDescriptor.detachFd()); + + SpeechItem item = new SynthesisToFileOutputStreamSpeechItemV2(callingInstance, + Binder.getCallingUid(), Binder.getCallingPid(), request, + new ParcelFileDescriptor.AutoCloseOutputStream(sameFileDescriptor)); + return mSynthHandler.enqueueSpeechItem(TextToSpeech.QUEUE_ADD, item); + + } + + @Override + public int playAudioV2( + IBinder callingInstance, Uri audioUri, String utteranceId, + Bundle systemParameters) { + if (!checkNonNull(callingInstance, audioUri, systemParameters)) { + return TextToSpeech.ERROR; + } + + SpeechItem item = new AudioSpeechItemV2(callingInstance, + Binder.getCallingUid(), Binder.getCallingPid(), utteranceId, systemParameters, + audioUri); + return mSynthHandler.enqueueSpeechItem(TextToSpeech.QUEUE_ADD, item); + } }; private class CallbackMap extends RemoteCallbackList<ITextToSpeechCallback> { @@ -964,11 +1501,31 @@ public abstract class TextToSpeechService extends Service { } } - public void dispatchOnDone(Object callerIdentity, String utteranceId) { + public void dispatchOnFallback(Object callerIdentity, String utteranceId) { + ITextToSpeechCallback cb = getCallbackFor(callerIdentity); + if (cb == null) return; + try { + cb.onFallback(utteranceId); + } catch (RemoteException e) { + Log.e(TAG, "Callback onFallback failed: " + e); + } + } + + public void dispatchOnStop(Object callerIdentity, String utteranceId) { + ITextToSpeechCallback cb = getCallbackFor(callerIdentity); + if (cb == null) return; + try { + cb.onStop(utteranceId); + } catch (RemoteException e) { + Log.e(TAG, "Callback onStop failed: " + e); + } + } + + public void dispatchOnSuccess(Object callerIdentity, String utteranceId) { ITextToSpeechCallback cb = getCallbackFor(callerIdentity); if (cb == null) return; try { - cb.onDone(utteranceId); + cb.onSuccess(utteranceId); } catch (RemoteException e) { Log.e(TAG, "Callback onDone failed: " + e); } @@ -985,11 +1542,12 @@ public abstract class TextToSpeechService extends Service { } - public void dispatchOnError(Object callerIdentity, String utteranceId) { + public void dispatchOnError(Object callerIdentity, String utteranceId, + int errorCode) { ITextToSpeechCallback cb = getCallbackFor(callerIdentity); if (cb == null) return; try { - cb.onError(utteranceId); + cb.onError(utteranceId, errorCode); } catch (RemoteException e) { Log.e(TAG, "Callback onError failed: " + e); } @@ -1001,7 +1559,7 @@ public abstract class TextToSpeechService extends Service { synchronized (mCallerToCallback) { mCallerToCallback.remove(caller); } - mSynthHandler.stopForApp(caller); + //mSynthHandler.stopForApp(caller); } @Override @@ -1012,6 +1570,18 @@ public abstract class TextToSpeechService extends Service { } } + public void dispatchVoicesInfoChange(List<VoiceInfo> voicesInfo) { + synchronized (mCallerToCallback) { + for (ITextToSpeechCallback callback : mCallerToCallback.values()) { + try { + callback.onVoicesInfoChange(voicesInfo); + } catch (RemoteException e) { + Log.e(TAG, "Failed to request reconnect", e); + } + } + } + } + private ITextToSpeechCallback getCallbackFor(Object caller) { ITextToSpeechCallback cb; IBinder asBinder = (IBinder) caller; @@ -1021,7 +1591,5 @@ public abstract class TextToSpeechService extends Service { return cb; } - } - } diff --git a/core/java/android/speech/tts/VoiceInfo.aidl b/core/java/android/speech/tts/VoiceInfo.aidl new file mode 100644 index 0000000..4005f8b --- /dev/null +++ b/core/java/android/speech/tts/VoiceInfo.aidl 
@@ -0,0 +1,20 @@ +/* +** +** Copyright 2013, The Android Open Source Project +** +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at +** +** http://www.apache.org/licenses/LICENSE-2.0 +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. +*/ + +package android.speech.tts; + +parcelable VoiceInfo;
\ No newline at end of file diff --git a/core/java/android/speech/tts/VoiceInfo.java b/core/java/android/speech/tts/VoiceInfo.java new file mode 100644 index 0000000..61e4dde --- /dev/null +++ b/core/java/android/speech/tts/VoiceInfo.java @@ -0,0 +1,318 @@ +package android.speech.tts; + +import android.os.Bundle; +import android.os.Parcel; +import android.os.Parcelable; + +import java.util.Locale; + +/** + * Characteristics and features of a Text-To-Speech Voice. Each TTS Engine can expose + * multiple voices for multiple locales, with different sets of features. + * + * Each VoiceInfo has a unique ID. This ID can be obtained using the {@link #getId()} method and + * will persist until the client is asked to re-evaluate the list of available voices in the + * {@link TextToSpeechClient.ConnectionCallbacks#onEngineStatusChange(android.speech.tts.TextToSpeechClient.EngineStatus)} + * callback. The ID can be used to reference a VoiceInfo in an instance of {@link RequestConfig}; + * the {@link TextToSpeechClient.Params#FALLBACK_VOICE_ID} voice parameter is an example of this. + */ +public final class VoiceInfo implements Parcelable { + /** Very low, but still intelligible quality of speech synthesis */ + public static final int QUALITY_VERY_LOW = 100; + + /** Low, not human-like quality of speech synthesis */ + public static final int QUALITY_LOW = 200; + + /** Normal quality of speech synthesis */ + public static final int QUALITY_NORMAL = 300; + + /** High, human-like quality of speech synthesis */ + public static final int QUALITY_HIGH = 400; + + /** Very high, almost human-indistinguishable quality of speech synthesis */ + public static final int QUALITY_VERY_HIGH = 500; + + /** Very low expected synthesizer latency (< 20ms) */ + public static final int LATENCY_VERY_LOW = 100; + + /** Low expected synthesizer latency (~20ms) */ + public static final int LATENCY_LOW = 200; + + /** Normal expected synthesizer latency (~50ms) */ + public static final int LATENCY_NORMAL = 300; + + /** Network based expected synthesizer latency (~200ms) */ + public static final int LATENCY_HIGH = 400; + + /** Very slow network based expected synthesizer latency (> 200ms) */ + public static final int LATENCY_VERY_HIGH = 500; + + /** Additional feature key, with string value, gender of the speaker */ + public static final String FEATURE_SPEAKER_GENDER = "speakerGender"; + + /** Additional feature key, with integer value, speaking speed in words per minute + * when {@link TextToSpeechClient.Params#SPEECH_SPEED} parameter is set to {@code 1.0} */ + public static final String FEATURE_WORDS_PER_MINUTE = "wordsPerMinute"; + + /** + * Additional feature key, with boolean value, that indicates that the voice may need to + * download additional data if used for synthesis. + * + * Making a request with a voice that has this feature may result in a + * {@link TextToSpeechClient.Status#ERROR_DOWNLOADING_ADDITIONAL_DATA} error. It's recommended + * to set the {@link TextToSpeechClient.Params#FALLBACK_VOICE_ID} voice parameter to reference + * a fully installed voice (or network voice) that can serve as a replacement. + * + * Note: It's a good practice for a TTS engine to provide a sensible fallback voice as the + * default value for the {@link TextToSpeechClient.Params#FALLBACK_VOICE_ID} parameter if this + * feature is present.
+ */ + public static final String FEATURE_MAY_AUTOINSTALL = "mayAutoInstall"; + + private final int id; + private final Locale mLocale; + private final int mQuality; + private final int mLatency; + private final boolean mRequiresNetworkConnection; + private final Bundle mParams; + private final Bundle mAdditionalFeatures; + + private VoiceInfo(Parcel in) { + this.id = in.readInt(); + String[] localesData = new String[3]; + in.readStringArray(localesData); + this.mLocale = new Locale(localesData[0], localesData[1], localesData[2]); + + this.mQuality = in.readInt(); + this.mLatency = in.readInt(); + this.mRequiresNetworkConnection = (in.readByte() == 1); + + this.mParams = in.readBundle(); + this.mAdditionalFeatures = in.readBundle(); + } + + private VoiceInfo(int id, + Locale locale, + int quality, + int latency, + boolean requiresNetworkConnection, + Bundle params, + Bundle additionalFeatures) { + this.id = id; + this.mLocale = locale; + this.mQuality = quality; + this.mLatency = latency; + this.mRequiresNetworkConnection = requiresNetworkConnection; + this.mParams = params; + this.mAdditionalFeatures = additionalFeatures; + } + + /** Builder, allows TTS engines to create VoiceInfo instances. */ + public static final class Builder { + private int id; + private Locale locale; + private int quality = VoiceInfo.QUALITY_NORMAL; + private int latency = VoiceInfo.LATENCY_NORMAL; + private boolean requiresNetworkConnection; + private Bundle params; + private Bundle additionalFeatures; + + public Builder() { + + } + + /** + * Copy fields from given VoiceInfo instance. + */ + public Builder(VoiceInfo voiceInfo) { + this.id = voiceInfo.id; + this.locale = voiceInfo.mLocale; + this.quality = voiceInfo.mQuality; + this.latency = voiceInfo.mLatency; + this.requiresNetworkConnection = voiceInfo.mRequiresNetworkConnection; + this.params = (Bundle)voiceInfo.mParams.clone(); + this.additionalFeatures = (Bundle) voiceInfo.mAdditionalFeatures.clone(); + } + + /** + * Sets the voice's unique ID. It will be used by clients to name the voice used by a + * request. + */ + public Builder setId(int id) { + this.id = id; + return this; + } + + /** + * Sets voice locale. This has to be a valid locale, built from ISO 639-1 and ISO 3166-1 + * two letter codes. + */ + public Builder setLocale(Locale locale) { + this.locale = locale; + return this; + } + + /** + * Sets map of all available request parameters with their default values. + * Some common parameter names can be found in {@link TextToSpeechClient.Params} static + * members. + */ + public Builder setParamsWithDefaults(Bundle params) { + this.params = params; + return this; + } + + /** + * Sets map of additional voice features. Some common feature names can be found in + * {@link VoiceInfo} static members. + */ + public Builder setAdditionalFeatures(Bundle additionalFeatures) { + this.additionalFeatures = additionalFeatures; + return this; + } + + /** + * Sets the voice quality (higher is better). + */ + public Builder setQuality(int quality) { + this.quality = quality; + return this; + } + + /** + * Sets the voice latency (lower is better). + */ + public Builder setLatency(int latency) { + this.latency = latency; + return this; + } + + /** + * Sets whether the voice requires network connection to work properly. 
+ */ + public Builder setRequiresNetworkConnection(boolean requiresNetworkConnection) { + this.requiresNetworkConnection = requiresNetworkConnection; + return this; + } + + /** + * @return The built VoiceInfo instance + */ + public VoiceInfo build() { + if (locale == null) { + throw new IllegalStateException("Locale can't be null"); + } + + return new VoiceInfo(id, locale, quality, latency, + requiresNetworkConnection, + ((params == null) ? new Bundle() : + (Bundle)params.clone()), + ((additionalFeatures == null) ? new Bundle() : + (Bundle)additionalFeatures.clone())); + } + } + + /** + * @hide + */ + @Override + public int describeContents() { + return 0; + } + + /** + * @hide + */ + @Override + public void writeToParcel(Parcel dest, int flags) { + dest.writeInt(id); + String[] localesData = new String[]{mLocale.getLanguage(), mLocale.getCountry(), mLocale.getVariant()}; + dest.writeStringArray(localesData); + dest.writeInt(mQuality); + dest.writeInt(mLatency); + dest.writeByte((byte) (mRequiresNetworkConnection ? 1 : 0)); + dest.writeBundle(mParams); + dest.writeBundle(mAdditionalFeatures); + } + + /** + * @hide + */ + public static final Parcelable.Creator<VoiceInfo> CREATOR = new Parcelable.Creator<VoiceInfo>() { + @Override + public VoiceInfo createFromParcel(Parcel in) { + return new VoiceInfo(in); + } + + @Override + public VoiceInfo[] newArray(int size) { + return new VoiceInfo[size]; + } + }; + + /** + * @return The voice's locale + */ + public Locale getLocale() { + return mLocale; + } + + /** + * @return The voice's quality (higher is better) + */ + public int getQuality() { + return mQuality; + } + + /** + * @return The voice's latency (lower is better) + */ + public int getLatency() { + return mLatency; + } + + /** + * @return Does the Voice require a network connection to work. + */ + public boolean getRequiresNetworkConnection() { + return mRequiresNetworkConnection; + } + + /** + * @return Bundle of all available parameters with their default values. + */ + public Bundle getParamsWithDefaults() { + return mParams; + } + + /** + * @return Unique voice identifier. + * + * Each VoiceInfo has an unique ID, that persists until client is asked to re-evaluate the + * set of the available languages in the {@link TextToSpeechClient.ConnectionCallbacks#onEngineStatusChange(android.speech.tts.TextToSpeechClient.EngineStatus)} + * callback. + */ + public int getId() { + return id; + } + + /** + * @return Additional features of the voice. + */ + public Bundle getAdditionalFeatures() { + return mAdditionalFeatures; + } + + @Override + public String toString() { + StringBuilder builder = new StringBuilder(64); + return builder.append("VoiceInfo[Id: ").append(id) + .append(" ,locale: ").append(mLocale) + .append(" ,quality: ").append(mQuality) + .append(" ,latency: ").append(mLatency) + .append(" ,requiresNetwork: ").append(mRequiresNetworkConnection) + .append(" ,paramsWithDefaults: ").append(mParams.toString()) + .append(" ,additionalFeatures: ").append(mAdditionalFeatures.toString()) + .append("]").toString(); + } +} |
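Editor's note: the sketch below is not part of this change. It is an illustrative example, under the API added by this patch, of how a third-party engine might opt into the V2 surface directly (implementsV2API(), checkVoicesInfo(), onSynthesizeTextV2()) instead of relying on the built-in V2-to-V1 adapter. The package, class name, voice ID, and sample rate are hypothetical, and the audio streaming calls (start/audioAvailable/done) are the pre-existing SynthesisCallback methods that the V2 path reuses.

package com.example.tts; // hypothetical package and class names

import android.media.AudioFormat;
import android.os.Bundle;
import android.speech.tts.SynthesisCallback;
import android.speech.tts.SynthesisRequest;
import android.speech.tts.SynthesisRequestV2;
import android.speech.tts.TextToSpeech;
import android.speech.tts.TextToSpeechClient;
import android.speech.tts.TextToSpeechService;
import android.speech.tts.VoiceInfo;

import java.util.Collections;
import java.util.List;
import java.util.Locale;

public class ExampleTtsService extends TextToSpeechService {

    @Override
    protected boolean implementsV2API() {
        // Opt out of the built-in V2-to-V1 adapter; checkVoicesInfo() and
        // onSynthesizeTextV2() below are the real implementations.
        return true;
    }

    @Override
    protected List<VoiceInfo> checkVoicesInfo() {
        // Advertise a single embedded US English voice. The framework caches
        // this list and serves it to all connected V2 clients.
        Bundle defaults = new Bundle();
        defaults.putFloat(TextToSpeechClient.Params.SPEECH_SPEED, 1.0f);
        defaults.putFloat(TextToSpeechClient.Params.SPEECH_PITCH, 1.0f);

        VoiceInfo voice = new VoiceInfo.Builder()
                .setId(1)
                .setLocale(Locale.US)
                .setQuality(VoiceInfo.QUALITY_NORMAL)
                .setLatency(VoiceInfo.LATENCY_LOW)
                .setRequiresNetworkConnection(false)
                .setParamsWithDefaults(defaults)
                .build();
        return Collections.singletonList(voice);
    }

    @Override
    protected void onSynthesizeTextV2(SynthesisRequestV2 request, VoiceInfo selectedVoice,
            SynthesisCallback callback) {
        // Stream 16-bit mono PCM back to the framework. A real engine would
        // synthesize request.getText() while honoring request.getVoiceParams();
        // this stub just emits 100 ms of silence.
        callback.start(16000, AudioFormat.ENCODING_PCM_16BIT, 1);
        byte[] silence = new byte[3200];
        callback.audioAvailable(silence, 0, silence.length);
        callback.done();
    }

    // The V1 callbacks remain abstract on TextToSpeechService and still have
    // to be provided; minimal stubs are shown here.
    @Override
    protected int onIsLanguageAvailable(String lang, String country, String variant) {
        return "eng".equals(lang) ? TextToSpeech.LANG_AVAILABLE : TextToSpeech.LANG_NOT_SUPPORTED;
    }

    @Override
    protected String[] onGetLanguage() {
        return new String[] { "eng", "USA", "" };
    }

    @Override
    protected int onLoadLanguage(String lang, String country, String variant) {
        return onIsLanguageAvailable(lang, country, variant);
    }

    @Override
    protected void onStop() {
        // Abort any in-progress synthesis here.
    }

    @Override
    protected void onSynthesizeText(SynthesisRequest request, SynthesisCallback callback) {
        // Legacy V1 entry point, still reachable from V1 clients.
        callback.error();
    }
}

If such an engine later installs or removes voice data, calling forceVoicesInfoCheck() makes the framework re-run checkVoicesInfo(), notify the synthesis thread through onVoicesInfoChange(), and push the refreshed voice list to connected clients, as described in the Javadoc added above.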
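Editor's note: a small worked example (not part of the change) of the parameter mapping performed by the V2-to-V1 adapter in onSynthesizeTextV2() above. V2 clients express rate and pitch as floats with 1.0 as the default, while the V1 interface uses integers with 100 as the default, so the adapter scales by 100; the client values below are hypothetical.

float speechSpeed = 1.5f;  // TextToSpeechClient.Params.SPEECH_SPEED from the V2 request
float speechPitch = 0.8f;  // TextToSpeechClient.Params.SPEECH_PITCH from the V2 request
int v1Rate = (int) (speechSpeed * 100);   // 150, passed on as Engine.KEY_PARAM_RATE
int v1Pitch = (int) (speechPitch * 100);  // 80, passed on as Engine.KEY_PARAM_PITCH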
