summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rwxr-xr-x  core/java/android/speech/tts/TextToSpeech.java  111
-rw-r--r--  packages/TtsService/jni/android_tts_SynthProxy.cpp  54
-rwxr-xr-x  packages/TtsService/src/android/tts/SynthProxy.java  11
-rwxr-xr-x  packages/TtsService/src/android/tts/TtsService.java  17
4 files changed, 144 insertions, 49 deletions
diff --git a/core/java/android/speech/tts/TextToSpeech.java b/core/java/android/speech/tts/TextToSpeech.java
index 841257f..f010076 100755
--- a/core/java/android/speech/tts/TextToSpeech.java
+++ b/core/java/android/speech/tts/TextToSpeech.java
@@ -151,6 +151,23 @@ public class TextToSpeech {
/**
* {@hide}
*/
+ public static final float DEFAULT_VOLUME = 1.0f;
+ /**
+ * {@hide}
+ */
+ protected static final String DEFAULT_VOLUME_STRING = "1.0";
+ /**
+ * {@hide}
+ */
+ public static final float DEFAULT_PAN = 0.0f;
+ /**
+ * {@hide}
+ */
+ protected static final String DEFAULT_PAN_STRING = "0.0";
+
+ /**
+ * {@hide}
+ */
public static final int USE_DEFAULTS = 0; // false
/**
* {@hide}
@@ -331,6 +348,24 @@ public class TextToSpeech {
* @see TextToSpeech#synthesizeToFile(String, HashMap, String)
*/
public static final String KEY_PARAM_UTTERANCE_ID = "utteranceId";
+ /**
+ * {@hide}
+ * Parameter key to specify the speech volume relative to the current stream type
+ * volume used when speaking text. Volume is specified as a float ranging from 0 to 1
+ * where 0 is silence, and 1 is the maximum volume.
+ * The value is passed as a string; the service falls back to the default volume when the
+ * string cannot be parsed as a float, and the native layer clips values outside of [0, 1].
+ * NOTE(review): in this change playEarcon() passes null parameters to the service, so this
+ * key does not appear to be forwarded for earcons -- confirm before relying on the
+ * playEarcon reference below.
+ * @see TextToSpeech#speak(String, int, HashMap)
+ * @see TextToSpeech#playEarcon(String, int, HashMap)
+ */
+ public static final String KEY_PARAM_VOLUME = "volume";
+ /**
+ * {@hide}
+ * Parameter key to specify how the speech is panned from left to right when speaking text.
+ * Pan is specified as a float ranging from -1 to +1 where -1 maps to a hard-left pan,
+ * 0 to center, and +1 to hard-right.
+ * The value is passed as a string; the service falls back to the default pan when the
+ * string cannot be parsed as a float, and the native layer clips values outside of [-1, +1].
+ * NOTE(review): in this change playEarcon() passes null parameters to the service, so this
+ * key does not appear to be forwarded for earcons -- confirm before relying on the
+ * playEarcon reference below.
+ * @see TextToSpeech#speak(String, int, HashMap)
+ * @see TextToSpeech#playEarcon(String, int, HashMap)
+ */
+ public static final String KEY_PARAM_PAN = "pan";
// key positions in the array of cached parameters
/**
@@ -371,7 +406,18 @@ public class TextToSpeech {
/**
* {@hide}
*/
- protected static final int NB_CACHED_PARAMS = 8;
+ protected static final int PARAM_POSITION_VOLUME = 16;
+
+ /**
+ * {@hide}
+ */
+ protected static final int PARAM_POSITION_PAN = 18;
+
+
+ /**
+ * {@hide}
+ * Total number of cached speech parameters, counted as key/value pairs.
+ * Each parameter occupies two consecutive slots (key at PARAM_POSITION_x, value at
+ * PARAM_POSITION_x + 1), so this must equal (highest PARAM_POSITION_x / 2) + 1.
+ * With PARAM_POSITION_PAN = 18 being the last position, that is 10 (it was 8 before
+ * volume and pan were added).
+ * NOTE(review): presumably the cache array is allocated with 2 * NB_CACHED_PARAMS
+ * entries -- confirm against the constructor.
+ */
+ protected static final int NB_CACHED_PARAMS = 10;
}
/**
@@ -416,6 +462,8 @@ public class TextToSpeech {
mCachedParams[Engine.PARAM_POSITION_UTTERANCE_ID] = Engine.KEY_PARAM_UTTERANCE_ID;
mCachedParams[Engine.PARAM_POSITION_ENGINE] = Engine.KEY_PARAM_ENGINE;
mCachedParams[Engine.PARAM_POSITION_PITCH] = Engine.KEY_PARAM_PITCH;
+ mCachedParams[Engine.PARAM_POSITION_VOLUME] = Engine.KEY_PARAM_VOLUME;
+ mCachedParams[Engine.PARAM_POSITION_PAN] = Engine.KEY_PARAM_PAN;
// Leave all defaults that are shown in Settings uninitialized/at the default
// so that the values set in Settings will take effect if the application does
@@ -429,6 +477,8 @@ public class TextToSpeech {
mCachedParams[Engine.PARAM_POSITION_UTTERANCE_ID + 1] = "";
mCachedParams[Engine.PARAM_POSITION_ENGINE + 1] = "";
mCachedParams[Engine.PARAM_POSITION_PITCH + 1] = "100";
+ mCachedParams[Engine.PARAM_POSITION_VOLUME + 1] = Engine.DEFAULT_VOLUME_STRING;
+ mCachedParams[Engine.PARAM_POSITION_PAN + 1] = Engine.DEFAULT_PAN_STRING;
initTts();
}
@@ -717,24 +767,18 @@ public class TextToSpeech {
{
synchronized (mStartLock) {
int result = ERROR;
- Log.i("TTS received: ", text);
+ Log.i("TTS", "speak() queueMode=" + queueMode);
if (!mStarted) {
return result;
}
try {
if ((params != null) && (!params.isEmpty())) {
- String extra = params.get(Engine.KEY_PARAM_STREAM);
- if (extra != null) {
- mCachedParams[Engine.PARAM_POSITION_STREAM + 1] = extra;
- }
- extra = params.get(Engine.KEY_PARAM_UTTERANCE_ID);
- if (extra != null) {
- mCachedParams[Engine.PARAM_POSITION_UTTERANCE_ID + 1] = extra;
- }
- extra = params.get(Engine.KEY_PARAM_ENGINE);
- if (extra != null) {
- mCachedParams[Engine.PARAM_POSITION_ENGINE + 1] = extra;
- }
+ setCachedParam(params, Engine.KEY_PARAM_STREAM, Engine.PARAM_POSITION_STREAM);
+ setCachedParam(params, Engine.KEY_PARAM_UTTERANCE_ID,
+ Engine.PARAM_POSITION_UTTERANCE_ID);
+ setCachedParam(params, Engine.KEY_PARAM_ENGINE, Engine.PARAM_POSITION_ENGINE);
+ setCachedParam(params, Engine.KEY_PARAM_VOLUME, Engine.PARAM_POSITION_VOLUME);
+ setCachedParam(params, Engine.KEY_PARAM_PAN, Engine.PARAM_POSITION_PAN);
}
result = mITts.speak(mPackageName, text, queueMode, mCachedParams);
} catch (RemoteException e) {
@@ -791,10 +835,9 @@ public class TextToSpeech {
if (extra != null) {
mCachedParams[Engine.PARAM_POSITION_STREAM + 1] = extra;
}
- extra = params.get(Engine.KEY_PARAM_UTTERANCE_ID);
- if (extra != null) {
- mCachedParams[Engine.PARAM_POSITION_UTTERANCE_ID + 1] = extra;
- }
+ setCachedParam(params, Engine.KEY_PARAM_STREAM, Engine.PARAM_POSITION_STREAM);
+ setCachedParam(params, Engine.KEY_PARAM_UTTERANCE_ID,
+ Engine.PARAM_POSITION_UTTERANCE_ID);
}
result = mITts.playEarcon(mPackageName, earcon, queueMode, null);
} catch (RemoteException e) {
@@ -845,10 +888,8 @@ public class TextToSpeech {
}
try {
if ((params != null) && (!params.isEmpty())) {
- String extra = params.get(Engine.KEY_PARAM_UTTERANCE_ID);
- if (extra != null) {
- mCachedParams[Engine.PARAM_POSITION_UTTERANCE_ID + 1] = extra;
- }
+ setCachedParam(params, Engine.KEY_PARAM_UTTERANCE_ID,
+ Engine.PARAM_POSITION_UTTERANCE_ID);
}
result = mITts.playSilence(mPackageName, durationInMs, queueMode, mCachedParams);
} catch (RemoteException e) {
@@ -870,6 +911,7 @@ public class TextToSpeech {
mStarted = false;
initTts();
} finally {
+ resetCachedParams();
return result;
}
}
@@ -1224,6 +1266,7 @@ public class TextToSpeech {
*/
public int synthesizeToFile(String text, HashMap<String,String> params,
String filename) {
+ Log.i("TTS", "synthesizeToFile()");
synchronized (mStartLock) {
int result = ERROR;
if (!mStarted) {
@@ -1232,14 +1275,9 @@ public class TextToSpeech {
try {
if ((params != null) && (!params.isEmpty())) {
// no need to read the stream type here
- String extra = params.get(Engine.KEY_PARAM_UTTERANCE_ID);
- if (extra != null) {
- mCachedParams[Engine.PARAM_POSITION_UTTERANCE_ID + 1] = extra;
- }
- extra = params.get(Engine.KEY_PARAM_ENGINE);
- if (extra != null) {
- mCachedParams[Engine.PARAM_POSITION_ENGINE + 1] = extra;
- }
+ setCachedParam(params, Engine.KEY_PARAM_UTTERANCE_ID,
+ Engine.PARAM_POSITION_UTTERANCE_ID);
+ setCachedParam(params, Engine.KEY_PARAM_ENGINE, Engine.PARAM_POSITION_ENGINE);
}
result = mITts.synthesizeToFile(mPackageName, text, mCachedParams, filename) ?
SUCCESS : ERROR;
@@ -1277,6 +1315,19 @@ public class TextToSpeech {
mCachedParams[Engine.PARAM_POSITION_STREAM + 1] =
String.valueOf(Engine.DEFAULT_STREAM);
mCachedParams[Engine.PARAM_POSITION_UTTERANCE_ID+ 1] = "";
+ mCachedParams[Engine.PARAM_POSITION_VOLUME + 1] = Engine.DEFAULT_VOLUME_STRING;
+ mCachedParams[Engine.PARAM_POSITION_PAN + 1] = Engine.DEFAULT_PAN_STRING;
+ }
+
+ /**
+ * Copies the value mapped to {@code key} in {@code params} into the cached parameter
+ * array, in the value slot that follows the key slot ({@code keyIndex + 1}).
+ * The cached value is left unchanged when {@code params} holds no value for {@code key}.
+ */
+ private void setCachedParam(HashMap<String,String> params, String key, int keyIndex) {
+ final String value = params.get(key);
+ if (value == null) {
+ return;
+ }
+ mCachedParams[keyIndex + 1] = value;
}
/**
diff --git a/packages/TtsService/jni/android_tts_SynthProxy.cpp b/packages/TtsService/jni/android_tts_SynthProxy.cpp
index 8dc88db..27d1fc0 100644
--- a/packages/TtsService/jni/android_tts_SynthProxy.cpp
+++ b/packages/TtsService/jni/android_tts_SynthProxy.cpp
@@ -17,7 +17,7 @@
#include <stdio.h>
#include <unistd.h>
-#define LOG_TAG "SynthProxy"
+#define LOG_TAG "SynthProxyJNI"
#include <utils/Log.h>
#include <nativehelper/jni.h>
@@ -33,8 +33,8 @@
#define DEFAULT_TTS_FORMAT AudioSystem::PCM_16_BIT
#define DEFAULT_TTS_NB_CHANNELS 1
#define DEFAULT_TTS_BUFFERSIZE 2048
-// TODO use the TTS stream type when available
#define DEFAULT_TTS_STREAM_TYPE AudioSystem::MUSIC
+#define DEFAULT_VOLUME 1.0f
// EQ + BOOST parameters
#define FILTER_LOWSHELF_ATTENUATION -18.0f // in dB
@@ -154,7 +154,7 @@ static Mutex engineMutex;
class SynthProxyJniStorage {
public :
jobject tts_ref;
- android_tts_engine_t* mEngine;
+ android_tts_engine_t* mEngine;
void* mEngineLibHandle;
AudioTrack* mAudioOut;
int8_t mPlayState;
@@ -165,6 +165,7 @@ class SynthProxyJniStorage {
int mNbChannels;
int8_t * mBuffer;
size_t mBufferSize;
+ float mVolume[2];
SynthProxyJniStorage() {
tts_ref = NULL;
@@ -179,6 +180,8 @@ class SynthProxyJniStorage {
mBufferSize = DEFAULT_TTS_BUFFERSIZE;
mBuffer = new int8_t[mBufferSize];
memset(mBuffer, 0, mBufferSize);
+ mVolume[AudioTrack::LEFT] = DEFAULT_VOLUME;
+ mVolume[AudioTrack::RIGHT] = DEFAULT_VOLUME;
}
~SynthProxyJniStorage() {
@@ -189,7 +192,7 @@ class SynthProxyJniStorage {
mEngine = NULL;
}
if (mEngineLibHandle) {
- //LOGE("~SynthProxyJniStorage(): before close library");
+ //LOGV("~SynthProxyJniStorage(): before close library");
int res = dlclose(mEngineLibHandle);
LOGE_IF( res != 0, "~SynthProxyJniStorage(): dlclose returned %d", res);
}
@@ -241,7 +244,7 @@ class SynthProxyJniStorage {
mAudioOut = NULL;
} else {
//LOGI("AudioTrack OK");
- mAudioOut->setVolume(1.0f, 1.0f);
+ mAudioOut->setVolume(mVolume[AudioTrack::LEFT], mVolume[AudioTrack::RIGHT]);
LOGV("AudioTrack ready");
}
mPlayLock.unlock();
@@ -277,7 +280,7 @@ extern "C" android_tts_callback_status_t
__ttsSynthDoneCB(void ** pUserdata, uint32_t rate,
android_tts_audio_format_t format, int channel,
int8_t **pWav, size_t *pBufferSize,
- android_tts_synth_status_t status)
+ android_tts_synth_status_t status)
{
//LOGV("ttsSynthDoneCallback: %d bytes", bufferSize);
AudioSystem::audio_format encoding;
@@ -618,7 +621,7 @@ android_tts_SynthProxy_setSpeechRate(JNIEnv *env, jobject thiz, jint jniData,
Mutex::Autolock l(engineMutex);
SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData;
- LOGI("setting speech rate to %d", speechRate);
+ //LOGI("setting speech rate to %d", speechRate);
android_tts_engine_t *engine = pSynthData->mEngine;
if (engine) {
@@ -647,7 +650,7 @@ android_tts_SynthProxy_setPitch(JNIEnv *env, jobject thiz, jint jniData,
sprintf(buffer, "%d", pitch);
SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData;
- LOGI("setting pitch to %d", pitch);
+ //LOGI("setting pitch to %d", pitch);
android_tts_engine_t *engine = pSynthData->mEngine;
if (engine) {
@@ -783,7 +786,7 @@ android_tts_SynthProxy_synthesizeToFile(JNIEnv *env, jobject thiz, jint jniData,
static int
android_tts_SynthProxy_speak(JNIEnv *env, jobject thiz, jint jniData,
- jstring textJavaString, jint javaStreamType)
+ jstring textJavaString, jint javaStreamType, jfloat volume, jfloat pan)
{
int result = ANDROID_TTS_FAILURE;
@@ -798,9 +801,34 @@ android_tts_SynthProxy_speak(JNIEnv *env, jobject thiz, jint jniData,
SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData;
- pSynthData->mPlayLock.lock();
- pSynthData->mPlayState = SYNTHPLAYSTATE_IS_PLAYING;
- pSynthData->mPlayLock.unlock();
+ {//scope for lock on mPlayLock
+ Mutex::Autolock _l(pSynthData->mPlayLock);
+
+ pSynthData->mPlayState = SYNTHPLAYSTATE_IS_PLAYING;
+
+ // clip volume and pan
+ float vol = (volume > 1.0f) ? 1.0f : (volume < 0.0f) ? 0.0f : volume;
+ float panning = (pan > 1.0f) ? 1.0f : (pan < -1.0f) ? -1.0f : pan;
+ // compute playback volume based on volume and pan, using balance rule, in order to avoid
+ // lowering volume when panning in center
+ pSynthData->mVolume[AudioTrack::LEFT] = vol;
+ pSynthData->mVolume[AudioTrack::RIGHT] = vol;
+ if (panning > 0.0f) {
+ pSynthData->mVolume[AudioTrack::LEFT] *= (1.0f - panning);
+ } else if (panning < 0.0f) {
+ pSynthData->mVolume[AudioTrack::RIGHT] *= (1.0f + panning);
+ }
+
+ // apply the volume if there is an output
+ if (NULL != pSynthData->mAudioOut) {
+ pSynthData->mAudioOut->setVolume(pSynthData->mVolume[AudioTrack::LEFT],
+ pSynthData->mVolume[AudioTrack::RIGHT]);
+ }
+
+ //LOGV("android_tts_SynthProxy_speak() vol=%.3f pan=%.3f, mVolume=[%.1f %.1f]",
+ // volume, pan,
+ // pSynthData->mVolume[AudioTrack::LEFT], pSynthData->mVolume[AudioTrack::RIGHT]);
+ }
afterSynthData_t* pForAfter = new (afterSynthData_t);
pForAfter->jniStorage = jniData;
@@ -935,7 +963,7 @@ static JNINativeMethod gMethods[] = {
(void*)android_tts_SynthProxy_stopSync
},
{ "native_speak",
- "(ILjava/lang/String;I)I",
+ "(ILjava/lang/String;IFF)I",
(void*)android_tts_SynthProxy_speak
},
{ "native_synthesizeToFile",
diff --git a/packages/TtsService/src/android/tts/SynthProxy.java b/packages/TtsService/src/android/tts/SynthProxy.java
index 525a504..f5f5fcf 100755
--- a/packages/TtsService/src/android/tts/SynthProxy.java
+++ b/packages/TtsService/src/android/tts/SynthProxy.java
@@ -78,12 +78,13 @@ public class SynthProxy {
/**
* Synthesize speech and speak it directly using AudioTrack.
+ * A stream type outside of the valid range is logged as an error and replaced by
+ * AudioManager.STREAM_MUSIC.
+ * @param text the text to synthesize
+ * @param streamType the audio stream type to play the speech on
+ * @param volume the speech volume; clipped to [0, 1] by the native layer
+ * @param pan the left/right panning; clipped to [-1, +1] by the native layer
+ * @return the result code returned by the native speak call
*/
- public int speak(String text, int streamType) {
+ public int speak(String text, int streamType, float volume, float pan) {
+ Log.i(TAG, "speak() on stream "+ streamType);
if ((streamType > -1) && (streamType < AudioSystem.getNumStreamTypes())) {
- return native_speak(mJniData, text, streamType);
+ return native_speak(mJniData, text, streamType, volume, pan);
} else {
Log.e("SynthProxy", "Trying to speak with invalid stream type " + streamType);
- return native_speak(mJniData, text, AudioManager.STREAM_MUSIC);
+ return native_speak(mJniData, text, AudioManager.STREAM_MUSIC, volume, pan);
}
}
@@ -93,6 +94,7 @@ public class SynthProxy {
* "/sdcard/???.wav" is recommended.
*/
public int synthesizeToFile(String text, String filename) {
+ Log.i(TAG, "synthesizeToFile() to file "+ filename);
return native_synthesizeToFile(mJniData, text, filename);
}
@@ -192,7 +194,8 @@ public class SynthProxy {
private native final int native_stopSync(int jniData);
- private native final int native_speak(int jniData, String text, int streamType);
+ private native final int native_speak(int jniData, String text, int streamType, float volume,
+ float pan);
private native final int native_synthesizeToFile(int jniData, String text, String filename);
diff --git a/packages/TtsService/src/android/tts/TtsService.java b/packages/TtsService/src/android/tts/TtsService.java
index 08bbfb2..c562327 100755
--- a/packages/TtsService/src/android/tts/TtsService.java
+++ b/packages/TtsService/src/android/tts/TtsService.java
@@ -121,7 +121,6 @@ public class TtsService extends Service implements OnCompletionListener {
private static final int SPEECHQUEUELOCK_TIMEOUT = 5000;
private static final int MAX_SPEECH_ITEM_CHAR_LENGTH = 4000;
private static final int MAX_FILENAME_LENGTH = 250;
- // TODO use the TTS stream type when available
private static final int DEFAULT_STREAM_TYPE = AudioManager.STREAM_MUSIC;
// TODO use TextToSpeech.DEFAULT_SYNTH once it is unhidden
private static final String DEFAULT_SYNTH = "com.svox.pico";
@@ -791,6 +790,8 @@ public class TtsService extends Service implements OnCompletionListener {
String speechRate = "";
String engine = "";
String pitch = "";
+ float volume = TextToSpeech.Engine.DEFAULT_VOLUME;
+ float pan = TextToSpeech.Engine.DEFAULT_PAN;
if (speechItem.mParams != null){
for (int i = 0; i < speechItem.mParams.size() - 1; i = i + 2){
String param = speechItem.mParams.get(i);
@@ -816,6 +817,18 @@ public class TtsService extends Service implements OnCompletionListener {
engine = speechItem.mParams.get(i + 1);
} else if (param.equals(TextToSpeech.Engine.KEY_PARAM_PITCH)) {
pitch = speechItem.mParams.get(i + 1);
+ } else if (param.equals(TextToSpeech.Engine.KEY_PARAM_VOLUME)) {
+ try {
+ volume = Float.parseFloat(speechItem.mParams.get(i + 1));
+ } catch (NumberFormatException e) {
+ volume = TextToSpeech.Engine.DEFAULT_VOLUME;
+ }
+ } else if (param.equals(TextToSpeech.Engine.KEY_PARAM_PAN)) {
+ try {
+ pan = Float.parseFloat(speechItem.mParams.get(i + 1));
+ } catch (NumberFormatException e) {
+ pan = TextToSpeech.Engine.DEFAULT_PAN;
+ }
}
}
}
@@ -844,7 +857,7 @@ public class TtsService extends Service implements OnCompletionListener {
setPitch("", getDefaultPitch());
}
try {
- sNativeSynth.speak(speechItem.mText, streamType);
+ sNativeSynth.speak(speechItem.mText, streamType, volume, pan);
} catch (NullPointerException e) {
// synth will become null during onDestroy()
Log.v(SERVICE_TAG, " null synth, can't speak");