summaryrefslogtreecommitdiffstats
path: root/pico
diff options
context:
space:
mode:
authorJean-Michel Trivi <jmtrivi@google.com>2009-07-01 18:13:11 -0700
committerJean-Michel Trivi <jmtrivi@google.com>2009-07-01 18:43:33 -0700
commit39358f0dacad8cece6c2d3ef1055030f57090c79 (patch)
tree94e7279dc71010d536cc7bd9743ab4b603927d20 /pico
parent5f6105e73f0c74acd10126175d4952ade10a0b05 (diff)
downloadexternal_svox-39358f0dacad8cece6c2d3ef1055030f57090c79.zip
external_svox-39358f0dacad8cece6c2d3ef1055030f57090c79.tar.gz
external_svox-39358f0dacad8cece6c2d3ef1055030f57090c79.tar.bz2
- Integrate SVOX's latest code drop.
- Fix a bug where TTS_SYNTH_DONE was signaled to the TTS service synth proxy even though the synthesis wasn't done (more data to be played). - Deprecated the synthesizeIPA() function as it will be supported by the phoneme tag in synthesize().
Diffstat (limited to 'pico')
-rwxr-xr-xpico/Android.mk4
-rw-r--r--pico/tts/com_svox_picottsengine.cpp1172
-rwxr-xr-xpico/tts/svox_ssml_parser.cpp893
-rwxr-xr-xpico/tts/svox_ssml_parser.h162
4 files changed, 1802 insertions, 429 deletions
diff --git a/pico/Android.mk b/pico/Android.mk
index 485ca70..7a182e4 100755
--- a/pico/Android.mk
+++ b/pico/Android.mk
@@ -25,7 +25,7 @@ include $(BUILD_PACKAGE)
LOCAL_PATH:= $(TOP_LOCAL_PATH)/tts
include $(CLEAR_VARS)
-LOCAL_SRC_FILES:= com_svox_picottsengine.cpp
+LOCAL_SRC_FILES:= com_svox_picottsengine.cpp svox_ssml_parser.cpp
LOCAL_C_INCLUDES += \
external/svox/pico/lib \
@@ -33,7 +33,7 @@ LOCAL_C_INCLUDES += \
LOCAL_STATIC_LIBRARIES:= libsvoxpico
-LOCAL_SHARED_LIBRARIES:= libcutils
+LOCAL_SHARED_LIBRARIES:= libcutils libexpat libutils
LOCAL_MODULE:= libttspico
diff --git a/pico/tts/com_svox_picottsengine.cpp b/pico/tts/com_svox_picottsengine.cpp
index 8a6d1d5..49c1f22 100644
--- a/pico/tts/com_svox_picottsengine.cpp
+++ b/pico/tts/com_svox_picottsengine.cpp
@@ -1,4 +1,5 @@
-/*
+/* com_svox_picottsengine.cpp
+
* Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
*
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -12,11 +13,35 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
- *
- * History:
- * 2009-05-18 -- initial version
- * 2009-06-04 -- updated for new TtsEngine interface
- *
+
+ This is the Manager layer. It sits on top of the native Pico engine
+ and provides the interface to the defined Google TTS engine API.
+ The Google engine API is the boundary to allow a TTS engine to be swapped.
+ The Manager layer also provides the SSML tag interpretation.
+ The supported SSML tags are mapped to corresponding tags natively supported by Pico.
+ Native Pico functions always begin with picoXXX.
+
+ In the Pico engine, the language cannot be changed independently of the voice.
+ If either the voice or locale/language are changed, a new resource is loaded.
+
+ Only a subset of SSML 1.0 tags are supported.
+ Some SSML tags involve significant complexity.
+ If the language is changed through an SSML tag, there is a latency for the load.
+
+ History:
+ 2009-07-01 -- clean up & documentation
+ 2009-06-29 -- integrated SSML parser and IPA to XSAMPA conversion
+ 2009-06-29 -- revised for C90 compliance
+ 2009-06-04 -- updated for new TtsEngine interface
+ 2009-05-18 -- initial version
+
+
+ ToDo:
+ - duplicate 'phoneme ph=' ***
+ - normalize IPA strings **
+ - change voice as well as language *
+ Changing of voice/language is complex due to potentially multiple swaps
+ under SSML and the latent effects. This requires storing a history of tags/sets.
*/
#include <stdio.h>
@@ -25,70 +50,75 @@
#define LOG_TAG "SVOX Pico Engine"
#include <utils/Log.h>
+#include <utils/String16.h> /* for strlen16 */
#include <android_runtime/AndroidRuntime.h>
#include <tts/TtsEngine.h>
+#include <cutils/jstring.h>
#include <picoapi.h>
#include <picodefs.h>
+#include "svox_ssml_parser.h"
using namespace android;
/* adaptation layer defines */
#define PICO_MEM_SIZE 2500000
-#define PICO_MIN_RATE 20
/* speaking rate */
-#define PICO_DEF_RATE 100
+#define PICO_MIN_RATE 20
#define PICO_MAX_RATE 500
-#define PICO_MIN_PITCH 50
+#define PICO_DEF_RATE 100
/* speaking pitch */
-#define PICO_DEF_PITCH 100
+#define PICO_MIN_PITCH 50
#define PICO_MAX_PITCH 200
-#define PICO_MIN_VOLUME 0
+#define PICO_DEF_PITCH 100
/* speaking volume */
-#define PICO_DEF_VOLUME 400
+#define PICO_MIN_VOLUME 0
#define PICO_MAX_VOLUME 500
+#define PICO_DEF_VOLUME 250
+
/* string constants */
#define MAX_OUTBUF_SIZE 128
-const char* PICO_LINGWARE_PATH = "/sdcard/svox/";
-const char* PICO_VOICE_NAME = "PicoVoice";
-const char* PICO_SPEED_OPEN_TAG = "<speed level='%d'>";
-const char* PICO_SPEED_CLOSE_TAG = "</speed>";
-const char* PICO_PITCH_OPEN_TAG = "<pitch level='%d'>";
-const char* PICO_PITCH_CLOSE_TAG = "</pitch>";
-const char* PICO_VOLUME_OPEN_TAG = "<volume level='%d'>";
-const char* PICO_VOLUME_CLOSE_TAG = "</volume>";
-const char* PICO_PHONEME_OPEN_TAG = "<phoneme ph=\"%s\">";
-
-/* supported voices */
-const char* picoSupportedLangIso3[] = { "eng", "eng", "deu", "spa", "fra", "ita" };
-const char* picoSupportedCountryIso3[] = { "USA", "GBR", "DEU", "ESP", "FRA", "ITA" };
-const char* picoSupportedLang[] = { "en-rUS", "en-rGB", "de-rDE", "es-rES", "fr-rFR", "it-rIT" };
-const char* picoInternalLang[] = { "en-US", "en-GB", "de-DE", "es-ES", "fr-FR", "it-IT" };
-const char* picoInternalTaLingware[] = { "en-US_ta.bin", "en-GB_ta.bin", "de-DE_ta.bin", "es-ES_ta.bin", "fr-FR_ta.bin", "it-IT_ta.bin" };
-const char* picoInternalSgLingware[] = { "en-US_lh0_sg.bin", "en-GB_kh0_sg.bin", "de-DE_gl0_sg.bin", "es-ES_zl0_sg.bin", "fr-FR_nk0_sg.bin", "it-IT_cm0_sg.bin" };
-const char* picoInternalUtppLingware[] = { "en-US_utpp.bin", "en-GB_utpp.bin", "de-DE_utpp.bin", "es-ES_utpp.bin", "fr-FR_utpp.bin", "it-IT_utpp.bin" };
-const int picoNumSupportedLang = 6;
+const char * PICO_LINGWARE_PATH = "/sdcard/svox/";
+const char * PICO_VOICE_NAME = "PicoVoice";
+const char * PICO_SPEED_OPEN_TAG = "<speed level='%d'>";
+const char * PICO_SPEED_CLOSE_TAG = "</speed>";
+const char * PICO_PITCH_OPEN_TAG = "<pitch level='%d'>";
+const char * PICO_PITCH_CLOSE_TAG = "</pitch>";
+const char * PICO_VOLUME_OPEN_TAG = "<volume level='%d'>";
+const char * PICO_VOLUME_CLOSE_TAG = "</volume>";
+const char * PICO_PHONEME_OPEN_TAG = "<phoneme ph='";
+
+/* supported voices
+ Pico does not separately specify the voice and locale. */
+const char * picoSupportedLangIso3[] = { "eng", "eng", "deu", "spa", "fra", "ita" };
+const char * picoSupportedCountryIso3[] = { "USA", "GBR", "DEU", "ESP", "FRA", "ITA" };
+const char * picoSupportedLang[] = { "en-rUS", "en-rGB", "de-rDE", "es-rES", "fr-rFR", "it-rIT" };
+const char * picoInternalLang[] = { "en-US", "en-GB", "de-DE", "es-ES", "fr-FR", "it-IT" };
+const char * picoInternalTaLingware[] = { "en-US_ta.bin", "en-GB_ta.bin", "de-DE_ta.bin", "es-ES_ta.bin", "fr-FR_ta.bin", "it-IT_ta.bin" };
+const char * picoInternalSgLingware[] = { "en-US_lh0_sg.bin", "en-GB_kh0_sg.bin", "de-DE_gl0_sg.bin", "es-ES_zl0_sg.bin", "fr-FR_nk0_sg.bin", "it-IT_cm0_sg.bin" };
+const char * picoInternalUtppLingware[] = { "en-US_utpp.bin", "en-GB_utpp.bin", "de-DE_utpp.bin", "es-ES_utpp.bin", "fr-FR_utpp.bin", "it-IT_utpp.bin" };
+const int picoNumSupportedVocs = 6;
/* supported properties */
-const char* picoSupportedProperties[] = { "language", "rate", "pitch", "volume" };
-const int picoNumSupportedProperties = 4;
+const char * picoSupportedProperties[] = { "language", "rate", "pitch", "volume" };
+const int picoNumSupportedProperties = 4;
/* adaptation layer global variables */
-synthDoneCB_t* picoSynthDoneCBPtr;
-void* picoMemArea = NULL;
-pico_System picoSystem = NULL;
-pico_Resource picoTaResource = NULL;
-pico_Resource picoSgResource = NULL;
-pico_Resource picoUtppResource = NULL;
-pico_Engine picoEngine = NULL;
-pico_Char* picoTaFileName = NULL;
-pico_Char* picoSgFileName = NULL;
-pico_Char* picoUtppFileName = NULL;
-pico_Char* picoTaResourceName = NULL;
-pico_Char* picoSgResourceName = NULL;
-pico_Char* picoUtppResourceName = NULL;
+synthDoneCB_t * picoSynthDoneCBPtr;
+void * picoMemArea = NULL;
+pico_System picoSystem = NULL;
+pico_Resource picoTaResource = NULL;
+pico_Resource picoSgResource = NULL;
+pico_Resource picoUtppResource = NULL;
+pico_Engine picoEngine = NULL;
+pico_Char * picoTaFileName = NULL;
+pico_Char * picoSgFileName = NULL;
+pico_Char * picoUtppFileName = NULL;
+pico_Char * picoTaResourceName = NULL;
+pico_Char * picoSgResourceName = NULL;
+pico_Char * picoUtppResourceName = NULL;
int picoSynthAbort = 0;
-char* picoProp_currLang = NULL; /* current language */
+char * picoProp_currLang = NULL; /* current language */
int picoProp_currRate = PICO_DEF_RATE; /* current rate */
int picoProp_currPitch = PICO_DEF_PITCH; /* current pitch */
int picoProp_currVolume = PICO_DEF_VOLUME; /* current volume */
@@ -98,31 +128,38 @@ int picoCurrentLangIndex = -1;
/* internal helper functions */
-/** checkForLanguage
- * Check if the requested language is among the supported languages.
- * @language - the language to check, either in xx or xx-rYY format
- * return index of the language, or -1 if not supported.
+/** checkForLocale
+ * Check whether the requested locale is among the supported locales.
+ * @locale - the locale to check, either in xx or xx-rYY format
+ * return index of the locale, or -1 if not supported.
*/
-static int checkForLanguage( const char * language )
+static int checkForLocale( const char * locale )
{
int found = -1; /* language not found */
+ int i;
+ if (locale == NULL) {
+ LOGE("checkForLanguage called with NULL language");
+ return found;
+ }
/* Verify that the requested locale is a locale that we support. */
- for (int i = 0; i < picoNumSupportedLang; i++)
+ for (i = 0; i < picoNumSupportedVocs; i ++)
{
- if (strcmp(language, picoSupportedLang[i]) == 0)
+ if (strcmp(locale, picoSupportedLang[i]) == 0) /* in array */
{
found = i;
break;
}
};
+
+ /* The locale was not found. */
if (found < 0)
{
/* We didn't find an exact match; it may have been specified with only the first 2 characters.
- This could overmatch ISO 639-3 language codes. */
- for (int i = 0; i < picoNumSupportedLang; i++)
+ This could overmatch ISO 639-3 language codes.%% */
+ for (i = 0; i < picoNumSupportedVocs; i ++)
{
- if (strncmp(language, picoSupportedLang[i], 2) == 0)
+ if (strncmp(locale, picoSupportedLang[i], 2) == 0)
{
found = i;
break;
@@ -130,7 +167,7 @@ static int checkForLanguage( const char * language )
}
if (found < 0)
{
- LOGE("TtsEngine::set language called with unsupported language");
+ LOGE("TtsEngine::set language called with unsupported locale");
}
};
return found;
@@ -145,7 +182,7 @@ static void cleanResources( void )
if (picoEngine)
{
pico_disposeEngine( picoSystem, &picoEngine );
- pico_releaseVoiceDefinition(picoSystem, (pico_Char*)PICO_VOICE_NAME);
+ pico_releaseVoiceDefinition( picoSystem, (pico_Char *) PICO_VOICE_NAME );
picoEngine = NULL;
}
if (picoUtppResource)
@@ -174,43 +211,43 @@ static void cleanFiles( void )
{
if (picoProp_currLang)
{
- free(picoProp_currLang);
+ free( picoProp_currLang );
picoProp_currLang = NULL;
}
if (picoTaFileName)
{
- free(picoTaFileName);
+ free( picoTaFileName );
picoTaFileName = NULL;
}
if (picoSgFileName)
{
- free(picoSgFileName);
+ free( picoSgFileName );
picoSgFileName = NULL;
}
if (picoUtppFileName)
{
- free(picoUtppFileName);
+ free( picoUtppFileName );
picoUtppFileName = NULL;
}
if (picoTaResourceName)
{
- free(picoTaResourceName);
+ free( picoTaResourceName );
picoTaResourceName = NULL;
}
if (picoSgResourceName)
{
- free(picoSgResourceName);
+ free( picoSgResourceName );
picoSgResourceName = NULL;
}
if (picoUtppResourceName)
{
- free(picoUtppResourceName);
+ free( picoUtppResourceName );
picoUtppResourceName = NULL;
}
}
@@ -249,44 +286,47 @@ static bool hasResourcesForLanguage(int langIndex) {
}
/** doLanguageSwitchFromLangIndex
- * Switch to requested language. If language is already loaded it returns
- * immediately, if another language is loaded this will first be unloaded
- * and the new one then loaded. If no language is loaded the requested will be loaded.
- * @langIndex - the index of the language to load, which is guaranteed to be supported.
+ * Switch to the requested locale.
+ * If the locale is already loaded, it returns immediately.
+ * If another locale is already loaded, it will first be unloaded and the new one then loaded.
+ * If no locale is loaded, the requested locale will be loaded.
+ * @langIndex - the index of the locale/voice to load, which is guaranteed to be supported.
* return TTS_SUCCESS or TTS_FAILURE
*/
-static tts_result doLanguageSwitchFromLangIndex(int langIndex)
+static tts_result doLanguageSwitchFromLangIndex( int langIndex )
{
- // if we already have a loaded language, check if it's the same one as requested
+ int ret; /* function result code */
+
+ /* If we already have a loaded locale, check whether it is the same one as requested. */
if (picoProp_currLang && (strcmp(picoProp_currLang, picoSupportedLang[langIndex]) == 0))
{
LOGI("Language already loaded (%s == %s)", picoProp_currLang, picoSupportedLang[langIndex]);
return TTS_SUCCESS;
}
- // not the same language, unload the current one first
+ /* It is not the same locale; unload the current one first. */
cleanResources();
- // allocate memory for file and resource names
+ /* Allocate memory for file and resource names. */
cleanFiles();
- picoProp_currLang = (char*)malloc(10);
- picoTaFileName = (pico_Char*)malloc(PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE);
- picoSgFileName = (pico_Char*)malloc(PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE);
- picoUtppFileName = (pico_Char*)malloc(PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE);
- picoTaResourceName = (pico_Char*)malloc(PICO_MAX_RESOURCE_NAME_SIZE);
- picoSgResourceName = (pico_Char*)malloc(PICO_MAX_RESOURCE_NAME_SIZE);
- picoUtppResourceName = (pico_Char*)malloc(PICO_MAX_RESOURCE_NAME_SIZE);
-
- // set path and file names for resource files
- strcpy((char*)picoTaFileName, PICO_LINGWARE_PATH);
- strcat((char*)picoTaFileName, (const char*)picoInternalTaLingware[langIndex]);
- strcpy((char*)picoSgFileName, PICO_LINGWARE_PATH);
- strcat((char*)picoSgFileName, (const char*)picoInternalSgLingware[langIndex]);
- strcpy((char*)picoUtppFileName, PICO_LINGWARE_PATH);
- strcat((char*)picoUtppFileName, (const char*)picoInternalUtppLingware[langIndex]);
-
- // load text analysis Lingware resource file
- int ret = pico_loadResource(picoSystem, picoTaFileName, &picoTaResource);
+ picoProp_currLang = (char *) malloc( 10 );
+ picoTaFileName = (pico_Char *) malloc( PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE );
+ picoSgFileName = (pico_Char *) malloc( PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE );
+ picoUtppFileName = (pico_Char *) malloc( PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE );
+ picoTaResourceName = (pico_Char *) malloc( PICO_MAX_RESOURCE_NAME_SIZE );
+ picoSgResourceName = (pico_Char *) malloc( PICO_MAX_RESOURCE_NAME_SIZE );
+ picoUtppResourceName =(pico_Char *) malloc( PICO_MAX_RESOURCE_NAME_SIZE );
+
+ /* Set the path and file names for resource files. */
+ strcpy((char *) picoTaFileName, PICO_LINGWARE_PATH);
+ strcat((char *) picoTaFileName, (const char *) picoInternalTaLingware[langIndex]);
+ strcpy((char *) picoSgFileName, PICO_LINGWARE_PATH);
+ strcat((char *) picoSgFileName, (const char *) picoInternalSgLingware[langIndex]);
+ strcpy((char *) picoUtppFileName, PICO_LINGWARE_PATH);
+ strcat((char *) picoUtppFileName, (const char *) picoInternalUtppLingware[langIndex]);
+
+ /* Load the text analysis Lingware resource file. */
+ ret = pico_loadResource( picoSystem, picoTaFileName, &picoTaResource );
if (PICO_OK != ret)
{
LOGE("Failed to load textana resource for %s [%d]", picoSupportedLang[langIndex], ret);
@@ -295,8 +335,8 @@ static tts_result doLanguageSwitchFromLangIndex(int langIndex)
return TTS_FAILURE;
}
- // load signal generation Lingware resource file
- ret = pico_loadResource(picoSystem, picoSgFileName, &picoSgResource);
+ /* Load the signal generation Lingware resource file. */
+ ret = pico_loadResource( picoSystem, picoSgFileName, &picoSgResource );
if (PICO_OK != ret)
{
LOGE("Failed to load siggen resource for %s [%d]", picoSupportedLang[langIndex], ret);
@@ -305,20 +345,20 @@ static tts_result doLanguageSwitchFromLangIndex(int langIndex)
return TTS_FAILURE;
}
- // Load utpp Lingware resource file if exists - NOTE: this file is optional
- // and is currently not used. Loading is only attempted for future compatibility.
- // If this file is not present the loading will still succeed.
- ret = pico_loadResource(picoSystem, picoUtppFileName, &picoUtppResource);
- if (PICO_OK != ret && ret != PICO_EXC_CANT_OPEN_FILE)
- {
+ /* Load the utpp Lingware resource file if exists - NOTE: this file is optional
+ and is currently not used. Loading is only attempted for future compatibility.
+ If this file is not present the loading will still succeed. */
+ ret = pico_loadResource( picoSystem, picoUtppFileName, &picoUtppResource );
+ if ((PICO_OK != ret) && (ret != PICO_EXC_CANT_OPEN_FILE))
+ {
LOGE("Failed to load utpp resource for %s [%d]", picoSupportedLang[langIndex], ret);
cleanResources();
cleanFiles();
return TTS_FAILURE;
- }
+ }
- // Get text analysis resource name
- ret = pico_getResourceName(picoSystem, picoTaResource, (char*)picoTaResourceName);
+ /* Get the text analysis resource name. */
+ ret = pico_getResourceName( picoSystem, picoTaResource, (char *) picoTaResourceName );
if (PICO_OK != ret)
{
LOGE("Failed to get textana resource name for %s [%d]", picoSupportedLang[langIndex], ret);
@@ -327,12 +367,12 @@ static tts_result doLanguageSwitchFromLangIndex(int langIndex)
return TTS_FAILURE;
}
- // Get signal generation resource name
- ret = pico_getResourceName(picoSystem, picoSgResource, (char*)picoSgResourceName);
+ /* Get the signal generation resource name. */
+ ret = pico_getResourceName( picoSystem, picoSgResource, (char *) picoSgResourceName );
if (PICO_OK == ret && picoUtppResource != NULL)
{
- // Get utpp resource name - optional: see note above
- ret = pico_getResourceName(picoSystem, picoUtppResource, (char*)picoUtppResourceName);
+ /* Get utpp resource name - optional: see note above. */
+ ret = pico_getResourceName( picoSystem, picoUtppResource, (char *) picoUtppResourceName );
if (PICO_OK != ret)
{
LOGE("Failed to get utpp resource name for %s [%d]", picoSupportedLang[langIndex], ret);
@@ -349,8 +389,8 @@ static tts_result doLanguageSwitchFromLangIndex(int langIndex)
return TTS_FAILURE;
}
- // create a voice definition
- ret = pico_createVoiceDefinition(picoSystem, (const pico_Char*)PICO_VOICE_NAME);
+ /* Create a voice definition. */
+ ret = pico_createVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME );
if (PICO_OK != ret)
{
LOGE("Failed to create voice for %s [%d]", picoSupportedLang[langIndex], ret);
@@ -359,8 +399,8 @@ static tts_result doLanguageSwitchFromLangIndex(int langIndex)
return TTS_FAILURE;
}
- // add text analysis resource to voice
- ret = pico_addResourceToVoiceDefinition(picoSystem, (const pico_Char*)PICO_VOICE_NAME, picoTaResourceName);
+ /* Add the text analysis resource to the voice. */
+ ret = pico_addResourceToVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME, picoTaResourceName );
if (PICO_OK != ret)
{
LOGE("Failed to add textana resource to voice for %s [%d]", picoSupportedLang[langIndex], ret);
@@ -369,12 +409,12 @@ static tts_result doLanguageSwitchFromLangIndex(int langIndex)
return TTS_FAILURE;
}
- // add signal generation resource to voice
- ret = pico_addResourceToVoiceDefinition(picoSystem, (const pico_Char*)PICO_VOICE_NAME, picoSgResourceName);
+ /* Add the signal generation resource to the voice. */
+ ret = pico_addResourceToVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME, picoSgResourceName );
if (PICO_OK == ret && picoUtppResource != NULL)
{
- // add utpp resource to voice - optional: see note above
- ret = pico_addResourceToVoiceDefinition(picoSystem, (const pico_Char*)PICO_VOICE_NAME, picoUtppResourceName);
+ /* Add utpp resource to voice - optional: see note above. */
+ ret = pico_addResourceToVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME, picoUtppResourceName );
if (PICO_OK != ret)
{
LOGE("Failed to add utpp resource to voice for %s [%d]", picoSupportedLang[langIndex], ret);
@@ -392,7 +432,7 @@ static tts_result doLanguageSwitchFromLangIndex(int langIndex)
return TTS_FAILURE;
}
- ret = pico_newEngine(picoSystem, (const pico_Char*)PICO_VOICE_NAME, &picoEngine);
+ ret = pico_newEngine( picoSystem, (const pico_Char *) PICO_VOICE_NAME, &picoEngine );
if (PICO_OK != ret)
{
LOGE("Failed to create engine for %s [%d]", picoSupportedLang[langIndex], ret);
@@ -401,47 +441,53 @@ static tts_result doLanguageSwitchFromLangIndex(int langIndex)
return TTS_FAILURE;
}
- strcpy(picoProp_currLang, picoSupportedLang[langIndex]);
- picoCurrentLangIndex = langIndex;
-
+ /* Set the current locale/voice. */
+ strcpy( picoProp_currLang, picoSupportedLang[langIndex] );
LOGI("loaded %s successfully", picoProp_currLang);
-
return TTS_SUCCESS;
}
+
/** doLanguageSwitch
- * Switch to requested language. If language is already loaded it returns
- * immediately, if another language is loaded this will first be unloaded
- * and the new one then loaded. If no language is loaded the requested will be loaded.
- * @language - the language to check, either in xx or xx-rYY format (i.e "en" or "en-rUS")
+ * Switch to the requested locale.
+ * If this locale is already loaded, it returns immediately.
+ * If another locale is already loaded, this will first be unloaded
+ * and the new one then loaded.
+ * If no locale is loaded, the requested one will be loaded.
+ * @locale - the locale to check, either in xx or xx-rYY format (e.g. "en" or "en-rUS")
* return TTS_SUCCESS or TTS_FAILURE
*/
-static tts_result doLanguageSwitch(const char* language)
+static tts_result doLanguageSwitch( const char * locale )
{
- // load new language
- int langIndex = checkForLanguage(language);
- if (langIndex < 0)
- {
- LOGE("Tried to swith to non-supported language %s", language);
- return TTS_FAILURE;
- }
- LOGI("Found supported language %s", picoSupportedLang[langIndex]);
+ int loclIndex; /* locale index */
- return doLanguageSwitchFromLangIndex( langIndex );
+ /* Load the new locale. */
+ loclIndex = checkForLocale( locale );
+ if (loclIndex < 0)
+ {
+ LOGE("Tried to swith to non-supported locale %s", locale);
+ return TTS_FAILURE;
+ }
+ LOGI("Found supported locale %s", picoSupportedLang[loclIndex]);
+ return doLanguageSwitchFromLangIndex( loclIndex );
}
+
/** doAddProperties
- * Add <speed>, <pitch> and <volume> tags to text, if the properties have been set to non-default values,
- * and return the new string. The calling function is responsible for freeing the returned string.
+ * Add <speed>, <pitch> and <volume> tags to the text,
+ * if the properties have been set to non-default values, and return the new string.
+ * The calling function is responsible for freeing the returned string.
* @str - text to apply tags to
* return new string with tags applied
*/
-static char* doAddProperties(const char* str)
+static char * doAddProperties( const char * str )
{
- char* data = NULL;
- int haspitch = 0, hasspeed = 0, hasvol = 0;
- int textlen = strlen(str) + 1;
+ char * data = NULL;
+ int haspitch, hasspeed, hasvol; /* parameters */
+ int textlen; /* property string length */
+ haspitch = 0; hasspeed = 0; hasvol = 0;
+ textlen = strlen(str) + 1;
if (picoProp_currPitch != PICO_DEF_PITCH) /* non-default pitch */
{
textlen += strlen(PICO_PITCH_OPEN_TAG) + 5;
@@ -461,7 +507,8 @@ static char* doAddProperties(const char* str)
hasvol = 1;
}
- data = (char*)malloc(textlen);
+ /* Compose the property strings. */
+ data = (char *) malloc( textlen ); /* allocate string */
if (!data)
{
return NULL;
@@ -512,10 +559,292 @@ static char* doAddProperties(const char* str)
}
-/* API function implementations */
+/** createPhonemeString
+ * Wrap all individual words in <phoneme> tags.
+ * The Pico <phoneme> tag only supports one word in each tag,
+ * therefore they must be individually wrapped!
+ * @xsampa - text to convert to Pico phoneme string
+ * @length - length of the input string
+ * return new string with tags applied
+*/
+static char * createPhonemeString( const char * xsampa, int length )
+{
+ char * convstring = NULL;
+ int origStrLen = strlen(xsampa);
+ int numWords = 1;
+ int start, totalLength, i, j;
+
+ for (i = 0; i < origStrLen; i ++)
+ {
+ if (xsampa[i] == ' ')
+ numWords ++;
+ }
+
+ if (numWords == 1)
+ {
+ convstring = new char[origStrLen + 17];
+ convstring[0] = '\0';
+ strcat(convstring, "<phoneme ph='");
+ strcat(convstring, xsampa);
+ strcat(convstring, "'/>");
+ }
+ else
+ {
+ char * words[numWords];
+ start = 0; totalLength = 0; i = 0; j = 0;
+ for (i=0, j=0; i < origStrLen; i ++)
+ {
+ if (xsampa[i] == ' ')
+ {
+ words[j] = new char[i+1-start+17];
+ words[j][0] = '\0';
+ strcat( words[j], "<phoneme ph='");
+ strncat(words[j], xsampa+start, i-start);
+ strcat( words[j], "'/>");
+ start = i + 1;
+ j ++;
+ totalLength += strlen(words[j-1]);
+ }
+ }
+ words[j] = new char[i+1-start+17];
+ words[j][0] = '\0';
+ strcat(words[j], "<phoneme ph='");
+ strcat(words[j], xsampa+start);
+ strcat(words[j], "'/>");
+ totalLength += strlen(words[j]);
+ convstring = new char[totalLength + 1];
+ convstring[0] = '\0';
+ for (i=0; i < numWords; i ++)
+ {
+ strcat(convstring, words[i]);
+ delete [] words[i];
+ }
+ }
+ return convstring;
+}
+
+
+/* The XSAMPA uses as many as 4 characters to represent a single IPA code. */
+typedef struct tagPhnArr
+ {
+ char16_t strIPA; /* IPA Unicode symbol */
+ char strXSAMPA[5]; /* SAMPA sequence */
+ } PArr;
+
+#define phn_cnt (133)
+
+PArr PhnAry[phn_cnt] = {
+
+ /* XSAMPA conversion table */
+
+ /* Vowels (23) complete */
+ {0x025B, "E"},
+ {0x0251, "A"},
+ {0x0254, "O"},
+ {0x00F8, "2"},
+ {0x0153, "9"},
+ {0x0276, "&"},
+ {0x0252, "Q"},
+ {0x028C, "V"},
+ {0x0264, "7"},
+ {0x026F, "M"},
+ {0x0268, "1"},
+ {0x0289, "}"},
+ {0x026A, "I"},
+ {0x028F, "Y"},
+ {0x028A, "U"},
+ {0x0259, "@"},
+ {0x0275, "8"},
+ {0x0250, "6"},
+ {0x00E6, "{"},
+ {0x025C, "3"},
+ {0x025A, "@'"},
+ {0x025E, "3\\"},
+ {0x0258, "@\\"},
+
+ /* Consonants (60) complete */
+ {0x0288, "t'"},
+ {0x0256, "d'"},
+ {0x025F, "J\\"},
+ {0x0261, "g"},
+ {0x0262, "G\\"},
+ {0x0294, "?"},
+ {0x0271, "F"},
+ {0x0273, "n'"},
+ {0x0272, "J"},
+ {0x014B, "N"},
+ {0x0274, "N\\"},
+ {0x0299, "B\\"},
+ {0x0280, "R\\"},
+ {0x027E, "4"},
+ {0x027D, "r'"},
+ {0x0278, "p\\"},
+ {0x03B2, "B"},
+ {0x03B8, "T"},
+ {0x00F0, "D"},
+ {0x0283, "S"},
+ {0x0292, "Z"},
+ {0x0282, "s'"},
+ {0x0290, "z'"},
+ {0x00E7, "C"},
+ {0x029D, "j\\"},
+ {0x0263, "G"},
+ {0x03C7, "X"},
+ {0x0281, "R"},
+ {0x0127, "X\\"},
+ {0x0295, "?\\"},
+ {0x0266, "h\\"},
+ {0x026C, "K"},
+ {0x026E, "K\\"},
+ {0x028B, "P"},
+ {0x0279, "r\\"},
+ {0x027B, "r\\'"},
+ {0x0270, "M\\"},
+ {0x026D, "l'"},
+ {0x028E, "L"},
+ {0x029F, "L\\"},
+ {0x0253, "b_<"},
+ {0x0257, "d_<"},
+ {0x0284, "J\\_<"},
+ {0x0260, "g_<"},
+ {0x029B, "G\\_<"},
+ {0x028D, "W"},
+ {0x0265, "H"},
+ {0x029C, "H\\"},
+ {0x02A1, ">\\"},
+ {0x02A2, "<\\"},
+ {0x0298, "O\\"},
+ {0x01C0, "|\\"},
+ {0x01C3, "!\\"},
+ {0x01C2, "=\\"},
+ {0x01C1, "|\\|\\"},
+ {0x027A, "l\\"},
+ {0x0255, "s\\"},
+ {0x0291, "z\\"},
+ {0x0267, "x\\"},
+ {0x026B, "l_G"},
+
+ /* Diacritics (34) */
+ {0x02BC, "_>"},
+ {0x0325, "_0"},
+ {0x030A, "_0"},
+ {0x032C, "_v"},
+ {0x02B0, "_h"},
+ {0x0324, "_t"},
+ {0x0330, "_k"},
+ {0x033C, "_N"},
+ {0x032A, "_d"},
+ {0x033A, "_a"},
+ {0x033B, "_m"},
+ {0x0339, "_O"},
+ {0x031C, "_c"},
+ {0x031F, "_+"},
+ {0x0320, "_-"},
+ {0x0308, "_"}, /* centralized %% */
+ {0x033D, "_x"},
+ {0x0318, "_A"},
+ {0x0319, "_q"},
+ {0x02DE, "'"},
+ {0x02B7, "_w"},
+ {0x02B2, "_j"},
+ {0x02E0, "_G"},
+ {0x02E4, "_?\\"},
+ {0x0303, "~"},
+ {0x207F, "_n"},
+ {0x02E1, "_l"},
+ {0x031A, "_}"},
+ {0x0334, "_e"},
+ {0x031D, "_r"},
+ {0x031E, "_o"},
+ {0x0329, "="},
+ {0x032F, "_^"},
+ {0x02D0, ":"},
+
+ /* Others (10) incomplete%% */
+ {0x0361, "_"},
+ {0x035C, "_"},
+ {0x02C8, ""},
+ {0x02CC, "%"},
+ {0x02D1, ":\\"},
+ {0x0306, "_X"},
+ {0x2016, "||"},
+ {0x203F, "-\\"},
+ {0x2197, "<R>"},
+ {0x2198, "<F>"},
+
+ /* Affricates (6) complete */
+ {0x02A3, "d_z"},
+ {0x02A4, "d_Z"},
+ {0x02A5, "d_z\\"},
+ {0x02A6, "t_s"},
+ {0x02A7, "t_S"},
+ {0x02A8, "t_s\\"}
+ };
+
+
+void CnvIPAPnt( const char16_t IPnt, char * XPnt )
+{
+ char16_t ThisPnt = IPnt; /* local copy of single IPA codepoint */
+ int idx; /* index into table */
+
+ /* Convert an individual IPA codepoint.
+ A single IPA code could map to a string.
+ Search the table. If it is not found, use the same character.
+ Since most codepoints can be contained within 16 bits,
+ they are represented as wide chars. */
+ XPnt[0] = 0; /* clear the result string */
+
+ /* Search the table for the conversion. */
+ for (idx = 0; idx < phn_cnt; idx ++) /* for each item in table */
+ if (IPnt == PhnAry[idx].strIPA) /* matches IPA code */
+ {
+ strcat( XPnt, (const char *)&(PhnAry[idx].strXSAMPA) ); /* copy the XSAMPA string */
+ return;
+ }
+ strcat(XPnt, (const char *)&ThisPnt); /* just copy it */
+}
+
+
+/** cnvIpaToXsampa
+ * Convert an IPA character string to an XSAMPA character string.
+ * @ipaString - input IPA string to convert
+ * @outXsampaString - converted XSAMPA string is passed back in this parameter
+ * return size of the new string
+*/
+int cnvIpaToXsampa( const char16_t * ipaString, char ** outXsampaString )
+{
+ size_t xsize; /* size of result */
+ size_t ilen; /* input length */
+ int ipidx; /* index into IPA string */
+ char * XPnt; /* short XSAMPA char sequence */
+
+ /* Convert an IPA string to an XSAMPA string and store the xsampa string in *outXsampaString.
+ It is the responsibility of the caller to free the allocated string.
+ Increment through the string. For each base & combination convert it to the XSAMPA equivalent.
+ Because of the XSAMPA limitations, not all IPA characters will be covered. */
+ XPnt = (char *) malloc(6);
+ xsize = (4 * strlen16(ipaString)) + 8; /* assume more than double size */
+
+ *outXsampaString = (char *) malloc( xsize ); /* allocate return string */
+ *outXsampaString[0] = 0;
+ xsize = 0; /* clear final */
+ ilen = strlen16(ipaString); /* length of input UTF-16 */
+ for (ipidx = 0; ipidx < ilen; ipidx ++) /* for each IPA code */
+ {
+ CnvIPAPnt( ipaString[ipidx], XPnt ); /* get converted string */
+ strcat((char *)*outXsampaString, XPnt ); /* concatenate XSAMPA */
+ };
+ free(XPnt);
+ xsize = strlen(*outXsampaString); /* get the final length */
+ return xsize;
+}
+
+
+
+/* Google Engine API function implementations */
/** init
- * Allocates Pico memory block and initializes Pico system.
+ * Allocates Pico memory block and initializes the Pico system.
* synthDoneCBPtr - Pointer to callback function which will receive generated samples
* return tts_result
*/
@@ -527,18 +856,18 @@ tts_result TtsEngine::init( synthDoneCB_t synthDoneCBPtr )
return TTS_FAILURE;
}
- picoMemArea = malloc(PICO_MEM_SIZE);
+ picoMemArea = malloc( PICO_MEM_SIZE );
if (!picoMemArea)
{
LOGE("Failed to allocate memory for Pico system");
return TTS_FAILURE;
}
- pico_Status ret = pico_initialize(picoMemArea, PICO_MEM_SIZE, &picoSystem);
+ pico_Status ret = pico_initialize( picoMemArea, PICO_MEM_SIZE, &picoSystem );
if (PICO_OK != ret)
{
LOGE("Failed to initialize Pico system");
- free(picoMemArea);
+ free( picoMemArea );
picoMemArea = NULL;
return TTS_FAILURE;
}
@@ -571,10 +900,101 @@ tts_result TtsEngine::shutdown( void )
}
cleanFiles();
-
return TTS_SUCCESS;
}
+
+/** loadLanguage
+ *  Placeholder for preloading a language; no language is loaded here.
+ *  @lang    - string with ISO 3 letter language code.
+ *  @country - string with ISO 3 letter country code.
+ *  @variant - string with language variant for that language and country pair.
+ *  return tts_result - always TTS_FAILURE
+*/
+tts_result TtsEngine::loadLanguage(const char *lang, const char *country, const char *variant)
+{
+    /* The arguments are intentionally unused. */
+    (void) lang;
+    (void) country;
+    (void) variant;
+    return TTS_FAILURE;
+}
+
+
+/** setLanguage
+ *  Switch to a new language (locale). Use the ISO 639-3 language codes.
+ *  @lang    - string with ISO 639-3 language code.
+ *  @country - string with ISO 3 letter country code (may be NULL).
+ *  @variant - string with language variant for that language and country pair (not used).
+ *  return tts_result
+ */
+tts_result TtsEngine::setLanguage( const char * lang, const char * country, const char * variant )
+{
+    int firstLangMatch = -1;        /* first entry whose language matches */
+    int exactMatch     = -1;        /* entry matching both language and country */
+    int voc;
+
+    if (lang == NULL)
+    {
+        LOGE("TtsEngine::setLanguage called with NULL language");
+        return TTS_FAILURE;
+    }
+
+    /* Selection rules:
+         no entry for the language          -> error
+         language entry, country not found  -> use the first entry for the language
+         language and country entry         -> use that entry
+       One pass over the table is enough: remember the first language match and
+       stop as soon as the country matches too (or when no country was given). */
+    for (voc = 0; voc < picoNumSupportedVocs; voc ++)
+    {
+        if (strcmp(lang, picoSupportedLangIso3[voc]) != 0)
+        {
+            continue;
+        }
+        if (firstLangMatch < 0)
+        {
+            firstLangMatch = voc;
+        }
+        if (country == NULL)
+        {
+            break;
+        }
+        if (strcmp(country, picoSupportedCountryIso3[voc]) == 0)
+        {
+            exactMatch = voc;
+            break;
+        }
+    }
+
+    if (firstLangMatch < 0)
+    {
+        /* The language isn't supported. */
+        LOGE("TtsEngine::setLanguage called with unsupported language");
+        return TTS_FAILURE;
+    }
+
+    if (country != NULL && exactMatch < 0)
+    {
+        /* Language found, country not: fall back to the language match. */
+        LOGI("TtsEngine::setLanguage found matching language(%s) but not matching country(%s).",
+                lang, country);
+    }
+
+    /* switch the language */
+    return doLanguageSwitchFromLangIndex( (exactMatch >= 0) ? exactMatch : firstLangMatch );
+}
+
+
/** isLanguageAvailable
* Returns the level of support for a language.
* @lang - string with ISO 3 letter language code.
@@ -595,7 +1015,7 @@ tts_support_result TtsEngine::isLanguageAvailable(const char *lang, const char *
}
// find a match on the language
- for (int i = 0; i < picoNumSupportedLang; i++)
+ for (int i = 0; i < picoNumSupportedVocs; i++)
{
if (strcmp(lang, picoSupportedLangIso3[i]) == 0) {
langIndex = i;
@@ -617,7 +1037,7 @@ tts_support_result TtsEngine::isLanguageAvailable(const char *lang, const char *
}
// find a match on the country
- for (int i = langIndex; i < picoNumSupportedLang; i++) {
+ for (int i = langIndex; i < picoNumSupportedVocs; i++) {
if ((strcmp(lang, picoSupportedLangIso3[i]) == 0)
&& (strcmp(country, picoSupportedCountryIso3[i]) == 0)) {
countryIndex = i;
@@ -638,81 +1058,6 @@ tts_support_result TtsEngine::isLanguageAvailable(const char *lang, const char *
// no variants supported in this library, TTS_LANG_COUNTRY_VAR_AVAILABLE cannot be returned.
}
-/** loadLanguage
- * Load a new language.
- * @lang - string with ISO 3 letter language code.
- * @country - string with ISO 3 letter country code .
- * @variant - string with language variant for that language and country pair.
- * return tts_result
-*/
-tts_result TtsEngine::loadLanguage(const char *lang, const char *country, const char *variant)
-{
- return TTS_FAILURE;
- //return setProperty("language", value, size);
-}
-
-/** setLanguage
- * Load a new language.
- * @lang - string with ISO 3 letter language code.
- * @country - string with ISO 3 letter country code .
- * @variant - string with language variant for that language and country pair.
- * return tts_result
- */
-tts_result TtsEngine::setLanguage(const char *lang, const char *country, const char *variant) {
- if (lang == NULL) {
- LOGE("TtsEngine::setLanguage called with NULL language");
- return TTS_FAILURE;
- }
-
- // we look for a match on the language first
- // then we look for a match on the country.
- // if no match on the language:
- // return an error
- // if match on the language, but no match on the country:
- // load the language found for the language match
- // if match on the language, and match on the country:
- // load the language found for the country match
-
- // find a match on the language
- int langIndex = -1;
- for (int i = 0; i < picoNumSupportedLang; i++)
- {
- if (strcmp(lang, picoSupportedLangIso3[i]) == 0) {
- langIndex = i;
- break;
- }
- }
- if (langIndex < 0) {
- // language isn't supported
- LOGE("TtsEngine::setLanguage called with unsupported language");
- return TTS_FAILURE;
- }
-
- // find a match on the country
- if (country != NULL) {
- int countryIndex = -1;
- for (int i = langIndex; i < picoNumSupportedLang; i++) {
- if ((strcmp(lang, picoSupportedLangIso3[i]) == 0)
- && (strcmp(country, picoSupportedCountryIso3[i]) == 0)) {
- countryIndex = i;
- break;
- }
- }
-
- if (countryIndex < 0) {
- // we didn't find a match on the country, but we had a match on the language,
- // use that language
- LOGI("TtsEngine::setLanguage found matching language(%s) but not matching country(%s).",
- lang, country);
- } else {
- // we have a match on the language and the country
- langIndex = countryIndex;
- }
- }
-
- return doLanguageSwitchFromLangIndex( langIndex );
-}
-
/** getLanguage
* Get the currently loaded language - if any.
@@ -764,63 +1109,75 @@ tts_result TtsEngine::setAudioFormat(AudioSystem::audio_format& encoding, uint32
*/
tts_result TtsEngine::setProperty( const char * property, const char * value, const size_t size )
{
+ int rate;
+ int pitch;
+ int volume;
+
+ /* Set a specific property for the engine.
+ Supported properties include: language (locale), rate, pitch, volume. */
/* Sanity check */
if (property == NULL)
- {
+ {
LOGE("setProperty called with property NULL");
return TTS_PROPERTY_UNSUPPORTED;
- }
+ }
if (value == NULL)
- {
+ {
LOGE("setProperty called with value NULL");
return TTS_VALUE_INVALID;
- }
+ }
if (strncmp(property, "language", 8) == 0)
- {
- // verify it's in correct format
- if (strlen(value) != 2 && strlen(value) != 6)
{
+ /* Verify it's in correct format. */
+ if (strlen(value) != 2 && strlen(value) != 6)
+ {
LOGE("change language called with incorrect format");
return TTS_VALUE_INVALID;
- }
+ }
- // try to switch to specified language
+ /* Try to switch to specified language. */
if (doLanguageSwitch(value) == TTS_FAILURE)
- {
+ {
LOGE("failed to load language");
return TTS_FAILURE;
- }
+ }
else
- {
+ {
return TTS_SUCCESS;
+ }
}
- }
else if (strncmp(property, "rate", 4) == 0)
- {
- int rate = atoi(value);
- if (rate < PICO_MIN_RATE) rate = PICO_MIN_RATE;
- if (rate > PICO_MAX_RATE) rate = PICO_MAX_RATE;
+ {
+ rate = atoi(value);
+ if (rate < PICO_MIN_RATE)
+ rate = PICO_MIN_RATE;
+ if (rate > PICO_MAX_RATE)
+ rate = PICO_MAX_RATE;
picoProp_currRate = rate;
return TTS_SUCCESS;
- }
+ }
else if (strncmp(property, "pitch", 5) == 0)
- {
- int pitch = atoi(value);
- if (pitch < PICO_MIN_PITCH) pitch = PICO_MIN_PITCH;
- if (pitch > PICO_MAX_PITCH) pitch = PICO_MAX_PITCH;
+ {
+ pitch = atoi(value);
+ if (pitch < PICO_MIN_PITCH)
+ pitch = PICO_MIN_PITCH;
+ if (pitch > PICO_MAX_PITCH)
+ pitch = PICO_MAX_PITCH;
picoProp_currPitch = pitch;
return TTS_SUCCESS;
- }
+ }
else if (strncmp(property, "volume", 6) == 0)
- {
- int volume = atoi(value);
- if (volume < PICO_MIN_VOLUME) volume = PICO_MIN_VOLUME;
- if (volume > PICO_MAX_VOLUME) volume = PICO_MAX_VOLUME;
+ {
+ volume = atoi(value);
+ if (volume < PICO_MIN_VOLUME)
+ volume = PICO_MIN_VOLUME;
+ if (volume > PICO_MAX_VOLUME)
+ volume = PICO_MAX_VOLUME;
picoProp_currVolume = volume;
return TTS_SUCCESS;
- }
+ }
return TTS_PROPERTY_UNSUPPORTED;
}
@@ -829,44 +1186,46 @@ tts_result TtsEngine::setProperty( const char * property, const char * value, co
/** getProperty
* Get the property. Supported properties are: language, rate, pitch and volume.
* @property - name of property to get
- * @value - buffer which will receive value of property
- * @iosize - size of value - if size is too small on return this will contain actual size needed
+ * @value - buffer which will receive value of property
+ * @iosize - size of value - if size is too small on return this will contain actual size needed
* return tts_result
*/
-tts_result TtsEngine::getProperty(const char *property, char *value, size_t* iosize)
+tts_result TtsEngine::getProperty( const char * property, char * value, size_t * iosize )
{
+ /* Get the property for the engine.
+ This property was previously set by setProperty or by default. */
/* sanity check */
if (property == NULL)
- {
+ {
LOGE("getProperty called with property NULL");
return TTS_PROPERTY_UNSUPPORTED;
- }
+ }
if (value == NULL)
- {
+ {
LOGE("getProperty called with value NULL");
return TTS_VALUE_INVALID;
- }
+ }
if (strncmp(property, "language", 8) == 0)
- {
- if (picoProp_currLang == NULL)
{
+ if (picoProp_currLang == NULL)
+ {
strcpy(value, "");
- }
+ }
else
- {
- if (*iosize < strlen(picoProp_currLang)+1)
{
+ if (*iosize < strlen(picoProp_currLang)+1)
+ {
*iosize = strlen(picoProp_currLang) + 1;
return TTS_PROPERTY_SIZE_TOO_SMALL;
- }
+ }
strcpy(value, picoProp_currLang);
- }
+ }
return TTS_SUCCESS;
- }
+ }
else if (strncmp(property, "rate", 4) == 0)
- {
+ {
char tmprate[4];
sprintf(tmprate, "%d", picoProp_currRate);
if (*iosize < strlen(tmprate)+1)
@@ -876,9 +1235,9 @@ tts_result TtsEngine::getProperty(const char *property, char *value, size_t* ios
}
strcpy(value, tmprate);
return TTS_SUCCESS;
- }
+ }
else if (strncmp(property, "pitch", 5) == 0)
- {
+ {
char tmppitch[4];
sprintf(tmppitch, "%d", picoProp_currPitch);
if (*iosize < strlen(tmppitch)+1)
@@ -888,67 +1247,119 @@ tts_result TtsEngine::getProperty(const char *property, char *value, size_t* ios
}
strcpy(value, tmppitch);
return TTS_SUCCESS;
- }
+ }
else if (strncmp(property, "volume", 6) == 0)
- {
+ {
char tmpvol[4];
sprintf(tmpvol, "%d", picoProp_currVolume);
if (*iosize < strlen(tmpvol)+1)
- {
+ {
*iosize = strlen(tmpvol) + 1;
return TTS_PROPERTY_SIZE_TOO_SMALL;
- }
+ }
strcpy(value, tmpvol);
return TTS_SUCCESS;
- }
- else
- {
- LOGE("Unsupported property");
- return TTS_PROPERTY_UNSUPPORTED;
- }
+ }
+
+ /* Unknown property */
+ LOGE("Unsupported property");
+ return TTS_PROPERTY_UNSUPPORTED;
}
/** synthesizeText
* Synthesizes a text string.
- * @text - text to synthesize
- * @buffer - buffer which will receive generated samples
+ * The text string could be annotated with SSML tags.
+ * @text - text to synthesize
+ * @buffer - buffer which will receive generated samples
* @bufferSize - size of buffer
* @userdata - pointer to user data which will be passed back to callback function
* return tts_result
*/
-tts_result TtsEngine::synthesizeText(const char *text, int8_t *buffer, size_t bufferSize, void *userdata)
+tts_result TtsEngine::synthesizeText( const char * text, int8_t * buffer, size_t bufferSize, void * userdata )
{
- pico_Char* inp = NULL;
- pico_Char* local_text = NULL;
- short outbuf[MAX_OUTBUF_SIZE/2];
- pico_Int16 bytes_sent, bytes_recv, text_remaining, out_data_type;
+ int err;
+ int cbret;
+ pico_Char * inp = NULL;
+ pico_Char * local_text = NULL;
+ short outbuf[MAX_OUTBUF_SIZE/2];
+ pico_Int16 bytes_sent, bytes_recv, text_remaining, out_data_type;
pico_Status ret;
- picoSynthAbort = 0;
+ SvoxSsmlParser * parser = NULL;
+ picoSynthAbort = 0;
if (text == NULL)
- {
+ {
LOGE("synthesizeText called with NULL string");
return TTS_FAILURE;
- }
+ }
if (buffer == NULL)
- {
+ {
LOGE("synthesizeText called with NULL buffer");
return TTS_FAILURE;
- }
+ }
- /* Add property tags to the string - if any. */
- local_text = (pico_Char*)doAddProperties(text);
- if (!local_text)
+ if ((strncmp(text, "<speak", 6) == 0) ||
+ (strncmp(text, "<?xml", 5) == 0) )
+ {
+ /* SSML input */
+ parser = new SvoxSsmlParser();
+ if (parser && parser->initSuccessful())
+ {
+ err = parser->parseDocument(text, 1);
+ if (err == XML_STATUS_ERROR)
+ {
+ LOGI("Warning: SSML document parsed with errors");
+ }
+ char * parsed_text = parser->getParsedDocument();
+ if (parsed_text)
+ {
+ /* Add property tags to the string - if any. */
+ local_text = (pico_Char *) doAddProperties( parsed_text );
+ if (!local_text)
+ {
+ LOGE("Failed to allocate memory for text string");
+ delete parser;
+ return TTS_FAILURE;
+ }
+ char * lang = parser->getParsedDocumentLanguage();
+ if (doLanguageSwitch(lang) == TTS_FAILURE)
+ {
+ LOGE("Failed to switch to language specified in SSML document.");
+ delete parser;
+ return TTS_FAILURE;
+ }
+ delete parser;
+ }
+ else
+ {
+ LOGE("Failed to parse SSML document");
+ delete parser;
+ return TTS_FAILURE;
+ }
+ }
+ else
+ {
+ LOGE("Failed to create SSML parser");
+ if (parser) delete parser;
+ return TTS_FAILURE;
+ }
+ }
+ else
{
+ /* Add property tags to the string - if any. */
+ local_text = (pico_Char *) doAddProperties( text );
+ if (!local_text)
+ {
LOGE("Failed to allocate memory for text string");
return TTS_FAILURE;
+ }
}
- text_remaining = strlen((const char*)local_text) + 1;
+ text_remaining = strlen((const char *) local_text) + 1;
- inp = (pico_Char*)local_text;
+ inp = (pico_Char *) local_text;
size_t bufused = 0;
@@ -956,19 +1367,20 @@ tts_result TtsEngine::synthesizeText(const char *text, int8_t *buffer, size_t bu
while (text_remaining)
{
if (picoSynthAbort)
- {
- ret = pico_resetEngine(picoEngine);
+ {
+ ret = pico_resetEngine( picoEngine );
break;
- }
+ }
/* Feed the text into the engine. */
- ret = pico_putTextUtf8(picoEngine, inp, text_remaining, &bytes_sent);
+ ret = pico_putTextUtf8( picoEngine, inp, text_remaining, &bytes_sent );
if (ret != PICO_OK)
- {
+ {
LOGE("Error synthesizing string '%s': [%d]", text, ret);
- if (local_text) free(local_text);
+ if (local_text)
+ free( local_text );
return TTS_FAILURE;
- }
+ }
text_remaining -= bytes_sent;
inp += bytes_sent;
@@ -979,36 +1391,36 @@ tts_result TtsEngine::synthesizeText(const char *text, int8_t *buffer, size_t bu
break;
}
/* Retrieve the samples and add them to the buffer. */
- ret = pico_getData(picoEngine, (void*)outbuf, MAX_OUTBUF_SIZE, &bytes_recv, &out_data_type);
+ ret = pico_getData( picoEngine, (void *) outbuf, MAX_OUTBUF_SIZE, &bytes_recv, &out_data_type );
if (bytes_recv)
- {
- if ((bufused + bytes_recv) <= bufferSize)
{
- memcpy(buffer+bufused, (int8_t*)outbuf, bytes_recv);
+ if ((bufused + bytes_recv) <= bufferSize)
+ {
+ memcpy(buffer+bufused, (int8_t *) outbuf, bytes_recv);
bufused += bytes_recv;
- }
+ }
else
- {
+ {
/* The buffer filled; pass this on to the callback function. */
- int cbret = picoSynthDoneCBPtr(userdata, 16000, AudioSystem::PCM_16_BIT, 1, buffer, bufused, TTS_SYNTH_PENDING);
+ cbret = picoSynthDoneCBPtr(userdata, 16000, AudioSystem::PCM_16_BIT, 1, buffer, bufused, TTS_SYNTH_PENDING);
if (cbret == TTS_CALLBACK_HALT)
- {
+ {
LOGI("Halt requested by caller. Halting.");
picoSynthAbort = 1;
break;
- }
+ }
bufused = 0;
- memcpy(buffer, (int8_t*)outbuf, bytes_recv);
+ memcpy(buffer, (int8_t *) outbuf, bytes_recv);
bufused += bytes_recv;
+ }
}
- }
} while (PICO_STEP_BUSY == ret);
- /* The synthesis is finished; notify the caller and pass the remaining samples.
+ /* This chunk of synthesis is finished; pass the remaining samples.
Use 16 KHz, 16-bit samples. */
if (!picoSynthAbort)
{
- picoSynthDoneCBPtr( userdata, 16000, AudioSystem::PCM_16_BIT, 1, buffer, bufused, TTS_SYNTH_DONE);
+ picoSynthDoneCBPtr( userdata, 16000, AudioSystem::PCM_16_BIT, 1, buffer, bufused, TTS_SYNTH_PENDING);
}
picoSynthAbort = 0;
@@ -1016,17 +1428,28 @@ tts_result TtsEngine::synthesizeText(const char *text, int8_t *buffer, size_t bu
{
LOGE("Error occurred during synthesis [%d]", ret);
if (local_text) free(local_text);
+ LOGV("Synth loop: sending TTS_SYNTH_DONE after error");
+ picoSynthDoneCBPtr( userdata, 16000, AudioSystem::PCM_16_BIT, 1, buffer, bufused, TTS_SYNTH_DONE);
return TTS_FAILURE;
}
}
+
+ /* Synthesis is done; notify the caller */
+ LOGV("Synth loop: sending TTS_SYNTH_DONE after all done, or was asked to stop");
+ picoSynthDoneCBPtr( userdata, 16000, AudioSystem::PCM_16_BIT, 1, buffer, bufused, TTS_SYNTH_DONE);
- if (local_text) free(local_text);
+ if (local_text) {
+ free( local_text );
+ }
return TTS_SUCCESS;
}
/** synthesizeIpa
* Synthesizes a phonetic string in IPA format.
+ The Pico engine understands only XSAMPA, so the IPA must be converted.
+ The string must also be parceled into words since it can only handle a word at a time.
+ Each phonemic word must be wrapped with a tag.
* @ipa - phonetic string to synthesize
* @buffer - buffer which will receive generated samples
* @bufferSize - size of buffer
@@ -1035,114 +1458,9 @@ tts_result TtsEngine::synthesizeText(const char *text, int8_t *buffer, size_t bu
*/
tts_result TtsEngine::synthesizeIpa( const char * ipa, int8_t * buffer, size_t bufferSize, void * userdata )
{
- pico_Char* inp = NULL;
- pico_Char* local_text = NULL;
- short outbuf[MAX_OUTBUF_SIZE/2];
- pico_Int16 bytes_sent, bytes_recv, text_remaining, out_data_type;
- pico_Status ret;
-
- picoSynthAbort = 0;
- if (ipa == NULL)
- {
- LOGE("synthesizeIpa called with NULL string");
- return TTS_FAILURE;
- }
-
- if (buffer == NULL)
- {
- LOGE("synthesizeIpa called with NULL buffer");
- return TTS_FAILURE;
- }
-
- /* Append phoneme tag. %%%
- <phoneme ph="xxx"/> */
-
- /* Add property tags to the string - if any. */
- local_text = (pico_Char*)doAddProperties( ipa );
- if (!local_text)
- {
- LOGE("Failed to allocate memory for text string");
- return TTS_FAILURE;
- }
-
- text_remaining = strlen((const char*)local_text) + 1;
-
- inp = (pico_Char*)local_text;
-
- size_t bufused = 0;
-
- /* synthesis loop */
- while (text_remaining)
- {
- if (picoSynthAbort)
- {
- ret = pico_resetEngine( picoEngine );
- break;
- }
-
- /* Feed the text into the engine. */
- ret = pico_putTextUtf8( picoEngine, inp, text_remaining, &bytes_sent );
- if (ret != PICO_OK)
- {
- LOGE("Error synthesizing string '%s': [%d]", ipa, ret);
- if (local_text) free(local_text);
- return TTS_FAILURE;
- }
-
- /* Process the remaining string. */
- text_remaining -= bytes_sent;
- inp += bytes_sent;
- do
- {
- if (picoSynthAbort)
- {
- break;
- }
- /* Retrieve the samples and add them to the buffer. */
- ret = pico_getData( picoEngine, (void*)outbuf, MAX_OUTBUF_SIZE, &bytes_recv, &out_data_type );
- if (bytes_recv)
- {
- if ((bufused + bytes_recv) <= bufferSize)
- {
- memcpy(buffer+bufused, (int8_t*)outbuf, bytes_recv);
- bufused += bytes_recv;
- }
- else
- {
- /* The buffer filled; pass this on to the callback function. */
- int cbret = picoSynthDoneCBPtr(userdata, 16000, AudioSystem::PCM_16_BIT, 1, buffer, bufused, TTS_SYNTH_PENDING);
- if (cbret == TTS_CALLBACK_HALT)
- {
- LOGI("Halt requested by caller. Halting.");
- picoSynthAbort = 1;
- break;
- }
- bufused = 0;
- memcpy(buffer, (int8_t*)outbuf, bytes_recv);
- bufused += bytes_recv;
- }
- }
- } while (PICO_STEP_BUSY == ret);
-
- /* The synthesis is finished; notify the caller and pass the remaining samples.
- Use 16 KHz, 16-bit samples. */
- if (!picoSynthAbort)
- {
- picoSynthDoneCBPtr( userdata, 16000, AudioSystem::PCM_16_BIT, 1, buffer, bufused, TTS_SYNTH_DONE );
- }
- picoSynthAbort = 0; /* succeeded */
-
- if (ret != PICO_STEP_IDLE)
- {
- LOGE("Error occurred during synthesis [%d]", ret);
- if (local_text) free(local_text);
- return TTS_FAILURE;
- }
- }
+ // deprecated call
+ return TTS_FAILURE;
- if (local_text)
- free(local_text);
- return TTS_SUCCESS; /* succeeded */
}
@@ -1150,7 +1468,7 @@ tts_result TtsEngine::synthesizeIpa( const char * ipa, int8_t * buffer, size_t b
* Aborts the running synthesis.
* return tts_result
*/
-tts_result TtsEngine::stop()
+tts_result TtsEngine::stop( void )
{
picoSynthAbort = 1;
return TTS_SUCCESS;
@@ -1161,7 +1479,7 @@ tts_result TtsEngine::stop()
extern "C" {
#endif
-TtsEngine* getTtsEngine()
+TtsEngine * getTtsEngine( void )
{
return new TtsEngine();
}
diff --git a/pico/tts/svox_ssml_parser.cpp b/pico/tts/svox_ssml_parser.cpp
new file mode 100755
index 0000000..53dc786
--- /dev/null
+++ b/pico/tts/svox_ssml_parser.cpp
@@ -0,0 +1,893 @@
+/*
+ * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * History:
+ * 2009-06-29 -- initial version
+ *
+ */
+
+#include "svox_ssml_parser.h"
+#include <utils/Log.h>
+#include <cutils/jstring.h>
+#include <string.h>
+
+
+#define SSML_PITCH_XLOW "50"
+#define SSML_PITCH_LOW "75"
+#define SSML_PITCH_MEDIUM "100"
+#define SSML_PITCH_HIGH "150"
+#define SSML_PITCH_XHIGH "200"
+#define SSML_RATE_XSLOW "30"
+#define SSML_RATE_SLOW "60"
+#define SSML_RATE_MEDIUM "100"
+#define SSML_RATE_FAST "250"
+#define SSML_RATE_XFAST "500"
+#define SSML_VOLUME_SILENT "0"
+#define SSML_VOLUME_XLOW "20"
+#define SSML_VOLUME_LOW "60"
+#define SSML_VOLUME_MEDIUM "100"
+#define SSML_VOLUME_LOUD "300"
+#define SSML_VOLUME_XLOUD "450"
+#define SSML_BREAK_NONE "0ms"
+#define SSML_BREAK_XWEAK "100ms"
+#define SSML_BREAK_WEAK "300ms"
+#define SSML_BREAK_MEDIUM "600ms"
+#define SSML_BREAK_STRONG "1s"
+#define SSML_BREAK_XSTRONG "3s"
+
+//TODO JMT remove comment
+//extern int cnvIpaToXsampa(const char16_t* ipaString, char** outXsampaString);
+
+SvoxSsmlParser::SvoxSsmlParser() : m_isInBreak(0), m_appendix(NULL), m_docLanguage(NULL)
+{
+    /* Creates the Expat parser, registers this object's tag/text handlers and
+       allocates an empty 512 byte output buffer.
+       Callers check initSuccessful() before using the object. */
+
+    /* Give the buffer members a defined value first: the destructor and
+       initSuccessful() read them even when parser creation fails below. */
+    m_data = NULL;
+    m_datasize = 0;
+
+    mParser = XML_ParserCreate("UTF-8");
+    if (mParser)
+    {
+        XML_SetElementHandler(mParser, starttagHandler, endtagHandler);
+        XML_SetCharacterDataHandler(mParser, textHandler);
+        XML_SetUserData(mParser, (void*)this);
+        m_datasize = 512;
+        m_data = new char[m_datasize];
+        if (m_data)
+        {
+            m_data[0] = '\0';
+        }
+    }
+}
+
+SvoxSsmlParser::~SvoxSsmlParser()
+{
+    /* Release the Expat parser and every string buffer owned by this object. */
+    if (mParser != NULL)
+    {
+        XML_ParserFree(mParser);
+    }
+    if (m_data != NULL)
+    {
+        delete [] m_data;
+    }
+    if (m_appendix != NULL)
+    {
+        delete [] m_appendix;
+    }
+    if (m_docLanguage != NULL)
+    {
+        delete [] m_docLanguage;
+    }
+}
+
+int SvoxSsmlParser::initSuccessful()
+{
+    /* Non-zero only when both the Expat parser and the output buffer exist. */
+    if (!mParser)
+    {
+        return 0;
+    }
+    return m_data ? 1 : 0;
+}
+
+int SvoxSsmlParser::parseDocument(const char* ssmldoc, int isFinal)
+{
+    /* Feeds a NUL-terminated SSML document to Expat.
+       ssmldoc - document text
+       isFinal - non-zero when this is the last piece of the document
+       Returns the XML_Parse status (XML_STATUS_ERROR on failure). */
+    if (ssmldoc == NULL)
+    {
+        LOGE("Error: parseDocument called with NULL document");
+        return XML_STATUS_ERROR;
+    }
+
+    /* Pass only the document bytes. Including the terminating NUL (strlen + 1)
+       hands Expat a stray character after the root element, which it reports as
+       an invalid token even for well formed documents. */
+    int doclen = (int)strlen(ssmldoc);
+    int status = XML_Parse(mParser, ssmldoc, doclen, isFinal);
+    if (status == XML_STATUS_ERROR)
+    {
+        LOGI("Parser error at line %d: %s\n", (int)XML_GetCurrentLineNumber(mParser), XML_ErrorString(XML_GetErrorCode(mParser)));
+    }
+    return status;
+}
+
+char* SvoxSsmlParser::getParsedDocument()
+{
+    /* The returned buffer stays owned by the parser (it is freed in the destructor). */
+    char* parsed = m_data;
+    return parsed;
+}
+
+char* SvoxSsmlParser::getParsedDocumentLanguage()
+{
+    /* NULL until a <speak> tag with an xml:lang attribute has been parsed.
+       The string stays owned by the parser. */
+    char* language = m_docLanguage;
+    return language;
+}
+
+void SvoxSsmlParser::starttagHandler(void* data, const XML_Char* element, const XML_Char** attributes)
+{
+    /* Expat callback: data is the user data registered in the constructor,
+       i.e. the parser object the event is forwarded to. */
+    SvoxSsmlParser* self = (SvoxSsmlParser*)data;
+    self->startElement(element, attributes);
+}
+
+void SvoxSsmlParser::startElement(const XML_Char* element, const XML_Char** attributes)
+{
+    /* Translates one SSML start tag into the matching SVOX markup and appends it
+       to m_data.
+         element    - tag name
+         attributes - NULL-terminated list of name/value pairs; only valid during this call
+       Supported tags: speak (xml:lang only), p, s, phoneme, break, prosody
+       (pitch, rate, volume) and audio (src). Other tags are ignored.
+       Every branch only collects the text pieces it wants to emit; the buffer is
+       grown and written once at the end, so a failed allocation never leaves a
+       half written tag behind. */
+    const char* parts[9];                       /* pieces to append, in order (prosody needs up to 3 x 3) */
+    int         numParts = 0;
+    char*       levels[3] = { NULL, NULL, NULL };   /* converted prosody levels, released below */
+    int         numLevels = 0;
+    char*       strengthTime = NULL;            /* break time derived from a strength label, released below */
+    char        closers[32];                    /* closing tags endElement has to emit for this prosody tag */
+    int         enterBreak = 0;                 /* set m_isInBreak once the tag has been written */
+    int         i;
+
+    closers[0] = '\0';
+
+    if (strcmp(element, "speak") == 0)
+    {
+        /* A new document starts: drop whatever a previous document left behind.
+           Truncating in place keeps the buffer valid and terminated. */
+        m_data[0] = '\0';
+
+        /* the only attribute supported in the speak tag is xml:lang, all others are ignored */
+        for (i = 0; attributes[i]; i += 2)
+        {
+            if (strcmp(attributes[i], "xml:lang") == 0)
+            {
+                /* Always allocate for the new value: an earlier, shorter language
+                   string must not be reused as the destination. */
+                char* lang = new char[strlen(attributes[i+1]) + 1];
+                if (!lang)
+                {
+                    LOGE("Error: failed to allocate memory for string!\n");
+                    return;
+                }
+                strcpy(lang, attributes[i+1]);
+                delete [] m_docLanguage;
+                m_docLanguage = lang;
+                break;
+            }
+        }
+        return;
+    }
+    else if (strcmp(element, "p") == 0) /* currently no attributes are supported for <p> */
+    {
+        parts[numParts++] = "<p>";
+    }
+    else if (strcmp(element, "s") == 0) /* currently no attributes are supported for <s> */
+    {
+        parts[numParts++] = "<s>";
+    }
+    else if (strcmp(element, "phoneme") == 0) /* only ipa and xsampa alphabets are supported */
+    {
+        int         isIpa = 1;      /* the alphabet is ipa unless xsampa is requested */
+        const char* ph = "";        /* a missing ph attribute is treated as an empty string */
+
+        for (i = 0; attributes[i]; i += 2)
+        {
+            if (strcmp(attributes[i], "alphabet") == 0)
+            {
+                if (strcmp(attributes[i+1], "xsampa") == 0)
+                {
+                    isIpa = 0;
+                }
+            }
+            if (strcmp(attributes[i], "ph") == 0)
+            {
+                ph = attributes[i+1];
+            }
+        }
+        if (isIpa)
+        {
+            /* IPA has to be converted to XSAMPA first, but the converter is not hooked up yet: */
+            //TODO JMT remove comment
+            //size = cnvIpaToXsampa(ipastr, &xsampastr);
+            /* Until then no opening tag is written for IPA input (endElement still
+               appends the closing tag). */
+            LOGE("Error: failed to convert IPA string to XSAMPA, conversion is not available");
+            return;
+        }
+        parts[numParts++] = "<phoneme ph='";
+        parts[numParts++] = ph;
+        parts[numParts++] = "'>";
+    }
+    else if (strcmp(element, "break") == 0)
+    {
+        const char* timeAttr = NULL;
+
+        for (i = 0; attributes[i]; i += 2)
+        {
+            if (strcmp(attributes[i], "time") == 0)
+            {
+                timeAttr = attributes[i+1];     /* an explicit time always wins over strength */
+            }
+            else if (strcmp(attributes[i], "strength") == 0 && !timeAttr && !strengthTime)
+            {
+                strengthTime = convertBreakStrengthToTime(attributes[i+1]);
+            }
+        }
+        parts[numParts++] = "<break time='";
+        if (timeAttr)
+        {
+            parts[numParts++] = timeAttr;
+        }
+        else if (strengthTime)
+        {
+            parts[numParts++] = strengthTime;
+        }
+        else
+        {
+            parts[numParts++] = SSML_BREAK_WEAK; /* if no time or strength attributes are specified, default to weak break */
+        }
+        parts[numParts++] = "'/>";
+        enterBreak = 1;
+    }
+    else if (strcmp(element, "prosody") == 0) /* only pitch, rate and volume attributes are supported */
+    {
+        for (i = 0; attributes[i]; i += 2)
+        {
+            const char* opening;
+            const char* closing;
+            char*       level;
+
+            if (numLevels == 3)
+            {
+                break;  /* parts[], levels[] and closers[] are sized for three attributes */
+            }
+            if (strcmp(attributes[i], "pitch") == 0)
+            {
+                opening = "<pitch level='";
+                closing = "</pitch>";
+                level = convertToSvoxPitch(attributes[i+1]);
+            }
+            else if (strcmp(attributes[i], "rate") == 0)
+            {
+                opening = "<speed level='";
+                closing = "</speed>";
+                level = convertToSvoxRate(attributes[i+1]);
+            }
+            else if (strcmp(attributes[i], "volume") == 0)
+            {
+                opening = "<volume level='";
+                closing = "</volume>";
+                level = convertToSvoxVolume(attributes[i+1]);
+            }
+            else
+            {
+                continue;
+            }
+            levels[numLevels++] = level;
+            parts[numParts++] = opening;
+            parts[numParts++] = level ? level : "100";  /* no converted value: use level 100 (pitch now too) */
+            parts[numParts++] = "'>";
+            strcat(closers, closing);
+        }
+    }
+    else if (strcmp(element, "audio") == 0) /* only 16kHz 16bit wav files are supported as src */
+    {
+        const char* src = "";
+
+        for (i = 0; attributes[i]; i += 2)
+        {
+            if (strcmp(attributes[i], "src") == 0)
+            {
+                src = attributes[i+1];
+            }
+        }
+        parts[numParts++] = "<usesig file='";
+        parts[numParts++] = src;
+        parts[numParts++] = "'>";
+    }
+
+    if (numParts > 0)
+    {
+        char* newAppendix = NULL;
+        int   ok = 1;
+
+        if (closers[0] != '\0')
+        {
+            /* Build the extended list of pending closing tags in a fresh buffer
+               sized for it, so nested prosody tags cannot overrun it. */
+            size_t have = m_appendix ? strlen(m_appendix) : 0;
+            size_t want = have + strlen(closers) + 1;
+            newAppendix = new char[(want < 30) ? 30 : want];
+            if (newAppendix)
+            {
+                newAppendix[0] = '\0';
+                if (m_appendix)
+                {
+                    strcpy(newAppendix, m_appendix);
+                }
+                strcat(newAppendix, closers);
+            }
+            else
+            {
+                ok = 0;
+            }
+        }
+
+        if (ok)
+        {
+            size_t needed = strlen(m_data) + 1;     /* current text plus terminator */
+            for (i = 0; i < numParts; i++)
+            {
+                needed += strlen(parts[i]);
+            }
+            /* Grow by the real shortfall plus some slack; a fixed increment is too
+               small for long attribute values.
+               NOTE(review): assumes growDataSize(n) enlarges m_data by at least n
+               bytes and keeps its contents - confirm. */
+            if (needed > (size_t)m_datasize
+                && !growDataSize((int)(needed - (size_t)m_datasize) + 100))
+            {
+                ok = 0;
+            }
+        }
+
+        if (ok)
+        {
+            for (i = 0; i < numParts; i++)
+            {
+                strcat(m_data, parts[i]);
+            }
+            if (newAppendix)
+            {
+                delete [] m_appendix;
+                m_appendix = newAppendix;
+            }
+            if (enterBreak)
+            {
+                m_isInBreak = 1;
+            }
+        }
+        else
+        {
+            LOGE("Error: failed to allocate memory for string!\n");
+            delete [] newAppendix;
+        }
+    }
+
+    /* Strings handed out by the convert* helpers are released with delete [],
+       as this file already does for the pitch/rate/volume results.
+       NOTE(review): assumes convertBreakStrengthToTime() allocates with new[] like
+       the other converters - confirm. */
+    delete [] strengthTime;
+    for (i = 0; i < numLevels; i++)
+    {
+        delete [] levels[i];
+    }
+}
+
+void SvoxSsmlParser::endtagHandler(void* data, const XML_Char* element)
+{
+    /* Expat callback: forward the end tag to the parser object passed as user data. */
+    SvoxSsmlParser* self = (SvoxSsmlParser*)data;
+    self->endElement(element);
+}
+
+void SvoxSsmlParser::endElement(const XML_Char* element)
+{
+    /* Appends the SVOX closing markup that belongs to an SSML end tag.
+       speak and unknown tags produce nothing; break only clears the in-break flag;
+       prosody emits the closing tags collected in m_appendix and then discards them. */
+    const char* closing = NULL;     /* text to append, NULL when nothing is emitted */
+    int         fromAppendix = 0;   /* closing points at m_appendix */
+
+    if (strcmp(element, "break") == 0)
+    {
+        m_isInBreak = 0; /* indicate we are no longer in break tag */
+        return;
+    }
+
+    if (strcmp(element, "p") == 0)
+    {
+        closing = "</p>";
+    }
+    else if (strcmp(element, "s") == 0)
+    {
+        closing = "</s>";
+    }
+    else if (strcmp(element, "phoneme") == 0)
+    {
+        closing = "</phoneme>";
+    }
+    else if (strcmp(element, "audio") == 0)
+    {
+        closing = "</usesig>";
+    }
+    else if (strcmp(element, "prosody") == 0)
+    {
+        closing = m_appendix;
+        fromAppendix = 1;
+    }
+
+    if (closing == NULL)
+    {
+        return;
+    }
+
+    /* Make room for the closing text and its terminator. */
+    if (strlen(m_data) + strlen(closing) + 1 > (size_t)m_datasize)
+    {
+        if (!growDataSize(100))
+        {
+            LOGE("Error: failed to allocate memory for string!\n");
+            return;
+        }
+    }
+    strcat(m_data, closing);
+
+    if (fromAppendix)
+    {
+        delete [] m_appendix;
+        m_appendix = NULL;
+    }
+}
+
+void SvoxSsmlParser::textHandler(void* data, const XML_Char* text, int length)
+{
+    /* data is interpreted as the parser instance (presumably the pointer registered as Expat user data) */
+    SvoxSsmlParser* self = (SvoxSsmlParser*)data;
+    self->textElement(text, length);
+}
+
+/**
+    textElement
+    Appends a chunk of character data to the internal text field.
+    @text - character data, NOT NUL-terminated; only the first `length` chars are valid
+    @length - number of valid chars in text
+    Text received while inside a break tag is discarded. On allocation failure
+    the error is logged and the chunk is dropped; m_data is left unchanged.
+*/
+void SvoxSsmlParser::textElement(const XML_Char* text, int length)
+{
+    if (m_isInBreak)
+    {
+        return; /* handles the case when someone has added text inside the break tag - this text is thrown away */
+    }
+
+    if (!text || length <= 0)
+    {
+        return; /* nothing to append; also guards against a negative length */
+    }
+
+    /* room needed for existing text, the new chunk and the terminating NUL */
+    const size_t needed = strlen(m_data) + (size_t)length + 1;
+    if (needed > (size_t)m_datasize)
+    {
+        /* grow by at least the shortfall: a fixed step of 100 is too small for long text chunks */
+        const size_t shortfall = needed - (size_t)m_datasize;
+        const int growBy = (shortfall > 100) ? (int)shortfall : 100;
+        if (!growDataSize(growBy))
+        {
+            LOGE("Error: failed to allocate memory for string!\n");
+            return;
+        }
+    }
+
+    /* append straight from the input: strncat copies at most length chars and always NUL-terminates,
+       so no temporary copy is needed (and none can leak on the error path) */
+    strncat(m_data, text, (size_t)length);
+}
+
+/**
+    convertToSvoxPitch
+    Maps an SSML pitch label onto the corresponding SVOX pitch level.
+    Returns a string allocated with new[] (ownership passes to the caller),
+    or NULL if the label is not recognized or the allocation fails.
+*/
+char* SvoxSsmlParser::convertToSvoxPitch(const char* value)
+{
+    /* "default" shares the level of "medium" */
+    static const struct { const char* label; const char* level; } kPitchMap[] =
+    {
+        { "x-low",   SSML_PITCH_XLOW   },
+        { "low",     SSML_PITCH_LOW    },
+        { "medium",  SSML_PITCH_MEDIUM },
+        { "default", SSML_PITCH_MEDIUM },
+        { "high",    SSML_PITCH_HIGH   },
+        { "x-high",  SSML_PITCH_XHIGH  }
+    };
+
+    const char* level = NULL;
+    for (size_t idx = 0; idx < sizeof(kPitchMap) / sizeof(kPitchMap[0]); ++idx)
+    {
+        if (strcmp(value, kPitchMap[idx].label) == 0)
+        {
+            level = kPitchMap[idx].level;
+            break;
+        }
+    }
+    if (level == NULL)
+    {
+        return NULL; /* unknown label */
+    }
+
+    char* converted = new char[4];
+    if (!converted)
+    {
+        LOGE("Error: failed to allocate memory for string!\n");
+        return NULL;
+    }
+    strcpy(converted, level);
+    return converted;
+}
+
+/**
+    convertToSvoxRate
+    Maps an SSML rate label onto the corresponding SVOX speed level.
+    Returns a string allocated with new[] (ownership passes to the caller),
+    or NULL if the label is not recognized or the allocation fails.
+*/
+char* SvoxSsmlParser::convertToSvoxRate(const char* value)
+{
+    /* "default" shares the level of "medium" */
+    static const struct { const char* label; const char* level; } kRateMap[] =
+    {
+        { "x-slow",  SSML_RATE_XSLOW  },
+        { "slow",    SSML_RATE_SLOW   },
+        { "medium",  SSML_RATE_MEDIUM },
+        { "default", SSML_RATE_MEDIUM },
+        { "fast",    SSML_RATE_FAST   },
+        { "x-fast",  SSML_RATE_XFAST  }
+    };
+
+    const char* level = NULL;
+    for (size_t idx = 0; idx < sizeof(kRateMap) / sizeof(kRateMap[0]); ++idx)
+    {
+        if (strcmp(value, kRateMap[idx].label) == 0)
+        {
+            level = kRateMap[idx].level;
+            break;
+        }
+    }
+    if (level == NULL)
+    {
+        return NULL; /* unknown label */
+    }
+
+    char* converted = new char[4];
+    if (!converted)
+    {
+        LOGE("Error: failed to allocate memory for string!\n");
+        return NULL;
+    }
+    strcpy(converted, level);
+    return converted;
+}
+
+/**
+    convertToSvoxVolume
+    Maps an SSML volume label onto the corresponding SVOX volume level.
+    Returns a string allocated with new[] (ownership passes to the caller),
+    or NULL if the label is not recognized or the allocation fails.
+*/
+char* SvoxSsmlParser::convertToSvoxVolume(const char* value)
+{
+    /* "default" shares the level of "medium" */
+    static const struct { const char* label; const char* level; } kVolumeMap[] =
+    {
+        { "silent",  SSML_VOLUME_SILENT },
+        { "x-low",   SSML_VOLUME_XLOW   },
+        { "low",     SSML_VOLUME_LOW    },
+        { "medium",  SSML_VOLUME_MEDIUM },
+        { "default", SSML_VOLUME_MEDIUM },
+        { "loud",    SSML_VOLUME_LOUD   },
+        { "x-loud",  SSML_VOLUME_XLOUD  }
+    };
+
+    const char* level = NULL;
+    for (size_t idx = 0; idx < sizeof(kVolumeMap) / sizeof(kVolumeMap[0]); ++idx)
+    {
+        if (strcmp(value, kVolumeMap[idx].label) == 0)
+        {
+            level = kVolumeMap[idx].level;
+            break;
+        }
+    }
+    if (level == NULL)
+    {
+        return NULL; /* unknown label */
+    }
+
+    char* converted = new char[4];
+    if (!converted)
+    {
+        LOGE("Error: failed to allocate memory for string!\n");
+        return NULL;
+    }
+    strcpy(converted, level);
+    return converted;
+}
+
+/**
+    convertBreakStrengthToTime
+    Maps an SSML break strength label onto the corresponding SVOX break time.
+    Returns a string allocated with new[] (ownership passes to the caller),
+    or NULL if the label is not recognized or the allocation fails.
+*/
+char* SvoxSsmlParser::convertBreakStrengthToTime(const char* value)
+{
+    static const struct { const char* label; const char* time; } kBreakMap[] =
+    {
+        { "none",     SSML_BREAK_NONE    },
+        { "x-weak",   SSML_BREAK_XWEAK   },
+        { "weak",     SSML_BREAK_WEAK    },
+        { "medium",   SSML_BREAK_MEDIUM  },
+        { "strong",   SSML_BREAK_STRONG  },
+        { "x-strong", SSML_BREAK_XSTRONG }
+    };
+
+    const char* time = NULL;
+    for (size_t idx = 0; idx < sizeof(kBreakMap) / sizeof(kBreakMap[0]); ++idx)
+    {
+        if (strcmp(value, kBreakMap[idx].label) == 0)
+        {
+            time = kBreakMap[idx].time;
+            break;
+        }
+    }
+    if (time == NULL)
+    {
+        return NULL; /* unknown label */
+    }
+
+    char* converted = new char[6];
+    if (!converted)
+    {
+        LOGE("Error: failed to allocate memory for string!\n");
+        return NULL;
+    }
+    strcpy(converted, time);
+    return converted;
+}
+
+/**
+    growDataSize
+    Increases the size of the internal text storage member
+    @sizeToGrow - number of additional bytes to reserve, must not be negative
+    return 1 if the buffer was enlarged, 0 otherwise; on failure m_data and
+    m_datasize are left untouched, so the existing text stays valid
+*/
+int SvoxSsmlParser::growDataSize(int sizeToGrow)
+{
+    /* a negative amount would shrink the buffer below the text it already holds */
+    if (sizeToGrow < 0)
+        return 0;
+
+    /* allocate the larger buffer first: the old one is only released once the
+       new one exists, so no scratch copy is needed and failure is harmless */
+    char* grown = new char[m_datasize + sizeToGrow];
+    if (!grown)
+        return 0;
+
+    if (m_data)
+        strcpy(grown, m_data);
+    else
+        grown[0] = '\0';
+
+    delete [] m_data;
+    m_data = grown;
+    m_datasize += sizeToGrow;
+    return 1;
+}
diff --git a/pico/tts/svox_ssml_parser.h b/pico/tts/svox_ssml_parser.h
new file mode 100755
index 0000000..cc83305
--- /dev/null
+++ b/pico/tts/svox_ssml_parser.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * History:
+ * 2009-06-29 -- initial version
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <libexpat/expat.h>
+
+#ifndef _SVOX_SSML_PARSER_H_
+#define _SVOX_SSML_PARSER_H_
+
+/**
+ * SvoxSsmlParser
+ * Parses SSML 1.0 XML documents and converts them to Pico-compatible text input
+ */
+class SvoxSsmlParser
+{
+ public: /* construction code */
+
+ /**
+ Constructor
+ Creates Expat parser and allocates initial text storage
+ */
+ SvoxSsmlParser();
+
+ /**
+ Destructor
+ Deallocates all resources
+ */
+ ~SvoxSsmlParser();
+
+ /**
+ initSuccessful
+ Verifies that construction was successful
+ return 1 if successful, 0 otherwise
+ */
+ int initSuccessful();
+
+ public: /* public methods */
+
+ /**
+ parseDocument
+ Parses SSML 1.0 document passed in as argument
+ @ssmldoc - SSML document, partial document input is supported
+ @isFinal - indicates whether input is a complete or partial document, 1 indicates complete document, 0 indicates partial
+ return Expat status code
+ */
+ int parseDocument(const char* ssmldoc, int isFinal);
+
+ /**
+ getParsedDocument
+ Returns string containing parse result. This can be passed on to Pico for synthesis
+ return parsed string, NULL if error occurred
+ */
+ char* getParsedDocument();
+
+ /**
+ getParsedDocumentLanguage
+ Returns language string specified in xml:lang attribute of the <speak> tag
+ return language code of SSML document, NULL if not set
+ */
+ char* getParsedDocumentLanguage();
+
+ private: /* static callback functions */
+
+ /**
+ starttagHandler
+ Static callback function for Expat start-tag events, internal use only
+ */
+ static void starttagHandler(void* data, const XML_Char* element, const XML_Char** attributes);
+
+ /**
+ endtagHandler
+ Static callback function for Expat end-tag events, forwards to endElement of the parser passed in data, internal use only
+ */
+ static void endtagHandler(void* data, const XML_Char* element);
+
+ /**
+ textHandler
+ Static callback function for Expat text events, forwards to textElement of the parser passed in data, internal use only
+ */
+ static void textHandler(void* data, const XML_Char* text, int length);
+
+ private: /* element handlers */
+
+ /**
+ startElement
+ Handles start of element, called by starttagHandler.
+ */
+ void startElement(const XML_Char* element, const XML_Char** attributes);
+
+ /**
+ endElement
+ Handles end of element, called by endtagHandler. Appends the matching Pico close tag(s) to the internal text field.
+ */
+ void endElement(const XML_Char* element);
+
+ /**
+ textElement
+ Handles text element, called by textHandler. Text is appended to the internal text field; text inside a break tag is discarded.
+ */
+ void textElement(const XML_Char* text, int length);
+
+ /* helper functions */
+
+ /**
+ convertToSvoxPitch
+ Converts SSML prosody tag pitch values to SVOX Pico pitch values. Returns a new[]-allocated string, NULL if the value is not recognized.
+ */
+ char* convertToSvoxPitch(const char* value);
+
+ /**
+ convertToSvoxRate
+ Converts SSML prosody tag rate values to SVOX Pico speed values. Returns a new[]-allocated string, NULL if the value is not recognized.
+ */
+ char* convertToSvoxRate(const char* value);
+
+ /**
+ convertToSvoxVolume
+ Converts SSML prosody tag volume values to SVOX Pico volume values. Returns a new[]-allocated string, NULL if the value is not recognized.
+ */
+ char* convertToSvoxVolume(const char* value);
+
+ /**
+ convertBreakStrengthToTime
+ Converts SSML break tag strength attribute values to SVOX Pico break time values. Returns a new[]-allocated string, NULL if the value is not recognized.
+ */
+ char* convertBreakStrengthToTime(const char* value);
+
+ /**
+ growDataSize
+ Increases size of internal text field by sizeToGrow bytes. Returns 1 on success, 0 if the allocation failed.
+ */
+ int growDataSize(int sizeToGrow);
+
+ private: /* data members*/
+
+ char* m_data; /* internal text field, holds parsed text */
+ int m_datasize; /* size of internal text field */
+ XML_Parser mParser; /* Expat XML parser pointer */
+ int m_isInBreak; /* indicator for handling break tag parsing */
+ char* m_appendix; /* holds Pico pitch, speed and volume close tags for prosody tag parsing */
+ char* m_docLanguage; /* language set in speak tag of SSML document */
+};
+
+#endif // _SVOX_SSML_PARSER_H_