diff options
author | Jean-Michel Trivi <jmtrivi@google.com> | 2009-07-01 18:13:11 -0700 |
---|---|---|
committer | Jean-Michel Trivi <jmtrivi@google.com> | 2009-07-01 18:43:33 -0700 |
commit | 39358f0dacad8cece6c2d3ef1055030f57090c79 (patch) | |
tree | 94e7279dc71010d536cc7bd9743ab4b603927d20 /pico/tts | |
parent | 5f6105e73f0c74acd10126175d4952ade10a0b05 (diff) | |
download | external_svox-39358f0dacad8cece6c2d3ef1055030f57090c79.zip external_svox-39358f0dacad8cece6c2d3ef1055030f57090c79.tar.gz external_svox-39358f0dacad8cece6c2d3ef1055030f57090c79.tar.bz2 |
- Integrate SVOX's latest code drop.
- Fix a bug where TTS_SYNTH_DONE was signaled to the TTS service synth proxy
even though the synthesis wasn't done (more data to be played).
- Deprecated the synthesizeIPA() function as it will be supported by the
phoneme tag in synthesize().
Diffstat (limited to 'pico/tts')
-rw-r--r-- | pico/tts/com_svox_picottsengine.cpp | 1172 | ||||
-rwxr-xr-x | pico/tts/svox_ssml_parser.cpp | 893 | ||||
-rwxr-xr-x | pico/tts/svox_ssml_parser.h | 162 |
3 files changed, 1800 insertions, 427 deletions
diff --git a/pico/tts/com_svox_picottsengine.cpp b/pico/tts/com_svox_picottsengine.cpp index 8a6d1d5..49c1f22 100644 --- a/pico/tts/com_svox_picottsengine.cpp +++ b/pico/tts/com_svox_picottsengine.cpp @@ -1,4 +1,5 @@ -/* +/* com_svox_picottsengine.cpp + * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,11 +13,35 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - * - * History: - * 2009-05-18 -- initial version - * 2009-06-04 -- updated for new TtsEngine interface - * + + This is the Manager layer. It sits on top of the native Pico engine + and provides the interface to the defined Google TTS engine API. + The Google engine API is the boundary to allow a TTS engine to be swapped. + The Manager layer also provide the SSML tag interpretation. + The supported SSML tags are mapped to corresponding tags natively supported by Pico. + Native Pico functions always begin with picoXXX. + + In the Pico engine, the language cannot be changed indpendently of the voice. + If either the voice or locale/language are changed, a new resource is loaded. + + Only a subset of SSML 1.0 tags are supported. + Some SSML tags involve significant complexity. + If the language is changed through an SSML tag, there is a latency for the load. + + History: + 2009-07-01 -- clean up & documentation + 2009-06-29 -- integrated SSML parser and IPA to XSAMPA conversion + 2009-06-29 -- revised for C90 compliance + 2009-06-04 -- updated for new TtsEngine interface + 2009-05-18 -- initial version + + + ToDo: + - duplicate 'phoneme ph=' *** + - normalize IPA strings ** + - change voice as well as language * + Changing of voice/language is complex due to potentially multiple swaps + under SSML and the latent effects. This requires storing a history of tags/sets. */ #include <stdio.h> @@ -25,70 +50,75 @@ #define LOG_TAG "SVOX Pico Engine" #include <utils/Log.h> +#include <utils/String16.h> /* for strlen16 */ #include <android_runtime/AndroidRuntime.h> #include <tts/TtsEngine.h> +#include <cutils/jstring.h> #include <picoapi.h> #include <picodefs.h> +#include "svox_ssml_parser.h" using namespace android; /* adaptation layer defines */ #define PICO_MEM_SIZE 2500000 -#define PICO_MIN_RATE 20 /* speaking rate */ -#define PICO_DEF_RATE 100 +#define PICO_MIN_RATE 20 #define PICO_MAX_RATE 500 -#define PICO_MIN_PITCH 50 +#define PICO_DEF_RATE 100 /* speaking pitch */ -#define PICO_DEF_PITCH 100 +#define PICO_MIN_PITCH 50 #define PICO_MAX_PITCH 200 -#define PICO_MIN_VOLUME 0 +#define PICO_DEF_PITCH 100 /* speaking volume */ -#define PICO_DEF_VOLUME 400 +#define PICO_MIN_VOLUME 0 #define PICO_MAX_VOLUME 500 +#define PICO_DEF_VOLUME 250 + /* string constants */ #define MAX_OUTBUF_SIZE 128 -const char* PICO_LINGWARE_PATH = "/sdcard/svox/"; -const char* PICO_VOICE_NAME = "PicoVoice"; -const char* PICO_SPEED_OPEN_TAG = "<speed level='%d'>"; -const char* PICO_SPEED_CLOSE_TAG = "</speed>"; -const char* PICO_PITCH_OPEN_TAG = "<pitch level='%d'>"; -const char* PICO_PITCH_CLOSE_TAG = "</pitch>"; -const char* PICO_VOLUME_OPEN_TAG = "<volume level='%d'>"; -const char* PICO_VOLUME_CLOSE_TAG = "</volume>"; -const char* PICO_PHONEME_OPEN_TAG = "<phoneme ph=\"%s\">"; - -/* supported voices */ -const char* picoSupportedLangIso3[] = { "eng", "eng", "deu", "spa", "fra", "ita" }; -const char* picoSupportedCountryIso3[] = { "USA", "GBR", "DEU", "ESP", "FRA", "ITA" }; -const char* picoSupportedLang[] = { "en-rUS", "en-rGB", "de-rDE", "es-rES", "fr-rFR", "it-rIT" }; -const char* picoInternalLang[] = { "en-US", "en-GB", "de-DE", "es-ES", "fr-FR", "it-IT" }; -const char* picoInternalTaLingware[] = { "en-US_ta.bin", "en-GB_ta.bin", "de-DE_ta.bin", "es-ES_ta.bin", "fr-FR_ta.bin", "it-IT_ta.bin" }; -const char* picoInternalSgLingware[] = { "en-US_lh0_sg.bin", "en-GB_kh0_sg.bin", "de-DE_gl0_sg.bin", "es-ES_zl0_sg.bin", "fr-FR_nk0_sg.bin", "it-IT_cm0_sg.bin" }; -const char* picoInternalUtppLingware[] = { "en-US_utpp.bin", "en-GB_utpp.bin", "de-DE_utpp.bin", "es-ES_utpp.bin", "fr-FR_utpp.bin", "it-IT_utpp.bin" }; -const int picoNumSupportedLang = 6; +const char * PICO_LINGWARE_PATH = "/sdcard/svox/"; +const char * PICO_VOICE_NAME = "PicoVoice"; +const char * PICO_SPEED_OPEN_TAG = "<speed level='%d'>"; +const char * PICO_SPEED_CLOSE_TAG = "</speed>"; +const char * PICO_PITCH_OPEN_TAG = "<pitch level='%d'>"; +const char * PICO_PITCH_CLOSE_TAG = "</pitch>"; +const char * PICO_VOLUME_OPEN_TAG = "<volume level='%d'>"; +const char * PICO_VOLUME_CLOSE_TAG = "</volume>"; +const char * PICO_PHONEME_OPEN_TAG = "<phoneme ph='"; + +/* supported voices + Pico does not seperately specify the voice and locale. */ +const char * picoSupportedLangIso3[] = { "eng", "eng", "deu", "spa", "fra", "ita" }; +const char * picoSupportedCountryIso3[] = { "USA", "GBR", "DEU", "ESP", "FRA", "ITA" }; +const char * picoSupportedLang[] = { "en-rUS", "en-rGB", "de-rDE", "es-rES", "fr-rFR", "it-rIT" }; +const char * picoInternalLang[] = { "en-US", "en-GB", "de-DE", "es-ES", "fr-FR", "it-IT" }; +const char * picoInternalTaLingware[] = { "en-US_ta.bin", "en-GB_ta.bin", "de-DE_ta.bin", "es-ES_ta.bin", "fr-FR_ta.bin", "it-IT_ta.bin" }; +const char * picoInternalSgLingware[] = { "en-US_lh0_sg.bin", "en-GB_kh0_sg.bin", "de-DE_gl0_sg.bin", "es-ES_zl0_sg.bin", "fr-FR_nk0_sg.bin", "it-IT_cm0_sg.bin" }; +const char * picoInternalUtppLingware[] = { "en-US_utpp.bin", "en-GB_utpp.bin", "de-DE_utpp.bin", "es-ES_utpp.bin", "fr-FR_utpp.bin", "it-IT_utpp.bin" }; +const int picoNumSupportedVocs = 6; /* supported properties */ -const char* picoSupportedProperties[] = { "language", "rate", "pitch", "volume" }; -const int picoNumSupportedProperties = 4; +const char * picoSupportedProperties[] = { "language", "rate", "pitch", "volume" }; +const int picoNumSupportedProperties = 4; /* adapation layer global variables */ -synthDoneCB_t* picoSynthDoneCBPtr; -void* picoMemArea = NULL; -pico_System picoSystem = NULL; -pico_Resource picoTaResource = NULL; -pico_Resource picoSgResource = NULL; -pico_Resource picoUtppResource = NULL; -pico_Engine picoEngine = NULL; -pico_Char* picoTaFileName = NULL; -pico_Char* picoSgFileName = NULL; -pico_Char* picoUtppFileName = NULL; -pico_Char* picoTaResourceName = NULL; -pico_Char* picoSgResourceName = NULL; -pico_Char* picoUtppResourceName = NULL; +synthDoneCB_t * picoSynthDoneCBPtr; +void * picoMemArea = NULL; +pico_System picoSystem = NULL; +pico_Resource picoTaResource = NULL; +pico_Resource picoSgResource = NULL; +pico_Resource picoUtppResource = NULL; +pico_Engine picoEngine = NULL; +pico_Char * picoTaFileName = NULL; +pico_Char * picoSgFileName = NULL; +pico_Char * picoUtppFileName = NULL; +pico_Char * picoTaResourceName = NULL; +pico_Char * picoSgResourceName = NULL; +pico_Char * picoUtppResourceName = NULL; int picoSynthAbort = 0; -char* picoProp_currLang = NULL; /* current language */ +char * picoProp_currLang = NULL; /* current language */ int picoProp_currRate = PICO_DEF_RATE; /* current rate */ int picoProp_currPitch = PICO_DEF_PITCH; /* current pitch */ int picoProp_currVolume = PICO_DEF_VOLUME; /* current volume */ @@ -98,31 +128,38 @@ int picoCurrentLangIndex = -1; /* internal helper functions */ -/** checkForLanguage - * Check if the requested language is among the supported languages. - * @language - the language to check, either in xx or xx-rYY format - * return index of the language, or -1 if not supported. +/** checkForLocale + * Check whether the requested locale is among the supported locales. + * @locale - the locale to check, either in xx or xx-rYY format + * return index of the locale, or -1 if not supported. */ -static int checkForLanguage( const char * language ) +static int checkForLocale( const char * locale ) { int found = -1; /* language not found */ + int i; + if (locale == NULL) { + LOGE("checkForLanguage called with NULL language"); + return found; + } /* Verify that the requested locale is a locale that we support. */ - for (int i = 0; i < picoNumSupportedLang; i++) + for (i = 0; i < picoNumSupportedVocs; i ++) { - if (strcmp(language, picoSupportedLang[i]) == 0) + if (strcmp(locale, picoSupportedLang[i]) == 0) /* in array */ { found = i; break; } }; + + /* The locale was not found. */ if (found < 0) { /* We didn't find an exact match; it may have been specified with only the first 2 characters. - This could overmatch ISO 639-3 language codes. */ - for (int i = 0; i < picoNumSupportedLang; i++) + This could overmatch ISO 639-3 language codes.%% */ + for (i = 0; i < picoNumSupportedVocs; i ++) { - if (strncmp(language, picoSupportedLang[i], 2) == 0) + if (strncmp(locale, picoSupportedLang[i], 2) == 0) { found = i; break; @@ -130,7 +167,7 @@ static int checkForLanguage( const char * language ) } if (found < 0) { - LOGE("TtsEngine::set language called with unsupported language"); + LOGE("TtsEngine::set language called with unsupported locale"); } }; return found; @@ -145,7 +182,7 @@ static void cleanResources( void ) if (picoEngine) { pico_disposeEngine( picoSystem, &picoEngine ); - pico_releaseVoiceDefinition(picoSystem, (pico_Char*)PICO_VOICE_NAME); + pico_releaseVoiceDefinition( picoSystem, (pico_Char *) PICO_VOICE_NAME ); picoEngine = NULL; } if (picoUtppResource) @@ -174,43 +211,43 @@ static void cleanFiles( void ) { if (picoProp_currLang) { - free(picoProp_currLang); + free( picoProp_currLang ); picoProp_currLang = NULL; } if (picoTaFileName) { - free(picoTaFileName); + free( picoTaFileName ); picoTaFileName = NULL; } if (picoSgFileName) { - free(picoSgFileName); + free( picoSgFileName ); picoSgFileName = NULL; } if (picoUtppFileName) { - free(picoUtppFileName); + free( picoUtppFileName ); picoUtppFileName = NULL; } if (picoTaResourceName) { - free(picoTaResourceName); + free( picoTaResourceName ); picoTaResourceName = NULL; } if (picoSgResourceName) { - free(picoSgResourceName); + free( picoSgResourceName ); picoSgResourceName = NULL; } if (picoUtppResourceName) { - free(picoUtppResourceName); + free( picoUtppResourceName ); picoUtppResourceName = NULL; } } @@ -249,44 +286,47 @@ static bool hasResourcesForLanguage(int langIndex) { } /** doLanguageSwitchFromLangIndex - * Switch to requested language. If language is already loaded it returns - * immediately, if another language is loaded this will first be unloaded - * and the new one then loaded. If no language is loaded the requested will be loaded. - * @langIndex - the index of the language to load, which is guaranteed to be supported. + * Switch to the requested locale. + * If the locale is already loaded, it returns immediately. + * If another locale is already is loaded, it will first be unloaded and the new one then loaded. + * If no locale is loaded, the requested locale will be loaded. + * @langIndex - the index of the locale/voice to load, which is guaranteed to be supported. * return TTS_SUCCESS or TTS_FAILURE */ -static tts_result doLanguageSwitchFromLangIndex(int langIndex) +static tts_result doLanguageSwitchFromLangIndex( int langIndex ) { - // if we already have a loaded language, check if it's the same one as requested + int ret; /* function result code */ + + /* If we already have a loaded locale, check whether it is the same one as requested. */ if (picoProp_currLang && (strcmp(picoProp_currLang, picoSupportedLang[langIndex]) == 0)) { LOGI("Language already loaded (%s == %s)", picoProp_currLang, picoSupportedLang[langIndex]); return TTS_SUCCESS; } - // not the same language, unload the current one first + /* It is not the same locale; unload the current one first. */ cleanResources(); - // allocate memory for file and resource names + /* Allocate memory for file and resource names. */ cleanFiles(); - picoProp_currLang = (char*)malloc(10); - picoTaFileName = (pico_Char*)malloc(PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE); - picoSgFileName = (pico_Char*)malloc(PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE); - picoUtppFileName = (pico_Char*)malloc(PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE); - picoTaResourceName = (pico_Char*)malloc(PICO_MAX_RESOURCE_NAME_SIZE); - picoSgResourceName = (pico_Char*)malloc(PICO_MAX_RESOURCE_NAME_SIZE); - picoUtppResourceName = (pico_Char*)malloc(PICO_MAX_RESOURCE_NAME_SIZE); - - // set path and file names for resource files - strcpy((char*)picoTaFileName, PICO_LINGWARE_PATH); - strcat((char*)picoTaFileName, (const char*)picoInternalTaLingware[langIndex]); - strcpy((char*)picoSgFileName, PICO_LINGWARE_PATH); - strcat((char*)picoSgFileName, (const char*)picoInternalSgLingware[langIndex]); - strcpy((char*)picoUtppFileName, PICO_LINGWARE_PATH); - strcat((char*)picoUtppFileName, (const char*)picoInternalUtppLingware[langIndex]); - - // load text analysis Lingware resource file - int ret = pico_loadResource(picoSystem, picoTaFileName, &picoTaResource); + picoProp_currLang = (char *) malloc( 10 ); + picoTaFileName = (pico_Char *) malloc( PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE ); + picoSgFileName = (pico_Char *) malloc( PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE ); + picoUtppFileName = (pico_Char *) malloc( PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE ); + picoTaResourceName = (pico_Char *) malloc( PICO_MAX_RESOURCE_NAME_SIZE ); + picoSgResourceName = (pico_Char *) malloc( PICO_MAX_RESOURCE_NAME_SIZE ); + picoUtppResourceName =(pico_Char *) malloc( PICO_MAX_RESOURCE_NAME_SIZE ); + + /* Set the path and file names for resource files. */ + strcpy((char *) picoTaFileName, PICO_LINGWARE_PATH); + strcat((char *) picoTaFileName, (const char *) picoInternalTaLingware[langIndex]); + strcpy((char *) picoSgFileName, PICO_LINGWARE_PATH); + strcat((char *) picoSgFileName, (const char *) picoInternalSgLingware[langIndex]); + strcpy((char *) picoUtppFileName, PICO_LINGWARE_PATH); + strcat((char *) picoUtppFileName, (const char *) picoInternalUtppLingware[langIndex]); + + /* Load the text analysis Lingware resource file. */ + ret = pico_loadResource( picoSystem, picoTaFileName, &picoTaResource ); if (PICO_OK != ret) { LOGE("Failed to load textana resource for %s [%d]", picoSupportedLang[langIndex], ret); @@ -295,8 +335,8 @@ static tts_result doLanguageSwitchFromLangIndex(int langIndex) return TTS_FAILURE; } - // load signal generation Lingware resource file - ret = pico_loadResource(picoSystem, picoSgFileName, &picoSgResource); + /* Load the signal generation Lingware resource file. */ + ret = pico_loadResource( picoSystem, picoSgFileName, &picoSgResource ); if (PICO_OK != ret) { LOGE("Failed to load siggen resource for %s [%d]", picoSupportedLang[langIndex], ret); @@ -305,20 +345,20 @@ static tts_result doLanguageSwitchFromLangIndex(int langIndex) return TTS_FAILURE; } - // Load utpp Lingware resource file if exists - NOTE: this file is optional - // and is currently not used. Loading is only attempted for future compatibility. - // If this file is not present the loading will still succeed. - ret = pico_loadResource(picoSystem, picoUtppFileName, &picoUtppResource); - if (PICO_OK != ret && ret != PICO_EXC_CANT_OPEN_FILE) - { + /* Load the utpp Lingware resource file if exists - NOTE: this file is optional + and is currently not used. Loading is only attempted for future compatibility. + If this file is not present the loading will still succeed. */ + ret = pico_loadResource( picoSystem, picoUtppFileName, &picoUtppResource ); + if ((PICO_OK != ret) && (ret != PICO_EXC_CANT_OPEN_FILE)) + { LOGE("Failed to load utpp resource for %s [%d]", picoSupportedLang[langIndex], ret); cleanResources(); cleanFiles(); return TTS_FAILURE; - } + } - // Get text analysis resource name - ret = pico_getResourceName(picoSystem, picoTaResource, (char*)picoTaResourceName); + /* Get the text analysis resource name. */ + ret = pico_getResourceName( picoSystem, picoTaResource, (char *) picoTaResourceName ); if (PICO_OK != ret) { LOGE("Failed to get textana resource name for %s [%d]", picoSupportedLang[langIndex], ret); @@ -327,12 +367,12 @@ static tts_result doLanguageSwitchFromLangIndex(int langIndex) return TTS_FAILURE; } - // Get signal generation resource name - ret = pico_getResourceName(picoSystem, picoSgResource, (char*)picoSgResourceName); + /* Get the signal generation resource name. */ + ret = pico_getResourceName( picoSystem, picoSgResource, (char *) picoSgResourceName ); if (PICO_OK == ret && picoUtppResource != NULL) { - // Get utpp resource name - optional: see note above - ret = pico_getResourceName(picoSystem, picoUtppResource, (char*)picoUtppResourceName); + /* Get utpp resource name - optional: see note above. */ + ret = pico_getResourceName( picoSystem, picoUtppResource, (char *) picoUtppResourceName ); if (PICO_OK != ret) { LOGE("Failed to get utpp resource name for %s [%d]", picoSupportedLang[langIndex], ret); @@ -349,8 +389,8 @@ static tts_result doLanguageSwitchFromLangIndex(int langIndex) return TTS_FAILURE; } - // create a voice definition - ret = pico_createVoiceDefinition(picoSystem, (const pico_Char*)PICO_VOICE_NAME); + /* Create a voice definition. */ + ret = pico_createVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME ); if (PICO_OK != ret) { LOGE("Failed to create voice for %s [%d]", picoSupportedLang[langIndex], ret); @@ -359,8 +399,8 @@ static tts_result doLanguageSwitchFromLangIndex(int langIndex) return TTS_FAILURE; } - // add text analysis resource to voice - ret = pico_addResourceToVoiceDefinition(picoSystem, (const pico_Char*)PICO_VOICE_NAME, picoTaResourceName); + /* Add the text analysis resource to the voice. */ + ret = pico_addResourceToVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME, picoTaResourceName ); if (PICO_OK != ret) { LOGE("Failed to add textana resource to voice for %s [%d]", picoSupportedLang[langIndex], ret); @@ -369,12 +409,12 @@ static tts_result doLanguageSwitchFromLangIndex(int langIndex) return TTS_FAILURE; } - // add signal generation resource to voice - ret = pico_addResourceToVoiceDefinition(picoSystem, (const pico_Char*)PICO_VOICE_NAME, picoSgResourceName); + /* Add the signal generation resource to the voice. */ + ret = pico_addResourceToVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME, picoSgResourceName ); if (PICO_OK == ret && picoUtppResource != NULL) { - // add utpp resource to voice - optional: see note above - ret = pico_addResourceToVoiceDefinition(picoSystem, (const pico_Char*)PICO_VOICE_NAME, picoUtppResourceName); + /* Add utpp resource to voice - optional: see note above. */ + ret = pico_addResourceToVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME, picoUtppResourceName ); if (PICO_OK != ret) { LOGE("Failed to add utpp resource to voice for %s [%d]", picoSupportedLang[langIndex], ret); @@ -392,7 +432,7 @@ static tts_result doLanguageSwitchFromLangIndex(int langIndex) return TTS_FAILURE; } - ret = pico_newEngine(picoSystem, (const pico_Char*)PICO_VOICE_NAME, &picoEngine); + ret = pico_newEngine( picoSystem, (const pico_Char *) PICO_VOICE_NAME, &picoEngine ); if (PICO_OK != ret) { LOGE("Failed to create engine for %s [%d]", picoSupportedLang[langIndex], ret); @@ -401,47 +441,53 @@ static tts_result doLanguageSwitchFromLangIndex(int langIndex) return TTS_FAILURE; } - strcpy(picoProp_currLang, picoSupportedLang[langIndex]); - picoCurrentLangIndex = langIndex; - + /* Set the current locale/voice. */ + strcpy( picoProp_currLang, picoSupportedLang[langIndex] ); LOGI("loaded %s successfully", picoProp_currLang); - return TTS_SUCCESS; } + /** doLanguageSwitch - * Switch to requested language. If language is already loaded it returns - * immediately, if another language is loaded this will first be unloaded - * and the new one then loaded. If no language is loaded the requested will be loaded. - * @language - the language to check, either in xx or xx-rYY format (i.e "en" or "en-rUS") + * Switch to the requested locale. + * If this locale is already loaded, it returns immediately. + * If another locale is already loaded, this will first be unloaded + * and the new one then loaded. + * If no locale is loaded, the requested will be loaded. + * @locale - the locale to check, either in xx or xx-rYY format (i.e "en" or "en-rUS") * return TTS_SUCCESS or TTS_FAILURE */ -static tts_result doLanguageSwitch(const char* language) +static tts_result doLanguageSwitch( const char * locale ) { - // load new language - int langIndex = checkForLanguage(language); - if (langIndex < 0) - { - LOGE("Tried to swith to non-supported language %s", language); - return TTS_FAILURE; - } - LOGI("Found supported language %s", picoSupportedLang[langIndex]); + int loclIndex; /* locale index */ - return doLanguageSwitchFromLangIndex( langIndex ); + /* Load the new locale. */ + loclIndex = checkForLocale( locale ); + if (loclIndex < 0) + { + LOGE("Tried to swith to non-supported locale %s", locale); + return TTS_FAILURE; + } + LOGI("Found supported locale %s", picoSupportedLang[loclIndex]); + return doLanguageSwitchFromLangIndex( loclIndex ); } + /** doAddProperties - * Add <speed>, <pitch> and <volume> tags to text, if the properties have been set to non-default values, - * and return the new string. The calling function is responsible for freeing the returned string. + * Add <speed>, <pitch> and <volume> tags to the text, + * if the properties have been set to non-default values, and return the new string. + * The calling function is responsible for freeing the returned string. * @str - text to apply tags to * return new string with tags applied */ -static char* doAddProperties(const char* str) +static char * doAddProperties( const char * str ) { - char* data = NULL; - int haspitch = 0, hasspeed = 0, hasvol = 0; - int textlen = strlen(str) + 1; + char * data = NULL; + int haspitch, hasspeed, hasvol; /* parameters */ + int textlen; /* property string length */ + haspitch = 0; hasspeed = 0; hasvol = 0; + textlen = strlen(str) + 1; if (picoProp_currPitch != PICO_DEF_PITCH) /* non-default pitch */ { textlen += strlen(PICO_PITCH_OPEN_TAG) + 5; @@ -461,7 +507,8 @@ static char* doAddProperties(const char* str) hasvol = 1; } - data = (char*)malloc(textlen); + /* Compose the property strings. */ + data = (char *) malloc( textlen ); /* allocate string */ if (!data) { return NULL; @@ -512,10 +559,292 @@ static char* doAddProperties(const char* str) } -/* API function implementations */ +/** createPhonemeString + * Wrap all individual words in <phoneme> tags. + * The Pico <phoneme> tag only supports one word in each tag, + * therefore they must be individually wrapped! + * @xsampa - text to convert to Pico phomene string + * @length - length of the input string + * return new string with tags applied +*/ +static char * createPhonemeString( const char * xsampa, int length ) +{ + char * convstring = NULL; + int origStrLen = strlen(xsampa); + int numWords = 1; + int start, totalLength, i, j; + + for (i = 0; i < origStrLen; i ++) + { + if (xsampa[i] == ' ') + numWords ++; + } + + if (numWords == 1) + { + convstring = new char[origStrLen + 17]; + convstring[0] = '\0'; + strcat(convstring, "<phoneme ph='"); + strcat(convstring, xsampa); + strcat(convstring, "'/>"); + } + else + { + char * words[numWords]; + start = 0; totalLength = 0; i = 0; j = 0; + for (i=0, j=0; i < origStrLen; i ++) + { + if (xsampa[i] == ' ') + { + words[j] = new char[i+1-start+17]; + words[j][0] = '\0'; + strcat( words[j], "<phoneme ph='"); + strncat(words[j], xsampa+start, i-start); + strcat( words[j], "'/>"); + start = i + 1; + j ++; + totalLength += strlen(words[j-1]); + } + } + words[j] = new char[i+1-start+17]; + words[j][0] = '\0'; + strcat(words[j], "<phoneme ph='"); + strcat(words[j], xsampa+start); + strcat(words[j], "'/>"); + totalLength += strlen(words[j]); + convstring = new char[totalLength + 1]; + convstring[0] = '\0'; + for (i=0; i < numWords; i ++) + { + strcat(convstring, words[i]); + delete [] words[i]; + } + } + return convstring; +} + + +/* The XSAMPA uses as many as 4 characters to represent a single IPA code. */ +typedef struct tagPhnArr + { + char16_t strIPA; /* IPA Unicode symbol */ + char strXSAMPA[5]; /* SAMPA sequence */ + } PArr; + +#define phn_cnt (133) + +PArr PhnAry[phn_cnt] = { + + /* XSAMPA conversion table */ + + /* Vowels (23) complete */ + {0x025B, "E"}, + {0x0251, "A"}, + {0x0254, "O"}, + {0x00F8, "2"}, + {0x0153, "9"}, + {0x0276, "&"}, + {0x0252, "Q"}, + {0x028C, "V"}, + {0x0264, "7"}, + {0x026F, "M"}, + {0x0268, "1"}, + {0x0289, "}"}, + {0x026A, "I"}, + {0x028F, "Y"}, + {0x028A, "U"}, + {0x0259, "@"}, + {0x0275, "8"}, + {0x0250, "6"}, + {0x00E6, "{"}, + {0x025C, "3"}, + {0x025A, "@'"}, + {0x025E, "3\\"}, + {0x0258, "@\\"}, + + /* Consonants (60) complete */ + {0x0288, "t'"}, + {0x0256, "d'"}, + {0x025F, "J\\"}, + {0x0261, "g"}, + {0x0262, "G\\"}, + {0x0294, "?"}, + {0x0271, "F"}, + {0x0273, "n'"}, + {0x0272, "J"}, + {0x014B, "N"}, + {0x0274, "N\\"}, + {0x0299, "B\\"}, + {0x0280, "R\\"}, + {0x027E, "4"}, + {0x027D, "r'"}, + {0x0278, "p\\"}, + {0x03B2, "B"}, + {0x03B8, "T"}, + {0x00F0, "D"}, + {0x0283, "S"}, + {0x0292, "Z"}, + {0x0282, "s'"}, + {0x0290, "z'"}, + {0x00E7, "C"}, + {0x029D, "j\\"}, + {0x0263, "G"}, + {0x03C7, "X"}, + {0x0281, "R"}, + {0x0127, "X\\"}, + {0x0295, "?\\"}, + {0x0266, "h\\"}, + {0x026C, "K"}, + {0x026E, "K\\"}, + {0x028B, "P"}, + {0x0279, "r\\"}, + {0x027B, "r\\'"}, + {0x0270, "M\\"}, + {0x026D, "l'"}, + {0x028E, "L"}, + {0x029F, "L\\"}, + {0x0253, "b_<"}, + {0x0257, "d_<"}, + {0x0284, "J\\_<"}, + {0x0260, "g_<"}, + {0x029B, "G\\_<"}, + {0x028D, "W"}, + {0x0265, "H"}, + {0x029C, "H\\"}, + {0x02A1, ">\\"}, + {0x02A2, "<\\"}, + {0x0298, "O\\"}, + {0x01C0, "|\\"}, + {0x01C3, "!\\"}, + {0x01C2, "=\\"}, + {0x01C1, "|\\|\\"}, + {0x027A, "l\\"}, + {0x0255, "s\\"}, + {0x0291, "z\\"}, + {0x0267, "x\\"}, + {0x026B, "l_G"}, + + /* Diacritics (34) */ + {0x02BC, "_>"}, + {0x0325, "_0"}, + {0x030A, "_0"}, + {0x032C, "_v"}, + {0x02B0, "_h"}, + {0x0324, "_t"}, + {0x0330, "_k"}, + {0x033C, "_N"}, + {0x032A, "_d"}, + {0x033A, "_a"}, + {0x033B, "_m"}, + {0x0339, "_O"}, + {0x031C, "_c"}, + {0x031F, "_+"}, + {0x0320, "_-"}, + {0x0308, "_"}, /* centralized %% */ + {0x033D, "_x"}, + {0x0318, "_A"}, + {0x0319, "_q"}, + {0x02DE, "'"}, + {0x02B7, "_w"}, + {0x02B2, "_j"}, + {0x02E0, "_G"}, + {0x02E4, "_?\\"}, + {0x0303, "~"}, + {0x207F, "_n"}, + {0x02E1, "_l"}, + {0x031A, "_}"}, + {0x0334, "_e"}, + {0x031D, "_r"}, + {0x031E, "_o"}, + {0x0329, "="}, + {0x032F, "_^"}, + {0x02D0, ":"}, + + /* Others (10) incomplete%% */ + {0x0361, "_"}, + {0x035C, "_"}, + {0x02C8, ""}, + {0x02CC, "%"}, + {0x02D1, ":\\"}, + {0x0306, "_X"}, + {0x2016, "||"}, + {0x203F, "-\\"}, + {0x2197, "<R>"}, + {0x2198, "<F>"}, + + /* Affricates (6) complete */ + {0x02A3, "d_z"}, + {0x02A4, "d_Z"}, + {0x02A5, "d_z\\"}, + {0x02A6, "t_s"}, + {0x02A7, "t_S"}, + {0x02A8, "t_s\\"} + }; + + +void CnvIPAPnt( const char16_t IPnt, char * XPnt ) +{ + char16_t ThisPnt = IPnt; /* local copy of single IPA codepoint */ + int idx; /* index into table */ + + /* Convert an individual IPA codepoint. + A single IPA code could map to a string. + Search the table. If it is not found, use the same character. + Since most codepoints can be contained within 16 bits, + they are represented as wide chars. */ + XPnt[0] = 0; /* clear the result string */ + + /* Search the table for the conversion. */ + for (idx = 0; idx < phn_cnt; idx ++) /* for each item in table */ + if (IPnt == PhnAry[idx].strIPA) /* matches IPA code */ + { + strcat( XPnt, (const char *)&(PhnAry[idx].strXSAMPA) ); /* copy the XSAMPA string */ + return; + } + strcat(XPnt, (const char *)&ThisPnt); /* just copy it */ +} + + +/** cnvIpaToXsampa + * Convert an IPA character string to an XSAMPA character string. + * @ipaString - input IPA string to convert + * @outXsampaString - converted XSAMPA string is passed back in this parameter + * return size of the new string +*/ +int cnvIpaToXsampa( const char16_t * ipaString, char ** outXsampaString ) +{ + size_t xsize; /* size of result */ + size_t ilen; /* input length */ + int ipidx; /* index into IPA string */ + char * XPnt; /* short XSAMPA char sequence */ + + /* Convert an IPA string to an XSAMPA string and store the xsampa string in *outXsampaString. + It is the responsibility of the caller to free the allocated string. + Increment through the string. For each base & combination convert it to the XSAMP equivalent. + Because of the XSAMPA limitations, not all IPA characters will be covered. */ + XPnt = (char *) malloc(6); + xsize = (4 * strlen16(ipaString)) + 8; /* assume more than double size */ + + *outXsampaString = (char *) malloc( xsize ); /* allocate return string */ + *outXsampaString[0] = 0; + xsize = 0; /* clear final */ + ilen = strlen16(ipaString); /* length of input UTF-16 */ + for (ipidx = 0; ipidx < ilen; ipidx ++) /* for each IPA code */ + { + CnvIPAPnt( ipaString[ipidx], XPnt ); /* get converted string */ + strcat((char *)*outXsampaString, XPnt ); /* concatenate XSAMPA */ + }; + free(XPnt); + xsize = strlen(*outXsampaString); /* get the final length */ + return xsize; +} + + + +/* Google Engine API function implementations */ /** init - * Allocates Pico memory block and initializes Pico system. + * Allocates Pico memory block and initializes the Pico system. * synthDoneCBPtr - Pointer to callback function which will receive generated samples * return tts_result */ @@ -527,18 +856,18 @@ tts_result TtsEngine::init( synthDoneCB_t synthDoneCBPtr ) return TTS_FAILURE; } - picoMemArea = malloc(PICO_MEM_SIZE); + picoMemArea = malloc( PICO_MEM_SIZE ); if (!picoMemArea) { LOGE("Failed to allocate memory for Pico system"); return TTS_FAILURE; } - pico_Status ret = pico_initialize(picoMemArea, PICO_MEM_SIZE, &picoSystem); + pico_Status ret = pico_initialize( picoMemArea, PICO_MEM_SIZE, &picoSystem ); if (PICO_OK != ret) { LOGE("Failed to initialize Pico system"); - free(picoMemArea); + free( picoMemArea ); picoMemArea = NULL; return TTS_FAILURE; } @@ -571,10 +900,101 @@ tts_result TtsEngine::shutdown( void ) } cleanFiles(); - return TTS_SUCCESS; } + +/** loadLanguage + * Load a new language. + * @lang - string with ISO 3 letter language code. + * @country - string with ISO 3 letter country code . + * @variant - string with language variant for that language and country pair. + * return tts_result +*/ +tts_result TtsEngine::loadLanguage(const char *lang, const char *country, const char *variant) +{ + return TTS_FAILURE; + //return setProperty("language", value, size); +} + + +/** setLanguage + * Load a new language (locale). Use the ISO 639-3 language codes. + * @lang - string with ISO 639-3 language code. + * @country - string with ISO 3 letter country code. + * @variant - string with language variant for that language and country pair. + * return tts_result + */ +tts_result TtsEngine::setLanguage( const char * lang, const char * country, const char * variant ) +{ + int langIndex; + int countryIndex; + int i; + + if (lang == NULL) + { + LOGE("TtsEngine::setLanguage called with NULL language"); + return TTS_FAILURE; + } + + /* We look for a match on the language first + then we look for a match on the country. + If no match on the language: + return an error. + If match on the language, but no match on the country: + load the language found for the language match. + If match on the language, and match on the country: + load the language found for the country match. */ + + /* Find a match on the language. */ + langIndex = -1; /* no match */ + for (i = 0; i < picoNumSupportedVocs; i ++) + { + if (strcmp(lang, picoSupportedLangIso3[i]) == 0) + { + langIndex = i; + break; + } + } + if (langIndex < 0) + { + /* The language isn't supported. */ + LOGE("TtsEngine::setLanguage called with unsupported language"); + return TTS_FAILURE; + } + + /* Find a match on the country, if there is one. */ + if (country != NULL) + { + countryIndex = -1; + for (i = langIndex; i < picoNumSupportedVocs; i ++) + { + if ( (strcmp(lang, picoSupportedLangIso3[i]) == 0) + && (strcmp(country, picoSupportedCountryIso3[i]) == 0)) + { + countryIndex = i; + break; + } + } + + if (countryIndex < 0) + { + /* We didn't find a match on the country, but we had a match on the language. + Use that language. */ + LOGI("TtsEngine::setLanguage found matching language(%s) but not matching country(%s).", + lang, country); + } + else + { + /* We have a match on both the language and the country. */ + langIndex = countryIndex; + } + } + + return doLanguageSwitchFromLangIndex( langIndex ); /* switch the language */ +} + + /** isLanguageAvailable * Returns the level of support for a language. * @lang - string with ISO 3 letter language code. @@ -595,7 +1015,7 @@ tts_support_result TtsEngine::isLanguageAvailable(const char *lang, const char * } // find a match on the language - for (int i = 0; i < picoNumSupportedLang; i++) + for (int i = 0; i < picoNumSupportedVocs; i++) { if (strcmp(lang, picoSupportedLangIso3[i]) == 0) { langIndex = i; @@ -617,7 +1037,7 @@ tts_support_result TtsEngine::isLanguageAvailable(const char *lang, const char * } // find a match on the country - for (int i = langIndex; i < picoNumSupportedLang; i++) { + for (int i = langIndex; i < picoNumSupportedVocs; i++) { if ((strcmp(lang, picoSupportedLangIso3[i]) == 0) && (strcmp(country, picoSupportedCountryIso3[i]) == 0)) { countryIndex = i; @@ -638,81 +1058,6 @@ tts_support_result TtsEngine::isLanguageAvailable(const char *lang, const char * // no variants supported in this library, TTS_LANG_COUNTRY_VAR_AVAILABLE cannot be returned. } -/** loadLanguage - * Load a new language. - * @lang - string with ISO 3 letter language code. - * @country - string with ISO 3 letter country code . - * @variant - string with language variant for that language and country pair. - * return tts_result -*/ -tts_result TtsEngine::loadLanguage(const char *lang, const char *country, const char *variant) -{ - return TTS_FAILURE; - //return setProperty("language", value, size); -} - -/** setLanguage - * Load a new language. - * @lang - string with ISO 3 letter language code. - * @country - string with ISO 3 letter country code . - * @variant - string with language variant for that language and country pair. - * return tts_result - */ -tts_result TtsEngine::setLanguage(const char *lang, const char *country, const char *variant) { - if (lang == NULL) { - LOGE("TtsEngine::setLanguage called with NULL language"); - return TTS_FAILURE; - } - - // we look for a match on the language first - // then we look for a match on the country. - // if no match on the language: - // return an error - // if match on the language, but no match on the country: - // load the language found for the language match - // if match on the language, and match on the country: - // load the language found for the country match - - // find a match on the language - int langIndex = -1; - for (int i = 0; i < picoNumSupportedLang; i++) - { - if (strcmp(lang, picoSupportedLangIso3[i]) == 0) { - langIndex = i; - break; - } - } - if (langIndex < 0) { - // language isn't supported - LOGE("TtsEngine::setLanguage called with unsupported language"); - return TTS_FAILURE; - } - - // find a match on the country - if (country != NULL) { - int countryIndex = -1; - for (int i = langIndex; i < picoNumSupportedLang; i++) { - if ((strcmp(lang, picoSupportedLangIso3[i]) == 0) - && (strcmp(country, picoSupportedCountryIso3[i]) == 0)) { - countryIndex = i; - break; - } - } - - if (countryIndex < 0) { - // we didn't find a match on the country, but we had a match on the language, - // use that language - LOGI("TtsEngine::setLanguage found matching language(%s) but not matching country(%s).", - lang, country); - } else { - // we have a match on the language and the country - langIndex = countryIndex; - } - } - - return doLanguageSwitchFromLangIndex( langIndex ); -} - /** getLanguage * Get the currently loaded language - if any. @@ -764,63 +1109,75 @@ tts_result TtsEngine::setAudioFormat(AudioSystem::audio_format& encoding, uint32 */ tts_result TtsEngine::setProperty( const char * property, const char * value, const size_t size ) { + int rate; + int pitch; + int volume; + + /* Set a specific property for the engine. + Supported properties include: language (locale), rate, pitch, volume. */ /* Sanity check */ if (property == NULL) - { + { LOGE("setProperty called with property NULL"); return TTS_PROPERTY_UNSUPPORTED; - } + } if (value == NULL) - { + { LOGE("setProperty called with value NULL"); return TTS_VALUE_INVALID; - } + } if (strncmp(property, "language", 8) == 0) - { - // verify it's in correct format - if (strlen(value) != 2 && strlen(value) != 6) { + /* Verify it's in correct format. */ + if (strlen(value) != 2 && strlen(value) != 6) + { LOGE("change language called with incorrect format"); return TTS_VALUE_INVALID; - } + } - // try to switch to specified language + /* Try to switch to specified language. */ if (doLanguageSwitch(value) == TTS_FAILURE) - { + { LOGE("failed to load language"); return TTS_FAILURE; - } + } else - { + { return TTS_SUCCESS; + } } - } else if (strncmp(property, "rate", 4) == 0) - { - int rate = atoi(value); - if (rate < PICO_MIN_RATE) rate = PICO_MIN_RATE; - if (rate > PICO_MAX_RATE) rate = PICO_MAX_RATE; + { + rate = atoi(value); + if (rate < PICO_MIN_RATE) + rate = PICO_MIN_RATE; + if (rate > PICO_MAX_RATE) + rate = PICO_MAX_RATE; picoProp_currRate = rate; return TTS_SUCCESS; - } + } else if (strncmp(property, "pitch", 5) == 0) - { - int pitch = atoi(value); - if (pitch < PICO_MIN_PITCH) pitch = PICO_MIN_PITCH; - if (pitch > PICO_MAX_PITCH) pitch = PICO_MAX_PITCH; + { + pitch = atoi(value); + if (pitch < PICO_MIN_PITCH) + pitch = PICO_MIN_PITCH; + if (pitch > PICO_MAX_PITCH) + pitch = PICO_MAX_PITCH; picoProp_currPitch = pitch; return TTS_SUCCESS; - } + } else if (strncmp(property, "volume", 6) == 0) - { - int volume = atoi(value); - if (volume < PICO_MIN_VOLUME) volume = PICO_MIN_VOLUME; - if (volume > PICO_MAX_VOLUME) volume = PICO_MAX_VOLUME; + { + volume = atoi(value); + if (volume < PICO_MIN_VOLUME) + volume = PICO_MIN_VOLUME; + if (volume > PICO_MAX_VOLUME) + volume = PICO_MAX_VOLUME; picoProp_currVolume = volume; return TTS_SUCCESS; - } + } return TTS_PROPERTY_UNSUPPORTED; } @@ -829,44 +1186,46 @@ tts_result TtsEngine::setProperty( const char * property, const char * value, co /** getProperty * Get the property. Supported properties are: language, rate, pitch and volume. * @property - name of property to get - * @value - buffer which will receive value of property - * @iosize - size of value - if size is too small on return this will contain actual size needed + * @value - buffer which will receive value of property + * @iosize - size of value - if size is too small on return this will contain actual size needed * return tts_result */ -tts_result TtsEngine::getProperty(const char *property, char *value, size_t* iosize) +tts_result TtsEngine::getProperty( const char * property, char * value, size_t * iosize ) { + /* Get the property for the engine. + This property was previously set by setProperty or by default. */ /* sanity check */ if (property == NULL) - { + { LOGE("getProperty called with property NULL"); return TTS_PROPERTY_UNSUPPORTED; - } + } if (value == NULL) - { + { LOGE("getProperty called with value NULL"); return TTS_VALUE_INVALID; - } + } if (strncmp(property, "language", 8) == 0) - { - if (picoProp_currLang == NULL) { + if (picoProp_currLang == NULL) + { strcpy(value, ""); - } + } else - { - if (*iosize < strlen(picoProp_currLang)+1) { + if (*iosize < strlen(picoProp_currLang)+1) + { *iosize = strlen(picoProp_currLang) + 1; return TTS_PROPERTY_SIZE_TOO_SMALL; - } + } strcpy(value, picoProp_currLang); - } + } return TTS_SUCCESS; - } + } else if (strncmp(property, "rate", 4) == 0) - { + { char tmprate[4]; sprintf(tmprate, "%d", picoProp_currRate); if (*iosize < strlen(tmprate)+1) @@ -876,9 +1235,9 @@ tts_result TtsEngine::getProperty(const char *property, char *value, size_t* ios } strcpy(value, tmprate); return TTS_SUCCESS; - } + } else if (strncmp(property, "pitch", 5) == 0) - { + { char tmppitch[4]; sprintf(tmppitch, "%d", picoProp_currPitch); if (*iosize < strlen(tmppitch)+1) @@ -888,67 +1247,119 @@ tts_result TtsEngine::getProperty(const char *property, char *value, size_t* ios } strcpy(value, tmppitch); return TTS_SUCCESS; - } + } else if (strncmp(property, "volume", 6) == 0) - { + { char tmpvol[4]; sprintf(tmpvol, "%d", picoProp_currVolume); if (*iosize < strlen(tmpvol)+1) - { + { *iosize = strlen(tmpvol) + 1; return TTS_PROPERTY_SIZE_TOO_SMALL; - } + } strcpy(value, tmpvol); return TTS_SUCCESS; - } - else - { - LOGE("Unsupported property"); - return TTS_PROPERTY_UNSUPPORTED; - } + } + + /* Unknown property */ + LOGE("Unsupported property"); + return TTS_PROPERTY_UNSUPPORTED; } /** synthesizeText * Synthesizes a text string. - * @text - text to synthesize - * @buffer - buffer which will receive generated samples + * The text string could be annotated with SSML tags. + * @text - text to synthesize + * @buffer - buffer which will receive generated samples * @bufferSize - size of buffer * @userdata - pointer to user data which will be passed back to callback function * return tts_result */ -tts_result TtsEngine::synthesizeText(const char *text, int8_t *buffer, size_t bufferSize, void *userdata) +tts_result TtsEngine::synthesizeText( const char * text, int8_t * buffer, size_t bufferSize, void * userdata ) { - pico_Char* inp = NULL; - pico_Char* local_text = NULL; - short outbuf[MAX_OUTBUF_SIZE/2]; - pico_Int16 bytes_sent, bytes_recv, text_remaining, out_data_type; + int err; + int cbret; + pico_Char * inp = NULL; + pico_Char * local_text = NULL; + short outbuf[MAX_OUTBUF_SIZE/2]; + pico_Int16 bytes_sent, bytes_recv, text_remaining, out_data_type; pico_Status ret; - picoSynthAbort = 0; + SvoxSsmlParser * parser = NULL; + picoSynthAbort = 0; if (text == NULL) - { + { LOGE("synthesizeText called with NULL string"); return TTS_FAILURE; - } + } if (buffer == NULL) - { + { LOGE("synthesizeText called with NULL buffer"); return TTS_FAILURE; - } + } - /* Add property tags to the string - if any. */ - local_text = (pico_Char*)doAddProperties(text); - if (!local_text) + if ((strncmp(text, "<speak", 6) == 0) || + (strncmp(text, "<?xml", 5) == 0) ) + { + /* SSML input */ + parser = new SvoxSsmlParser(); + if (parser && parser->initSuccessful()) + { + err = parser->parseDocument(text, 1); + if (err == XML_STATUS_ERROR) + { + LOGI("Warning: SSML document parsed with errors"); + } + char * parsed_text = parser->getParsedDocument(); + if (parsed_text) + { + /* Add property tags to the string - if any. */ + local_text = (pico_Char *) doAddProperties( parsed_text ); + if (!local_text) + { + LOGE("Failed to allocate memory for text string"); + delete parser; + return TTS_FAILURE; + } + char * lang = parser->getParsedDocumentLanguage(); + if (doLanguageSwitch(lang) == TTS_FAILURE) + { + LOGE("Failed to switch to language specified in SSML document."); + delete parser; + return TTS_FAILURE; + } + delete parser; + } + else + { + LOGE("Failed to parse SSML document"); + delete parser; + return TTS_FAILURE; + } + } + else + { + LOGE("Failed to create SSML parser"); + if (parser) delete parser; + return TTS_FAILURE; + } + } + else { + /* Add property tags to the string - if any. */ + local_text = (pico_Char *) doAddProperties( text ); + if (!local_text) + { LOGE("Failed to allocate memory for text string"); return TTS_FAILURE; + } } - text_remaining = strlen((const char*)local_text) + 1; + text_remaining = strlen((const char *) local_text) + 1; - inp = (pico_Char*)local_text; + inp = (pico_Char *) local_text; size_t bufused = 0; @@ -956,19 +1367,20 @@ tts_result TtsEngine::synthesizeText(const char *text, int8_t *buffer, size_t bu while (text_remaining) { if (picoSynthAbort) - { - ret = pico_resetEngine(picoEngine); + { + ret = pico_resetEngine( picoEngine ); break; - } + } /* Feed the text into the engine. */ - ret = pico_putTextUtf8(picoEngine, inp, text_remaining, &bytes_sent); + ret = pico_putTextUtf8( picoEngine, inp, text_remaining, &bytes_sent ); if (ret != PICO_OK) - { + { LOGE("Error synthesizing string '%s': [%d]", text, ret); - if (local_text) free(local_text); + if (local_text) + free( local_text ); return TTS_FAILURE; - } + } text_remaining -= bytes_sent; inp += bytes_sent; @@ -979,36 +1391,36 @@ tts_result TtsEngine::synthesizeText(const char *text, int8_t *buffer, size_t bu break; } /* Retrieve the samples and add them to the buffer. */ - ret = pico_getData(picoEngine, (void*)outbuf, MAX_OUTBUF_SIZE, &bytes_recv, &out_data_type); + ret = pico_getData( picoEngine, (void *) outbuf, MAX_OUTBUF_SIZE, &bytes_recv, &out_data_type ); if (bytes_recv) - { - if ((bufused + bytes_recv) <= bufferSize) { - memcpy(buffer+bufused, (int8_t*)outbuf, bytes_recv); + if ((bufused + bytes_recv) <= bufferSize) + { + memcpy(buffer+bufused, (int8_t *) outbuf, bytes_recv); bufused += bytes_recv; - } + } else - { + { /* The buffer filled; pass this on to the callback function. */ - int cbret = picoSynthDoneCBPtr(userdata, 16000, AudioSystem::PCM_16_BIT, 1, buffer, bufused, TTS_SYNTH_PENDING); + cbret = picoSynthDoneCBPtr(userdata, 16000, AudioSystem::PCM_16_BIT, 1, buffer, bufused, TTS_SYNTH_PENDING); if (cbret == TTS_CALLBACK_HALT) - { + { LOGI("Halt requested by caller. Halting."); picoSynthAbort = 1; break; - } + } bufused = 0; - memcpy(buffer, (int8_t*)outbuf, bytes_recv); + memcpy(buffer, (int8_t *) outbuf, bytes_recv); bufused += bytes_recv; + } } - } } while (PICO_STEP_BUSY == ret); - /* The synthesis is finished; notify the caller and pass the remaining samples. + /* This chunk of synthesis is finished; pass the remaining samples. Use 16 KHz, 16-bit samples. */ if (!picoSynthAbort) { - picoSynthDoneCBPtr( userdata, 16000, AudioSystem::PCM_16_BIT, 1, buffer, bufused, TTS_SYNTH_DONE); + picoSynthDoneCBPtr( userdata, 16000, AudioSystem::PCM_16_BIT, 1, buffer, bufused, TTS_SYNTH_PENDING); } picoSynthAbort = 0; @@ -1016,17 +1428,28 @@ tts_result TtsEngine::synthesizeText(const char *text, int8_t *buffer, size_t bu { LOGE("Error occurred during synthesis [%d]", ret); if (local_text) free(local_text); + LOGV("Synth loop: sending TTS_SYNTH_DONE after error"); + picoSynthDoneCBPtr( userdata, 16000, AudioSystem::PCM_16_BIT, 1, buffer, bufused, TTS_SYNTH_DONE); return TTS_FAILURE; } } + + /* Synthesis is done; notify the caller */ + LOGV("Synth loop: sending TTS_SYNTH_DONE after all done, or was asked to stop"); + picoSynthDoneCBPtr( userdata, 16000, AudioSystem::PCM_16_BIT, 1, buffer, bufused, TTS_SYNTH_DONE); - if (local_text) free(local_text); + if (local_text) { + free( local_text ); + } return TTS_SUCCESS; } /** synthesizeIpa * Synthesizes a phonetic string in IPA format. + The Pico engine understands only XSAMPA, so the IPA must be converted. + The string must also be parceled into words since it can only hanlde a word at a time. + Each phonemic word must be wrapped with a tag. * @ipa - phonetic string to synthesize * @buffer - buffer which will receive generated samples * @bufferSize - size of buffer @@ -1035,114 +1458,9 @@ tts_result TtsEngine::synthesizeText(const char *text, int8_t *buffer, size_t bu */ tts_result TtsEngine::synthesizeIpa( const char * ipa, int8_t * buffer, size_t bufferSize, void * userdata ) { - pico_Char* inp = NULL; - pico_Char* local_text = NULL; - short outbuf[MAX_OUTBUF_SIZE/2]; - pico_Int16 bytes_sent, bytes_recv, text_remaining, out_data_type; - pico_Status ret; - - picoSynthAbort = 0; - if (ipa == NULL) - { - LOGE("synthesizeIpa called with NULL string"); - return TTS_FAILURE; - } - - if (buffer == NULL) - { - LOGE("synthesizeIpa called with NULL buffer"); - return TTS_FAILURE; - } - - /* Append phoneme tag. %%% - <phoneme ph="xxx"/> */ - - /* Add property tags to the string - if any. */ - local_text = (pico_Char*)doAddProperties( ipa ); - if (!local_text) - { - LOGE("Failed to allocate memory for text string"); - return TTS_FAILURE; - } - - text_remaining = strlen((const char*)local_text) + 1; - - inp = (pico_Char*)local_text; - - size_t bufused = 0; - - /* synthesis loop */ - while (text_remaining) - { - if (picoSynthAbort) - { - ret = pico_resetEngine( picoEngine ); - break; - } - - /* Feed the text into the engine. */ - ret = pico_putTextUtf8( picoEngine, inp, text_remaining, &bytes_sent ); - if (ret != PICO_OK) - { - LOGE("Error synthesizing string '%s': [%d]", ipa, ret); - if (local_text) free(local_text); - return TTS_FAILURE; - } - - /* Process the remaining string. */ - text_remaining -= bytes_sent; - inp += bytes_sent; - do - { - if (picoSynthAbort) - { - break; - } - /* Retrieve the samples and add them to the buffer. */ - ret = pico_getData( picoEngine, (void*)outbuf, MAX_OUTBUF_SIZE, &bytes_recv, &out_data_type ); - if (bytes_recv) - { - if ((bufused + bytes_recv) <= bufferSize) - { - memcpy(buffer+bufused, (int8_t*)outbuf, bytes_recv); - bufused += bytes_recv; - } - else - { - /* The buffer filled; pass this on to the callback function. */ - int cbret = picoSynthDoneCBPtr(userdata, 16000, AudioSystem::PCM_16_BIT, 1, buffer, bufused, TTS_SYNTH_PENDING); - if (cbret == TTS_CALLBACK_HALT) - { - LOGI("Halt requested by caller. Halting."); - picoSynthAbort = 1; - break; - } - bufused = 0; - memcpy(buffer, (int8_t*)outbuf, bytes_recv); - bufused += bytes_recv; - } - } - } while (PICO_STEP_BUSY == ret); - - /* The synthesis is finished; notify the caller and pass the remaining samples. - Use 16 KHz, 16-bit samples. */ - if (!picoSynthAbort) - { - picoSynthDoneCBPtr( userdata, 16000, AudioSystem::PCM_16_BIT, 1, buffer, bufused, TTS_SYNTH_DONE ); - } - picoSynthAbort = 0; /* succeeded */ - - if (ret != PICO_STEP_IDLE) - { - LOGE("Error occurred during synthesis [%d]", ret); - if (local_text) free(local_text); - return TTS_FAILURE; - } - } + // deprecated call + return TTS_FAILURE; - if (local_text) - free(local_text); - return TTS_SUCCESS; /* succeeded */ } @@ -1150,7 +1468,7 @@ tts_result TtsEngine::synthesizeIpa( const char * ipa, int8_t * buffer, size_t b * Aborts the running synthesis. * return tts_result */ -tts_result TtsEngine::stop() +tts_result TtsEngine::stop( void ) { picoSynthAbort = 1; return TTS_SUCCESS; @@ -1161,7 +1479,7 @@ tts_result TtsEngine::stop() extern "C" { #endif -TtsEngine* getTtsEngine() +TtsEngine * getTtsEngine( void ) { return new TtsEngine(); } diff --git a/pico/tts/svox_ssml_parser.cpp b/pico/tts/svox_ssml_parser.cpp new file mode 100755 index 0000000..53dc786 --- /dev/null +++ b/pico/tts/svox_ssml_parser.cpp @@ -0,0 +1,893 @@ +/* + * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * History: + * 2009-06-29 -- initial version + * + */ + +#include "svox_ssml_parser.h" +#include <utils/Log.h> +#include <cutils/jstring.h> +#include <string.h> + + +#define SSML_PITCH_XLOW "50" +#define SSML_PITCH_LOW "75" +#define SSML_PITCH_MEDIUM "100" +#define SSML_PITCH_HIGH "150" +#define SSML_PITCH_XHIGH "200" +#define SSML_RATE_XSLOW "30" +#define SSML_RATE_SLOW "60" +#define SSML_RATE_MEDIUM "100" +#define SSML_RATE_FAST "250" +#define SSML_RATE_XFAST "500" +#define SSML_VOLUME_SILENT "0" +#define SSML_VOLUME_XLOW "20" +#define SSML_VOLUME_LOW "60" +#define SSML_VOLUME_MEDIUM "100" +#define SSML_VOLUME_LOUD "300" +#define SSML_VOLUME_XLOUD "450" +#define SSML_BREAK_NONE "0ms" +#define SSML_BREAK_XWEAK "100ms" +#define SSML_BREAK_WEAK "300ms" +#define SSML_BREAK_MEDIUM "600ms" +#define SSML_BREAK_STRONG "1s" +#define SSML_BREAK_XSTRONG "3s" + +//TODO JMT remove comment +//extern int cnvIpaToXsampa(const char16_t* ipaString, char** outXsampaString); + +SvoxSsmlParser::SvoxSsmlParser() : m_isInBreak(0), m_appendix(NULL), m_docLanguage(NULL) +{ + mParser = XML_ParserCreate("UTF-8"); + if (mParser) + { + XML_SetElementHandler(mParser, starttagHandler, endtagHandler); + XML_SetCharacterDataHandler(mParser, textHandler); + XML_SetUserData(mParser, (void*)this); + m_datasize = 512; + m_data = new char[m_datasize]; + m_data[0] = '\0'; + } +} + +SvoxSsmlParser::~SvoxSsmlParser() +{ + if (mParser) + XML_ParserFree(mParser); + if (m_data) + delete [] m_data; + if (m_appendix) + delete [] m_appendix; + if (m_docLanguage) + delete [] m_docLanguage; +} + +int SvoxSsmlParser::initSuccessful() +{ + return (mParser && m_data); +} + +int SvoxSsmlParser::parseDocument(const char* ssmldoc, int isFinal) +{ + int doclen = (int)strlen(ssmldoc) + 1; + int status = XML_Parse(mParser, ssmldoc, doclen, isFinal); + if (status == XML_STATUS_ERROR) + { + /* Note: for some reason Expat almost always complains about invalid tokens, even when document is well formed */ + LOGI("Parser error at line %d: %s\n", (int)XML_GetCurrentLineNumber(mParser), XML_ErrorString(XML_GetErrorCode(mParser))); + } + return status; +} + +char* SvoxSsmlParser::getParsedDocument() +{ + return m_data; +} + +char* SvoxSsmlParser::getParsedDocumentLanguage() +{ + return m_docLanguage; +} + +void SvoxSsmlParser::starttagHandler(void* data, const XML_Char* element, const XML_Char** attributes) +{ + ((SvoxSsmlParser*)data)->startElement(element, attributes); +} + +void SvoxSsmlParser::startElement(const XML_Char* element, const XML_Char** attributes) +{ + if (strcmp(element, "speak") == 0) + { + if (strlen(m_data) > 0) + { + /* we have old data, get rid of it and reallocate memory */ + delete m_data; + m_data = NULL; + m_datasize = 512; + m_data = new char[m_datasize]; + if (!m_data) + { + LOGE("Error: failed to allocate memory for string!\n"); + return; + } + } + + /* the only attribute supported in the speak tag is xml:lang, all others are ignored */ + for (int i = 0; attributes[i]; i += 2) + { + if (strcmp(attributes[i], "xml:lang") == 0) + { + if (!m_docLanguage) + { + m_docLanguage = new char[strlen(attributes[i+1])+1]; + } + strcpy(m_docLanguage, attributes[i+1]); + break; + } + } + } + else if (strcmp(element, "p") == 0) /* currently no attributes are supported for <p> */ + { + if (strlen(m_data) + 4 > (size_t)m_datasize) + { + if (!growDataSize(100)) + { + LOGE("Error: failed to allocate memory for string!\n"); + return; + } + } + strcat(m_data, "<p>"); + } + else if (strcmp(element, "s") == 0) /* currently no attributes are supported for <s> */ + { + if (strlen(m_data) + 4 > (size_t)m_datasize) + { + if (!growDataSize(100)) + { + LOGE("Error: failed to allocate memory for string!\n"); + return; + } + } + strcat(m_data, "<s>"); + } + else if (strcmp(element, "phoneme") == 0) /* only ipa and xsampa alphabets are supported */ + { + if (strlen(m_data) + 9 > (size_t)m_datasize) + { + if (!growDataSize(100)) + { + LOGE("Error: failed to allocate memory for string!\n"); + return; + } + } + strcat(m_data, "<phoneme"); + + int alpha = 1; /* set to 1 if alphabet is ipa */ + char* ph = NULL; + + for (int i = 0; attributes[i]; i += 2) + { + if (strcmp(attributes[i], "alphabet") == 0) + { + if (strcmp(attributes[i+1], "xsampa") == 0) + { + alpha = 0; + } + } + if (strcmp(attributes[i], "ph") == 0) + { + ph = new char[strlen(attributes[i+1]) + 1]; + strcpy(ph, attributes[i+1]); + } + } + if (alpha) + { + /* need to convert phoneme string to xsampa */ + size_t size = 0; + char16_t* ipastr = strdup8to16(ph, &size); + char16_t* xsampastr = NULL; + if (!ipastr) + { + LOGE("Error: failed to allocate memory for IPA string conversion"); + return; + } + //TODO JMT remove comment + //size = cnvIpaToXsampa(ipastr, &xsampastr); + free(ipastr); + char* xsampa = strndup16to8(xsampastr, size); + if (!xsampa) + { + LOGE("Error: failed to allocate memory for IPA string conversion"); + delete [] xsampastr; + return; + } + if (strlen(m_data) + strlen(xsampa) + 7 > (size_t)m_datasize) + { + if (!growDataSize(100)) + { + LOGE("Error: failed to allocate memory for string!"); + delete [] xsampastr; + free(xsampa); + return; + } + } + strcat(m_data, " ph='"); + strcat(m_data, xsampa); + delete [] xsampastr; + free(xsampa); + } + else + { + strcat(m_data, " ph='"); + strcat(m_data, ph); + } + if (ph) + delete [] ph; + + if (strlen(m_data) + 3 > (size_t)m_datasize) + { + if (!growDataSize(100)) + { + LOGE("Error: failed to allocate memory for string!\n"); + return; + } + } + strcat(m_data, "'>"); + } + else if (strcmp(element, "break") == 0) + { + if (strlen(m_data) + 17 > (size_t)m_datasize) + { + if (!growDataSize(100)) + { + LOGE("Error: failed to allocate memory for string!\n"); + return; + } + } + strcat(m_data, "<break time='"); + char* time = NULL; + + for (int i = 0; attributes[i]; i += 2) + { + if (strcmp(attributes[i], "time") == 0) + { + time = new char[strlen(attributes[i+1]) + 1]; + if (!time) + { + LOGE("Error: failed to allocate memory for string!\n"); + return; + } + strcpy(time, attributes[i+1]); + } + else if (strcmp(attributes[i], "strength") == 0 && !time) + { + time = convertBreakStrengthToTime(attributes[i+1]); + } + } + if (!time) + { + time = new char[6]; + if (!time) + { + LOGE("Error: failed to allocate memory for string!\n"); + return; + } + strcpy(time, SSML_BREAK_WEAK); /* if no time or strength attributes are specified, default to weak break */ + } + if (strlen(m_data) + strlen(time) + 4 > (size_t)m_datasize) + { + if (!growDataSize(100)) + { + LOGE("Error: failed to allocate memory for string!\n"); + return; + } + } + strcat(m_data, time); + strcat(m_data, "'/>"); + m_isInBreak = 1; + } + else if (strcmp(element, "prosody") == 0) /* only pitch, rate and volume attributes are supported */ + { + for (int i = 0; attributes[i]; i += 2) + { + if (strcmp(attributes[i], "pitch") == 0) + { + char* svoxpitch = convertToSvoxPitch(attributes[i+1]); + if (!svoxpitch) + { + LOGE("Error: failed to allocate memory for string!\n"); + return; + } + if (!svoxpitch) + { + svoxpitch = new char[4]; + if (!svoxpitch) + { + LOGE("Error: failed to allocate memory for string!\n"); + return; + } + strcpy(svoxpitch, "100"); + } + char* pitch = new char[17 + strlen(svoxpitch)]; + if (!pitch) + { + LOGE("Error: failed to allocate memory for string!\n"); + return; + } + sprintf(pitch, "<pitch level='%s'>", svoxpitch); + if (strlen(m_data) + strlen(pitch) + 1 > (size_t)m_datasize) + { + if (!growDataSize(100)) + { + LOGE("Error: failed to allocate memory for string!\n"); + return; + } + } + strcat(m_data, pitch); + if (!m_appendix) + { + m_appendix = new char[30]; + m_appendix[0] = '\0'; + } + strcat(m_appendix, "</pitch>"); + delete [] svoxpitch; + delete [] pitch; + } + else if (strcmp(attributes[i], "rate") == 0) + { + char* svoxrate = convertToSvoxRate(attributes[i+1]); + if (!svoxrate) + { + svoxrate = new char[4]; + if (!svoxrate) + { + LOGE("Error: failed to allocate memory for string!\n"); + return; + } + strcpy(svoxrate, "100"); + } + char* rate = new char[17 + strlen(svoxrate)]; + if (!rate) + { + LOGE("Error: failed to allocate memory for string!\n"); + return; + } + sprintf(rate, "<speed level='%s'>", svoxrate); + if (strlen(m_data) + strlen(rate) + 1 > (size_t)m_datasize) + { + if (!growDataSize(100)) + { + LOGE("Error: failed to allocate memory for string!\n"); + return; + } + } + strcat(m_data, rate); + if (!m_appendix) + { + m_appendix = new char[30]; + if (!m_appendix) + { + LOGE("Error: failed to allocate memory for string!\n"); + return; + } + m_appendix[0] = '\0'; + } + strcat(m_appendix, "</speed>"); + delete [] svoxrate; + delete [] rate; + } + else if (strcmp(attributes[i], "volume") == 0) + { + char* svoxvol = convertToSvoxVolume(attributes[i+1]); + if (!svoxvol) + { + svoxvol = new char[4]; + if (!svoxvol) + { + LOGE("Error: failed to allocate memory for string!\n"); + return; + } + strcpy(svoxvol, "100"); + } + char* volume = new char[18 + strlen(svoxvol)]; + if (!volume) + { + LOGE("Error: failed to allocate memory for string!\n"); + return; + } + sprintf(volume, "<volume level='%s'>", svoxvol); + if (strlen(m_data) + strlen(volume) + 1 > (size_t)m_datasize) + { + if (!growDataSize(100)) + { + LOGE("Error: failed to allocate memory for string!\n"); + return; + } + } + strcat(m_data, volume); + if (!m_appendix) + { + m_appendix = new char[30]; + m_appendix[0] = '\0'; + } + strcat(m_appendix, "</volume>"); + delete [] svoxvol; + delete [] volume; + } + } + } + else if (strcmp(element, "audio") == 0) /* only 16kHz 16bit wav files are supported as src */ + { + if (strlen(m_data) + 17 > (size_t)m_datasize) + { + if (!growDataSize(100)) + { + LOGE("Error: failed to allocate memory for string!\n"); + return; + } + } + strcat(m_data, "<usesig file='"); + + for (int i = 0; attributes[i]; i += 2) + { + if (strcmp(attributes[i], "src") == 0) + { + if (strlen(m_data) + strlen(attributes[i+1]) + 1 > (size_t)m_datasize) + { + if (!growDataSize(100)) + { + LOGE("Error: failed to allocate memory for string!\n"); + return; + } + } + strcat(m_data, attributes[i+1]); + } + } + strcat(m_data, "'>"); + } +} + +void SvoxSsmlParser::endtagHandler(void* data, const XML_Char* element) +{ + ((SvoxSsmlParser*)data)->endElement(element); +} + +void SvoxSsmlParser::endElement(const XML_Char* element) +{ + if (strcmp(element, "speak") == 0) + { + + } + else if (strcmp(element, "p") == 0) + { + if (strlen(m_data) + 5 > (size_t)m_datasize) + { + if (!growDataSize(100)) + { + LOGE("Error: failed to allocate memory for string!\n"); + return; + } + } + strcat(m_data, "</p>"); + } + else if (strcmp(element, "s") == 0) + { + if (strlen(m_data) + 5 > (size_t)m_datasize) + { + if (!growDataSize(100)) + { + LOGE("Error: failed to allocate memory for string!\n"); + return; + } + } + strcat(m_data, "</s>"); + } + else if (strcmp(element, "phoneme") == 0) + { + if (strlen(m_data) + 11 > (size_t)m_datasize) + { + if (!growDataSize(100)) + { + LOGE("Error: failed to allocate memory for string!\n"); + return; + } + } + strcat(m_data, "</phoneme>"); + } + else if (strcmp(element, "break") == 0) + { + m_isInBreak = 0; /* indicate we are no longer in break tag */ + } + else if (strcmp(element, "prosody") == 0) + { + if (m_appendix) + { + if (strlen(m_data) + strlen(m_appendix) + 1 > (size_t)m_datasize) + { + if (!growDataSize(100)) + { + LOGE("Error: failed to allocate memory for string!\n"); + return; + } + } + strcat(m_data, m_appendix); + delete [] m_appendix; + m_appendix = NULL; + } + } + else if (strcmp(element, "audio") == 0) + { + if (strlen(m_data) + 10 > (size_t)m_datasize) + { + if (!growDataSize(100)) + { + LOGE("Error: failed to allocate memory for string!\n"); + return; + } + } + strcat(m_data, "</usesig>"); + } +} + +void SvoxSsmlParser::textHandler(void* data, const XML_Char* text, int length) +{ + ((SvoxSsmlParser*)data)->textElement(text, length); +} + +void SvoxSsmlParser::textElement(const XML_Char* text, int length) +{ + if (m_isInBreak) + { + return; /* handles the case when someone has added text inside the break tag - this text is thrown away */ + } + + char* content = new char[length + 1]; + if (!content) + { + LOGE("Error: failed to allocate memory for string!\n"); + return; + } + strncpy(content, text, length); + content[length] = '\0'; + + if (strlen(m_data) + strlen(content) + 1 > (size_t)m_datasize) + { + if (!growDataSize(100)) + { + LOGE("Error: failed to allocate memory for string!\n"); + return; + } + } + strcat(m_data, content); + delete [] content; +} + +/** + convertToSvoxPitch + Converts SSML pitch labels to SVOX pitch levels +*/ +char* SvoxSsmlParser::convertToSvoxPitch(const char* value) +{ + char* converted = NULL; + if (strcmp(value, "x-low") == 0) + { + converted = new char[4]; + if (!converted) + { + LOGE("Error: failed to allocate memory for string!\n"); + return NULL; + } + strcpy(converted, SSML_PITCH_XLOW); + } + else if (strcmp(value, "low") == 0) + { + converted = new char[4]; + if (!converted) + { + LOGE("Error: failed to allocate memory for string!\n"); + return NULL; + } + strcpy(converted, SSML_PITCH_LOW); + } + else if (strcmp(value, "medium") == 0) + { + converted = new char[4]; + if (!converted) + { + LOGE("Error: failed to allocate memory for string!\n"); + return NULL; + } + strcpy(converted, SSML_PITCH_MEDIUM); + } + else if (strcmp(value, "default") == 0) + { + converted = new char[4]; + if (!converted) + { + LOGE("Error: failed to allocate memory for string!\n"); + return NULL; + } + strcpy(converted, SSML_PITCH_MEDIUM); + } + else if (strcmp(value, "high") == 0) + { + converted = new char[4]; + if (!converted) + { + LOGE("Error: failed to allocate memory for string!\n"); + return NULL; + } + strcpy(converted, SSML_PITCH_HIGH); + } + else if (strcmp(value, "x-high") == 0) + { + converted = new char[4]; + if (!converted) + { + LOGE("Error: failed to allocate memory for string!\n"); + return NULL; + } + strcpy(converted, SSML_PITCH_XHIGH); + } + return converted; +} + +/** + convertToSvoxRate + Converts SSML rate labels to SVOX speed levels +*/ +char* SvoxSsmlParser::convertToSvoxRate(const char* value) +{ + char* converted = NULL; + if (strcmp(value, "x-slow") == 0) + { + converted = new char[4]; + if (!converted) + { + LOGE("Error: failed to allocate memory for string!\n"); + return NULL; + } + strcpy(converted, SSML_RATE_XSLOW); + } + else if (strcmp(value, "slow") == 0) + { + converted = new char[4]; + if (!converted) + { + LOGE("Error: failed to allocate memory for string!\n"); + return NULL; + } + strcpy(converted, SSML_RATE_SLOW); + } + else if (strcmp(value, "medium") == 0) + { + converted = new char[4]; + if (!converted) + { + LOGE("Error: failed to allocate memory for string!\n"); + return NULL; + } + strcpy(converted, SSML_RATE_MEDIUM); + } + else if (strcmp(value, "default") == 0) + { + converted = new char[4]; + if (!converted) + { + LOGE("Error: failed to allocate memory for string!\n"); + return NULL; + } + strcpy(converted, SSML_RATE_MEDIUM); + } + else if (strcmp(value, "fast") == 0) + { + converted = new char[4]; + if (!converted) + { + LOGE("Error: failed to allocate memory for string!\n"); + return NULL; + } + strcpy(converted, SSML_RATE_FAST); + } + else if (strcmp(value, "x-fast") == 0) + { + converted = new char[4]; + if (!converted) + { + LOGE("Error: failed to allocate memory for string!\n"); + return NULL; + } + strcpy(converted, SSML_RATE_XFAST); + } + return converted; +} + +/** + convertToSvoxVolume + Converts SSML volume labels to SVOX volume levels +*/ +char* SvoxSsmlParser::convertToSvoxVolume(const char* value) +{ + char* converted = NULL; + if (strcmp(value, "silent") == 0) + { + converted = new char[4]; + if (!converted) + { + LOGE("Error: failed to allocate memory for string!\n"); + return NULL; + } + strcpy(converted, SSML_VOLUME_SILENT); + } + else if (strcmp(value, "x-low") == 0) + { + converted = new char[4]; + if (!converted) + { + LOGE("Error: failed to allocate memory for string!\n"); + return NULL; + } + strcpy(converted, SSML_VOLUME_XLOW); + } + else if (strcmp(value, "low") == 0) + { + converted = new char[4]; + if (!converted) + { + LOGE("Error: failed to allocate memory for string!\n"); + return NULL; + } + strcpy(converted, SSML_VOLUME_LOW); + } + else if (strcmp(value, "medium") == 0) + { + converted = new char[4]; + if (!converted) + { + LOGE("Error: failed to allocate memory for string!\n"); + return NULL; + } + strcpy(converted, SSML_VOLUME_MEDIUM); + } + else if (strcmp(value, "default") == 0) + { + converted = new char[4]; + if (!converted) + { + LOGE("Error: failed to allocate memory for string!\n"); + return NULL; + } + strcpy(converted, SSML_VOLUME_MEDIUM); + } + else if (strcmp(value, "loud") == 0) + { + converted = new char[4]; + if (!converted) + { + LOGE("Error: failed to allocate memory for string!\n"); + return NULL; + } + strcpy(converted, SSML_VOLUME_LOUD); + } + else if (strcmp(value, "x-loud") == 0) + { + converted = new char[4]; + if (!converted) + { + LOGE("Error: failed to allocate memory for string!\n"); + return NULL; + } + strcpy(converted, SSML_VOLUME_XLOUD); + } + return converted; +} + +/** + convertBreakStrengthToTime + Converts SSML break strength labels to SVOX break time +*/ +char* SvoxSsmlParser::convertBreakStrengthToTime(const char* value) +{ + char* converted = NULL; + if (strcmp(value, "none") == 0) + { + converted = new char[6]; + if (!converted) + { + LOGE("Error: failed to allocate memory for string!\n"); + return NULL; + } + strcpy(converted, SSML_BREAK_NONE); + } + else if (strcmp(value, "x-weak") == 0) + { + converted = new char[6]; + if (!converted) + { + LOGE("Error: failed to allocate memory for string!\n"); + return NULL; + } + strcpy(converted, SSML_BREAK_XWEAK); + } + else if (strcmp(value, "weak") == 0) + { + converted = new char[6]; + if (!converted) + { + LOGE("Error: failed to allocate memory for string!\n"); + return NULL; + } + strcpy(converted, SSML_BREAK_WEAK); + } + else if (strcmp(value, "medium") == 0) + { + converted = new char[6]; + if (!converted) + { + LOGE("Error: failed to allocate memory for string!\n"); + return NULL; + } + strcpy(converted, SSML_BREAK_MEDIUM); + } + else if (strcmp(value, "strong") == 0) + { + converted = new char[6]; + if (!converted) + { + LOGE("Error: failed to allocate memory for string!\n"); + return NULL; + } + strcpy(converted, SSML_BREAK_STRONG); + } + else if (strcmp(value, "x-strong") == 0) + { + converted = new char[6]; + if (!converted) + { + LOGE("Error: failed to allocate memory for string!\n"); + return NULL; + } + strcpy(converted, SSML_BREAK_XSTRONG); + } + return converted; +} + +/** + growDataSize + Increases the size of the internal text storage member +*/ +int SvoxSsmlParser::growDataSize(int sizeToGrow) +{ + char* tmp = new char[m_datasize]; + if (!tmp) + return 0; + + strcpy(tmp, m_data); + delete [] m_data; + m_data = NULL; + m_data = new char[m_datasize + sizeToGrow]; + if (!m_data) + { + m_data = tmp; + return 0; + } + m_datasize += sizeToGrow; + strcpy(m_data, tmp); + delete [] tmp; + tmp = NULL; + return 1; +} diff --git a/pico/tts/svox_ssml_parser.h b/pico/tts/svox_ssml_parser.h new file mode 100755 index 0000000..cc83305 --- /dev/null +++ b/pico/tts/svox_ssml_parser.h @@ -0,0 +1,162 @@ +/* + * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * History: + * 2009-06-29 -- initial version + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <libexpat/expat.h> + +#ifndef _SVOX_SSML_PARSER_H_ +#define _SVOX_SSML_PARSER_H_ + +/** + * SvoxSsmlParser + * Parses SSML 1.0 XML documents and convertes it to Pico compatible text input + */ +class SvoxSsmlParser +{ + public: /* construction code */ + + /** + Constructor + Creates Expat parser and allocates initial text storage + */ + SvoxSsmlParser(); + + /** + Destructor + Deallocates all resources + */ + ~SvoxSsmlParser(); + + /** + initSuccessful + Verifies that construction was successful + return 1 if successful, 0 otherwise + */ + int initSuccessful(); + + public: /* public members */ + + /** + parseDocument + Parses SSML 1.0 document passed in as argument + @ssmldoc - SSML document, partial document input is supported + @isFinal - indicates whether input is a complete or partial document, 1 indicates complete document, 0 indicates partial + return Expat status code + */ + int parseDocument(const char* ssmldoc, int isFinal); + + /** + getParsedDocument + Returns string containing parse result. This can be passed on to Pico for synthesis + return parsed string, NULL if error occurred + */ + char* getParsedDocument(); + + /** + getParsedDocumentLanguage + Returns language string specified in xml:lang attribute of the <speak> tag + return language code of SSML document, NULL if not set + */ + char* getParsedDocumentLanguage(); + + private: /* static callback functions */ + + /** + starttagHandler + Static callback function for Expat start-tag events, internal use only + */ + static void starttagHandler(void* data, const XML_Char* element, const XML_Char** attributes); + + /** + endtagHandler + Static callback function for Expat end-tag events, internal use only + */ + static void endtagHandler(void* data, const XML_Char* element); + + /** + textHandler + Static callback function for Expat text events, internal use only + */ + static void textHandler(void* data, const XML_Char* text, int length); + + private: /* element handlers */ + + /** + startElement + Handles start of element, called by starttagHandler. + */ + void startElement(const XML_Char* element, const XML_Char** attributes); + + /** + endElement + Handles end of element, called by endtagHandler. + */ + void endElement(const XML_Char* element); + + /** + textElement + Handles text element, called by textHandler. + */ + void textElement(const XML_Char* text, int length); + + /* helper functions */ + + /** + convertToSvoxPitch + Convertes SSML prosody tag pitch values to SVOX Pico pitch values. + */ + char* convertToSvoxPitch(const char* value); + + /** + convertToSvoxRate + Convertes SSML prosody tag rate values to SVOX Pico speed values. + */ + char* convertToSvoxRate(const char* value); + + /** + convertToSvoxVolume + Convertes SSML prosody tag volume values to SVOX Pico volume values. + */ + char* convertToSvoxVolume(const char* value); + + /** + convertBreakStrengthToTime + Convertes SSML break tag strength attribute values to SVOX Pico break time values. + */ + char* convertBreakStrengthToTime(const char* value); + + /** + growDataSize + Increases size of internal text field. + */ + int growDataSize(int sizeToGrow); + + private: /* data members*/ + + char* m_data; /* internal text field, holds parsed text */ + int m_datasize; /* size of internal text field */ + XML_Parser mParser; /* Expat XML parser pointer */ + int m_isInBreak; /* indicator for handling break tag parsing */ + char* m_appendix; /* holds Pico pitch, speed and volume close tags for prosody tag parsing */ + char* m_docLanguage; /* language set in speak tag of SSML document */ +}; + +#endif // _SVOX_SSML_PARSER_H_ |