diff options
author | Charles Chen <clchen@google.com> | 2009-06-22 16:25:25 -0700 |
---|---|---|
committer | Charles Chen <clchen@google.com> | 2009-06-22 17:14:37 -0700 |
commit | 1284d937084a20b457c280259fff59391129509a (patch) | |
tree | 5630028284c450b56a56b187d9c99cf7ebcee9cc /pico/lib/picospho.c | |
parent | f605ee98e5e03144c25a92af7e5d2a3ec33d375f (diff) | |
download | external_svox-1284d937084a20b457c280259fff59391129509a.zip external_svox-1284d937084a20b457c280259fff59391129509a.tar.gz external_svox-1284d937084a20b457c280259fff59391129509a.tar.bz2 |
Moving PicoTts plugin under the pico directory of external/svox
Diffstat (limited to 'pico/lib/picospho.c')
-rw-r--r-- | pico/lib/picospho.c | 1694 |
1 files changed, 1694 insertions, 0 deletions
diff --git a/pico/lib/picospho.c b/pico/lib/picospho.c new file mode 100644 index 0000000..0d0cdf8 --- /dev/null +++ b/pico/lib/picospho.c @@ -0,0 +1,1694 @@ +/* + * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @file picospho.c + * + * sentence phonemic/phonetic FSTs PU + * + * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland + * All rights reserved. + * + * History: + * - 2009-04-20 -- initial version + * + */ + +#include "picoos.h" +#include "picodbg.h" +#include "picodata.h" + +#include "picoknow.h" +#include "picokfst.h" +#include "picoktab.h" +#include "picotrns.h" + +#include "picospho.h" + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} +#endif + +#define SPHO_BUFSIZE (3 * PICODATA_BUFSIZE_DEFAULT) + + + +#define SPHO_MAX_ALTDESC_SIZE (60 * PICOTRNS_MAX_NUM_POSSYM) + + +#define SPHO_SMALLEST_SIL_DUR 1 + + +/** @addtogroup picospho + * + * Algorithmic description + * ======================= + * The main function, sphoStep, is divided into the subprocesses (processing states) described further down. + * + * Flow control: + * ------------ + * The processing flow is controlled by setting + * - 'procState' : the next state to be processed + * - 'feedFollowState' : the state to be processed after the feed state (the feed state is treated like a primitive "subroutine") + * - some other flags + * + * Buffering: + * --------- + * - The input items are mainly stored and processed in two buffers, collectively called 'inBuf' + * - cbuf : unstructured buffer containing item contents + * - headx : structured buffer containing item heads, each expanded by a pointer to the item contents + * and space for a boundary potentially to be inserted (to the left of the original item) + * - For transduction, phonemes and their position are extracted from inBuf into + * - phonBuf, + * processed there, and the resulting phonemes realigned with inBuf. + * - Word items are split into syllables, stored in + * - sylBuf + * - Items to be output are stored in outBuf + * + * Windowing: + * --------- + * Optimal solutions are achieved if a whole sentence is processed at once. However, if any of the buffers are too small, + * only sentence parts are processed. To improve the quality of such sub-optimal solutions, a moving-window-with-overlap is applied: + * - [0,headxReadPos[ : the window considered for transduction + * - [activeStartPos,activeEndPos[ : the "active" subrange of the window actually used for output + * - penultima : the position (within the active range) that should be used as new window start when shifting the window + * + * After PROCESS_PARSE: + * 0 activeStartPos penultima activeEndPos headxReadPos headxWritePos + * | | | | | | + * |-------------=================================---------------| ['----': context '====' : active subrange) + * + * After PROCESS_SHIFT: + * 0 activeStartPos headWritePos + * | | | | + * |------------... (only left context is known; new active range, penultima, and right context to be established at next parse) + * + * Processing states: + * ----------------- + * - INIT : initialize state variables + * - COLLECT : collect items into internal buffers ("inBuf") + * - PROCESS_PARSE : go through inBuf items and extract position/phoneme pairs into phoneme buffer 'phonBuf' + * word boundary phonemes are inserted between words + * - PROCESS_TRANSDUCE : transduce phonBuf + * - PROCESS_BOUNDS : go through inBuf items again and match against transduced pos/phoneme + * this is the first round of alignment, only inserting/deleting/modifying bounds, according to + * - existing BOUND items + * - newly produced word bounds separating WORDPHON items + * - bound upgrades/downgrades from transduction + * - bound upgrades/downgrades/insertions from SIL command items (originating e.g. from <break> text commands) + * all relevant bounds are placed in the corresponding headx extention; original bound items become invalid. + * - PROCESS_RECOMB : go through inBuf items again and match against transduced pos/phoneme + * this is the second round of alignment, treating non-BOUND items + * - WORDPHONs are broken into syllables by "calling" PROCESS_SYL + * - "side-bounds" (in the headx extension) are output by "calling" FEED + * - BOUND items are consumed with no effect + * - other items are output unchanged "calling" FEED + * - PROCESS_SYL : the WORDPHON coming from RECOMB is matched against the phonBuf and (new) SYLLPHON items + * are created. (the original wordphon is consumed) + * - FEED : feeds one item and returns to spho->feedFollowState + * - SHIFT : items in inBuf are shifted left to make room for new items. If a sentence doesn't fit + * inBuf in its entirety, left and/or right contexts are kept so they can be considered in + * the next transduction. + */ + + + +/* PU sphoStep states */ +#define SPHO_STEPSTATE_INIT 0 +#define SPHO_STEPSTATE_COLLECT 1 +#define SPHO_STEPSTATE_PROCESS_PARSE 2 +#define SPHO_STEPSTATE_PROCESS_TRANSDUCE 3 +#define SPHO_STEPSTATE_PROCESS_BOUNDS 4 +#define SPHO_STEPSTATE_PROCESS_RECOMB 5 +#define SPHO_STEPSTATE_PROCESS_SYL 6 +#define SPHO_STEPSTATE_FEED 7 +#define SPHO_STEPSTATE_SHIFT 8 + +#define SPHO_POS_INVALID (PICOTRNS_POS_INVALID) /* indicates that no position was set yet */ + +/* nr item restriction: maximum number of extended item heads in headx */ +#define SPHO_MAXNR_HEADX 60 + +/* nr item restriction: maximum size of all item contents together in cont */ +#define SPHO_MAXSIZE_CBUF (30 * 255) + +/* "expanded head": item head expanded by a content position and a by boundary information + * potentially inserted "to the left" of the item */ +typedef struct { + picodata_itemhead_t head; + picoos_uint16 cind; + picoos_uint8 boundstrength; /* bstrength to the left, 0 if not set */ + picoos_uint8 phrasetype; /* btype for following phrase, 0 if not set */ + picoos_int16 sildur; /* silence duration for boundary, -1 if not set */ +} picospho_headx_t; + + + +#define SPHO_MSGSTR_SIZE 32 + +/** object : SentPhoUnit + * shortcut : spho + * derived from : picodata_ProcessingUnit + */ +typedef struct spho_subobj { + picoos_Common common; + + /* we use int16 for buffer positions so we can indicate exceptional positions (invalid etc.) with negative + * integers */ + picoos_uint8 procState; /* for next processing step decision */ + + /* buffer for item headers */ + picoos_uint8 tmpbuf[PICODATA_MAX_ITEMSIZE]; /* tmp. location for an item */ + + picospho_headx_t headx[SPHO_MAXNR_HEADX]; /* "expanded head" buffer */ + picoos_uint16 headxBufSize; /* actually allocated size (if one day headxBuf is allocated dynamically) */ + picoos_uint16 headxReadPos, headxWritePos; + + picoos_uint8 cbuf[SPHO_MAXSIZE_CBUF]; + picoos_uint16 cbufBufSize; /* actually allocated size */ + picoos_uint16 cbufWritePos; /* next position to write to, 0 if buffer empty */ + + picoos_uint8 outBuf[PICODATA_BUFSIZE_DEFAULT]; /* internal output buffer to hold just one item */ + picoos_uint16 outBufSize; /* actually allocated size (if one day outBuf is allocated dynamically) */ + picoos_uint16 outReadPos; /* next pos to read from inBuf for output */ + + /* picoos_int16 outWritePos; */ /* next pos to output from in buf */ + + picoos_uint8 sylBuf[255]; /* internal buffer to hold contents of syl item to be output */ + picoos_uint8 sylReadPos, sylWritePos; /* next pos to read from sylBuf, next pos to write to sylBuf */ + + /* buffer for internal calculation of transducer */ + picotrns_AltDesc altDescBuf; + /* the number of AltDesc in the buffer */ + picoos_uint16 maxAltDescLen; + + /* the input to a transducer should not be larger than PICOTRNS_MAX_NUM_POSSYM + * so the output may expand (up to 4*PICOTRNS_MAX_NUM_POSSYM) */ + + picotrns_possym_t phonBufA[4 * PICOTRNS_MAX_NUM_POSSYM + 1]; + picotrns_possym_t phonBufB[4 * PICOTRNS_MAX_NUM_POSSYM + 1]; + picotrns_possym_t * phonBuf; + picotrns_possym_t * phonBufOut; + picoos_uint16 phonReadPos, phonWritePos; /* next pos to read from phonBufIn, next pos to write to phonBufIn */ + + picoos_int16 activeStartPos; /* start position of items to be treated (at end of left context) */ + picoos_int16 penultima, activeEndPos; /* positions of last two bounds/words; SPHO_POS_INVALID means uninitialized */ + picoos_int16 lastPhraseBoundPos; /* position of the last bound encountered (<0 if inexistent or not reachable */ + picoos_uint8 lastPhraseType; /* phrase type of the last phrase boundary, 0 if not set */ + + picoos_uint8 needMoreInput, /* more data necessary to decide on token */ + suppressParseWordBound, /* dont produce word boundary */ + suppressRecombWordBound, /* dont produce word boundary */ + breakPending, /* received a break but didn't interpret it yet */ + /* sentEnd, */ /* sentence end detected */ + force, /* in forced state */ + wordStarted, /* is it the first syl in the word: expect POS */ + sentenceStarted; + + picoos_uint16 breakTime; /* time argument of the pending break command */ + + picoos_uint8 feedFollowState; /* where to return after feed */ + + /* fst knowledge bases */ + picoos_uint8 numFsts; + picokfst_FST fst[PICOKNOW_MAX_NUM_SPHO_FSTS]; + picoos_uint8 curFst; /* the fst to be applied next */ + + /* fixed ids knowledge base */ + picoktab_FixedIds fixedIds; + + /* phones kb */ + picoktab_Phones phones; + + /* some soecial ids from phones */ + picoos_uint8 primStressId, secondStressId, syllSepId; + +} spho_subobj_t; + + +static pico_status_t sphoReset(register picodata_ProcessingUnit this) +{ + + spho_subobj_t * spho; + + if (NULL == this || NULL == this->subObj) { + return picoos_emRaiseException(this->common->em, + PICO_ERR_NULLPTR_ACCESS, NULL, NULL); + } + spho = (spho_subobj_t *) this->subObj; + + spho->curFst = 0; + +/* processing state */ + spho->procState = SPHO_STEPSTATE_INIT; + spho->needMoreInput = TRUE; + spho->suppressParseWordBound = FALSE; + spho->suppressRecombWordBound = FALSE; + spho->breakPending = FALSE; + spho->force = 0; + spho->sentenceStarted = 0; + + + /* item buffer headx/cbuf */ + spho->headxBufSize = SPHO_MAXNR_HEADX; + spho->headxReadPos = 0; + spho->headxWritePos = 0; + + spho->cbufWritePos = 0; + spho->cbufBufSize = SPHO_MAXSIZE_CBUF; + + /* possym buffer */ + spho->phonBuf = spho->phonBufA; + spho->phonBufOut = spho->phonBufB; + spho->phonReadPos = 0; + + /* overlapping */ + spho->activeStartPos = 0; + spho->penultima = SPHO_POS_INVALID; + spho->activeEndPos = SPHO_POS_INVALID; + + return PICO_OK; +} + + +static pico_status_t sphoInitialize(register picodata_ProcessingUnit this) +{ + picoos_uint8 i; + spho_subobj_t * spho; + picokfst_FST fst; + + picoknow_kb_id_t myKbIds[PICOKNOW_MAX_NUM_SPHO_FSTS] = PICOKNOW_KBID_SPHO_ARRAY; + + PICODBG_DEBUG(("init")); + + if (NULL == this || NULL == this->subObj) { + return picoos_emRaiseException(this->common->em, + PICO_ERR_NULLPTR_ACCESS, NULL, NULL); + } + + spho = (spho_subobj_t *) this->subObj; + + spho->numFsts = 0; + + spho->curFst = 0; + + for (i = 0; i<PICOKNOW_MAX_NUM_SPHO_FSTS; i++) { + fst = picokfst_getFST(this->voice->kbArray[myKbIds[i]]); + if (NULL != fst) { + spho->fst[spho->numFsts++] = fst; + } + } + spho->fixedIds = picoktab_getFixedIds(this->voice->kbArray[PICOKNOW_KBID_FIXED_IDS]); + spho->phones = picoktab_getPhones(this->voice->kbArray[PICOKNOW_KBID_TAB_PHONES]); + + spho->syllSepId = picoktab_getSyllboundID(spho->phones); + spho->primStressId = picoktab_getPrimstressID(spho->phones); + spho->secondStressId = picoktab_getSecstressID(spho->phones); + + PICODBG_DEBUG(("got %i fsts", spho->numFsts)); + + + return sphoReset(this); + +} + +static picodata_step_result_t sphoStep(register picodata_ProcessingUnit this, + picoos_int16 mode, picoos_uint16 *numBytesOutput); + + + + +static pico_status_t sphoTerminate(register picodata_ProcessingUnit this) +{ + return PICO_OK; +} + + +static pico_status_t sphoSubObjDeallocate(register picodata_ProcessingUnit this, + picoos_MemoryManager mm) +{ + spho_subobj_t * spho; + + spho = (spho_subobj_t *) this->subObj; + + if (NULL != this) { + if (NULL != this->subObj) { + spho = (spho_subobj_t *) (this->subObj); + picotrns_deallocate_alt_desc_buf(spho->common->mm,&spho->altDescBuf); + picoos_deallocate(mm, (void *) &this->subObj); + } + } + return PICO_OK; +} + +picodata_ProcessingUnit picospho_newSentPhoUnit(picoos_MemoryManager mm, + picoos_Common common, picodata_CharBuffer cbIn, + picodata_CharBuffer cbOut, picorsrc_Voice voice) +{ + spho_subobj_t * spho; + + picodata_ProcessingUnit this = picodata_newProcessingUnit(mm, common, cbIn, cbOut, voice); + if (this == NULL) { + return NULL; + } + + this->initialize = sphoInitialize; + this->step = sphoStep; + this->terminate = sphoTerminate; + this->subDeallocate = sphoSubObjDeallocate; + + this->subObj = picoos_allocate(mm, sizeof(spho_subobj_t)); + if (this->subObj == NULL) { + picoos_deallocate(mm, (void **)(void*)&this); + return NULL; + } + spho = (spho_subobj_t *) this->subObj; + + spho->common = this->common; + + /* these are given by the pre-allocated array sizes */ + spho->outBufSize = PICODATA_BUFSIZE_DEFAULT; + + + spho->altDescBuf = picotrns_allocate_alt_desc_buf(spho->common->mm, SPHO_MAX_ALTDESC_SIZE, &spho->maxAltDescLen); + if (NULL == spho->altDescBuf) { + picotrns_deallocate_alt_desc_buf(spho->common->mm,&spho->altDescBuf); + picoos_emRaiseException(spho->common->em,PICO_EXC_OUT_OF_MEM, NULL,NULL); + return NULL; + } + + sphoInitialize(this); + return this; +} + + +/* ***********************************************************************/ +/* process buffered item list */ +/* ***********************************************************************/ + + +/* shift relevant data in headx/'cbuf' (between 'readPos' incl and writePos non-incl) to 'start'. + * modify read/writePos accordingly */ +static picoos_int16 shift_range_left_1(spho_subobj_t *spho, picoos_int16 * from, picoos_int16 to) +{ + + /* remember shift parameters for cbuf */ + picoos_uint16 + c_i, + c_j, + c_diff, + c_writePos, + i, + j, + diff, + writePos; + i = to; + j = *from; + diff = j-i; + writePos = spho->headxWritePos; + c_i = spho->headx[to].cind; + if (j < writePos) { + c_j = spho->headx[j].cind; + } else { + c_j = spho->cbufWritePos; + } + c_diff = c_j - c_i; + c_writePos = spho->cbufWritePos; + + PICODBG_DEBUG(( + "shifting buffer region [%i,%i[ down to %i",*from, writePos, to + )); + + + /* PICODBG_ASSERT((i<j)); */ + if (i > j) { + return -1; + } + /* shift cbuf */ + while (c_j < c_writePos) { + spho->cbuf[c_i++] = spho->cbuf[c_j++]; + } + /* shift headx */ + while (j < writePos) { + spho->headx[j].cind -= c_diff; + spho->headx[i++] = spho->headx[j++]; + } + spho->headxWritePos -= diff; + *from = to; + spho->cbufWritePos -= c_diff; + /* */ + PICODBG_DEBUG(( + "readPos,WritePos are now [%i,%i[, returning shift amount %i",*from, spho->headxWritePos, diff + )); + return diff; +} + +static pico_status_t sphoAddPhoneme(register spho_subobj_t *spho, picoos_int16 pos, picoos_int16 sym) { + picoos_uint8 plane, unshifted; + /* just for debuging */ + unshifted = picotrns_unplane(sym,&plane); + PICODBG_TRACE(("adding %i/%i (%c on plane %i) at phonBuf[%i]",pos,sym,unshifted,plane,spho->phonWritePos)); + if (2* PICOTRNS_MAX_NUM_POSSYM <= spho->phonWritePos) { + /* not an error! */ + PICODBG_DEBUG(("couldn't add because phon buffer full")); + return PICO_EXC_BUF_OVERFLOW; + } else { + spho->phonBuf[spho->phonWritePos].pos = pos; + spho->phonBuf[spho->phonWritePos].sym = sym; + spho->phonWritePos++; + return PICO_OK; + } +} + +static pico_status_t sphoAddStartPhoneme(register spho_subobj_t *spho) { + return sphoAddPhoneme(spho, PICOTRNS_POS_IGNORE, + (PICOKFST_PLANE_INTERN << 8) + spho->fixedIds->phonStartId); +} + +static pico_status_t sphoAddTermPhonemes(register spho_subobj_t *spho, picoos_uint16 pos) { + return sphoAddPhoneme(spho, pos, + (PICOKFST_PLANE_PB_STRENGTHS << 8) + PICODATA_ITEMINFO1_BOUND_SEND) + && sphoAddPhoneme(spho, PICOTRNS_POS_IGNORE, + (PICOKFST_PLANE_INTERN << 8) + spho->fixedIds->phonTermId); +} + +/* return "syllable accent" (or prominence) symbol, given "word accent" symbol 'wacc' and stress value (no=0, primary=1, secondary=2) */ +static picoos_uint16 sphoGetSylAccent(register spho_subobj_t *spho, + picoos_uint8 wacc, picoos_uint8 sylStress) +{ + PICODBG_ASSERT(sylStress <= 2); + + spho = spho; /* avoid warning "var not used in this function"*/ + + switch (sylStress) { + case 0: /* non-stressed syllable gets no prominence */ + /* return spho->fixedIds->accId[0]; */ + return PICODATA_ACC0; + break; + case 1: /* primary-stressed syllable gets word prominence */ + return wacc; + break; + case 2: /* secondary-stressed syllable gets no prominence or secondary stress prom. (4) */ + return (PICODATA_ACC0 == wacc) ? PICODATA_ACC0 + : PICODATA_ACC4; + /*return (spho->fixedIds->accId[0] == wacc) ? spho->fixedIds->accId[0] + : spho->fixedIds->accId[4]; */ + break; + default: + /* never occurs :-) */ + return PICODATA_ACC0; + break; + } +} + + +/* ***********************************************************************/ +/* extract phonemes of an item into a phonBuf */ +/* ***********************************************************************/ +static pico_status_t sphoExtractPhonemes(register picodata_ProcessingUnit this, + register spho_subobj_t *spho, picoos_uint16 pos, + picoos_uint8 convertAccents, picoos_uint8 * suppressWB) +{ + pico_status_t rv = PICO_OK; + picoos_uint16 i, j; + picoos_int16 fstSymbol; + picoos_uint8 curStress; + picotrns_possym_t tmpPosSym; + picoos_uint16 oldPos, curPos; + picodata_itemhead_t * head; + picoos_uint8* content; + +#if defined(PICO_DEBUG) + picoos_char msgstr[SPHO_MSGSTR_SIZE]; +#endif + + + /* + Items considered in a transduction are a BOUND or a WORDPHON item. its starting offset within the + headxBuf is given as 'pos'. + Elements that go into the transduction receive "their" position in the buffer. + */ + + oldPos = spho->phonWritePos; + + head = &(spho->headx[pos].head); + content = spho->cbuf + spho->headx[pos].cind; + + PICODBG_TRACE(("doing item %s\n", + picodata_head_to_string(head,msgstr,SPHO_MSGSTR_SIZE))); + + switch (head->type) { + case PICODATA_ITEM_BOUND: + /* map SBEG, SEND and TERM (as sentence closing) to SEND */ + fstSymbol = (PICODATA_ITEMINFO1_BOUND_SBEG == head->info1 || PICODATA_ITEMINFO1_BOUND_TERM == head->info1) ? PICODATA_ITEMINFO1_BOUND_SEND : head->info1; + PICODBG_TRACE(("found bound of type %c\n",head->info1)); + /* BOUND(<bound strength><phrase type>) */ + /* insert bound strength */ + PICODBG_TRACE(("inserting phrase bound phoneme %c and setting suppresWB=1\n",fstSymbol)); + fstSymbol += (PICOKFST_PLANE_PB_STRENGTHS << 8); + rv = sphoAddPhoneme(spho,pos,fstSymbol); + /* phrase type not used */ + /* suppress next word boundary */ + (*suppressWB) = 1; + break; + + case PICODATA_ITEM_WORDPHON: + /* WORDPHON(POS,WACC)phon */ + PICODBG_TRACE(("found WORDPHON")); + /* insert word boundary if not suppressed */ + if (!(*suppressWB)) { + fstSymbol = (PICOKFST_PLANE_PB_STRENGTHS << 8) + PICODATA_ITEMINFO1_BOUND_PHR0; + PICODBG_TRACE(("adding word boundary phone")); + rv = sphoAddPhoneme(spho,pos,fstSymbol); + } + (*suppressWB) = 0; + /* for the time being, we force to use POS so we can transduce all fsts in a row without reconsulting the items */ + + + /* If 'convertAccents' then the accentuation is not directly encoded. It rather influences the mapping of + * the word accent symbol to the actual accent phoneme which is put after the syllable separator. */ + if (convertAccents) { + PICODBG_TRACE(("converting accents")); + /* extracting phonemes IN REVERSE order replacing syllable symbols with prominence symbols */ + curPos = spho->phonWritePos; + curStress = 0; /* no stress */ + for (i = head->len; i > 0 ;) { + i--; + if (spho->primStressId == content[i]) { + curStress = 1; + PICODBG_DEBUG(("skipping primary stress at pos %i (in 1 .. %i)",i, head->len)); + continue; /* skip primary stress symbol */ + } else if (spho->secondStressId == content[i]) { + curStress = 2; + PICODBG_DEBUG(("skipping secondary stress at pos %i (in 1 .. %i)",i, head->len)); + continue; /* skip secundary stress symbol */ + } else if (spho->syllSepId == content[i]) { + fstSymbol = (PICOKFST_PLANE_POS << 8) + head->info1; + rv = sphoAddPhoneme(spho, pos, fstSymbol); + /* replace syllSepId by combination of syllable stress and word prominence */ + fstSymbol = sphoGetSylAccent(spho,head->info2,curStress); + curStress = 0; + /* add accent */ + fstSymbol += (PICOKFST_PLANE_ACCENTS << 8); + rv = sphoAddPhoneme(spho,pos,fstSymbol); + if (PICO_OK != rv) { + break; + } + /* and keep syllable boundary */ + fstSymbol = (PICOKFST_PLANE_PHONEMES << 8) + content[i]; + } else { + /* normal phoneme */ + fstSymbol = (PICOKFST_PLANE_PHONEMES << 8) + content[i]; + } + if (PICO_OK == rv) { + rv = sphoAddPhoneme(spho,pos,fstSymbol); + } + } + if (PICO_OK == rv) { + /* bug 366: we position the "head" into the item header and not on the first phoneme + * because there might be no phonemes at all */ + /* insert head of the first syllable of a word */ + fstSymbol = (PICOKFST_PLANE_POS << 8) + head->info1; + rv = sphoAddPhoneme(spho,pos,fstSymbol); + fstSymbol = sphoGetSylAccent(spho,head->info2,curStress); + curStress = 0; + fstSymbol += (PICOKFST_PLANE_ACCENTS << 8); + rv = sphoAddPhoneme(spho,pos,fstSymbol); + } + if (PICO_OK == rv) { + /* invert sympos portion */ + i = curPos; + j=spho->phonWritePos-1; + while (i < j) { + tmpPosSym.pos = spho->phonBuf[i].pos; + tmpPosSym.sym = spho->phonBuf[i].sym; + spho->phonBuf[i].pos = spho->phonBuf[j].pos; + spho->phonBuf[i].sym = spho->phonBuf[j].sym; + spho->phonBuf[j].pos = tmpPosSym.pos; + spho->phonBuf[j].sym = tmpPosSym.sym; + i++; + j--; + } + } + } else { /* convertAccents */ + for (i = 0; i <head->len; i++) { + fstSymbol = (PICOKFST_PLANE_PHONEMES << 8) + content[i]; + rv = sphoAddPhoneme(spho,pos,fstSymbol); + } + } + break; + default: + picoos_emRaiseException(this->common->em,rv,NULL,NULL); + break; + } /* switch(head->type) */ + if (PICO_OK != rv) { + spho->phonWritePos = oldPos; + } + return rv; +} + + + + + +#define SPHO_POSSYM_OK 0 +#define SPHO_POSSYM_OUT_OF_RANGE 1 +#define SPHO_POSSYM_END 2 +#define SPHO_POSSYM_INVALID -3 +/* *readPos is the next position in phonBuf to be read, and *writePos is the first position not to be read (may be outside + * buf). + * 'rangeEnd' is the first possym position outside the desired range. + * Possible return values: + * SPHO_POSSYM_OK : 'pos' and 'sym' are set to the read possym, *readPos is advanced + * SPHO_POSSYM_OUT_OF_RANGE : pos is out of range. 'pos' is set to that of the read possym, 'sym' is undefined + * SPHO_POSSYM_UNDERFLOW : no more data in buf. 'pos' is set to PICOTRNS_POS_INVALID, 'sym' is undefined + * SPHO_POSSYM_INVALID : "strange" pos. 'pos' is set to PICOTRNS_POS_INVALID, 'sym' is undefined + */ +static pico_status_t getNextPosSym(spho_subobj_t * spho, picoos_int16 * pos, picoos_int16 * sym, + picoos_int16 rangeEnd) { + /* skip POS_IGNORE */ + while ((spho->phonReadPos < spho->phonWritePos) && (PICOTRNS_POS_IGNORE == spho->phonBuf[spho->phonReadPos].pos)) { + PICODBG_DEBUG(("ignoring phone at spho->phonBuf[%i] because it has pos==IGNORE",spho->phonReadPos)); + spho->phonReadPos++; + } + if ((spho->phonReadPos < spho->phonWritePos)) { + *pos = spho->phonBuf[spho->phonReadPos].pos; + if ((PICOTRNS_POS_INSERT == *pos) || ((0 <= *pos) && (*pos < rangeEnd))) { + *sym = spho->phonBuf[spho->phonReadPos++].sym; + return SPHO_POSSYM_OK; + } else if (*pos < 0){ /* *pos is "strange" (e.g. POS_INVALID) */ + return SPHO_POSSYM_INVALID; + } else { + return SPHO_POSSYM_OUT_OF_RANGE; + } + } else { + /* no more possyms to read */ + *pos = PICOTRNS_POS_INVALID; + return SPHO_POSSYM_END; + } +} + + + +/** Calculate bound strength modified by transduction + * + * Given the original bound strength 'orig' and the desired target strength 'target' (suggested by fst), + * calculate the modified bound strength. + * + * @param orig original bound strength + * @param target target bound strength + * @return resulting bound strength + */ +static picoos_uint8 fstModifiedBoundStrength(picoos_uint8 orig, picoos_uint8 target) +{ + switch (orig) { + case PICODATA_ITEMINFO1_BOUND_PHR1: + case PICODATA_ITEMINFO1_BOUND_PHR2: + /* don't allow primary phrase bounds to be demoted to word bound */ + if (PICODATA_ITEMINFO1_BOUND_PHR0 == target) { + return PICODATA_ITEMINFO1_BOUND_PHR3; + } + case PICODATA_ITEMINFO1_BOUND_PHR0: + case PICODATA_ITEMINFO1_BOUND_PHR3: + return target; + break; + default: + /* don't allow bounds other than phrase or word bounds to be changed */ + return orig; + break; + } +} + +/** Calculate bound strength modified by a \<break> command + * + * Given the original (predicted and possibly fst-modified) bound strength, and a time value from an + * overwriding \<break> command, calculate the modified bound strength. + * + * @param orig original bound strength + * @param time time given as property of \<break> command + * @param wasPrimary + * @return modified bound strength + */ +static picoos_uint8 breakModifiedBoundStrength(picoos_uint8 orig, picoos_uint16 time, picoos_bool wasPrimary) +{ + picoos_uint8 modified = (0 == time) ? PICODATA_ITEMINFO1_BOUND_PHR3 : + (50 < time) ? PICODATA_ITEMINFO1_BOUND_PHR1 : PICODATA_ITEMINFO1_BOUND_PHR2; + switch (orig) { + /* for word and phrase breaks, return 'modified', unless a non-silence gets time==0, in which + * case return no break (word break) */ + case PICODATA_ITEMINFO1_BOUND_PHR0: + if (0 == time) { + return PICODATA_ITEMINFO1_BOUND_PHR0; + } + case PICODATA_ITEMINFO1_BOUND_PHR3: + if (!wasPrimary && (0 == time)) { + return PICODATA_ITEMINFO1_BOUND_PHR0; + } + case PICODATA_ITEMINFO1_BOUND_PHR1: + case PICODATA_ITEMINFO1_BOUND_PHR2: + return modified; + break; + default: + return orig; + break; + } +} + +static picoos_bool breakStateInterrupting(picodata_itemhead_t * head, + picoos_bool * breakBefore, picoos_bool * breakAfter) { + + picoos_bool result = 1; + + *breakBefore = 0; + *breakAfter = 0; + + if (PICODATA_ITEM_WORDPHON == head->type) { + + } else if (PICODATA_ITEM_CMD == head->type) { + if ((PICODATA_ITEMINFO1_CMD_PLAY == head->info1) + || (PICODATA_ITEMINFO1_CMD_SAVE == head->info1) + || (PICODATA_ITEMINFO1_CMD_UNSAVE == head->info1)) { + *breakBefore = 1; + *breakAfter = 1; + } else if (PICODATA_ITEMINFO1_CMD_SAVE == head->info1) { + *breakBefore = 1; + } else if (PICODATA_ITEMINFO1_CMD_UNSAVE == head->info1) { + *breakAfter = 1; + } else if (PICODATA_ITEMINFO1_CMD_IGNSIG == head->info1) { + if (PICODATA_ITEMINFO2_CMD_START == head->info2) { + *breakBefore = 1; + } else { + *breakAfter = 1; + } + } + } else { + result = 0; + } + return result; +} + + +static void putSideBoundToOutput(spho_subobj_t * spho) +{ + + picodata_itemhead_t ohead; + picoos_uint8 ocontent[2*sizeof(picoos_uint16)]; + picoos_int16 sildur; + picoos_uint16 clen; + + /* create boundary */ + ohead.type = PICODATA_ITEM_BOUND; + ohead.info1 = spho->headx[spho->outReadPos].boundstrength; + ohead.info2 = spho->headx[spho->outReadPos].phrasetype; + sildur = spho->headx[spho->outReadPos].sildur; + if ((sildur < 0) + || (PICODATA_ITEMINFO1_BOUND_PHR0 == ohead.info1) + || (PICODATA_ITEMINFO1_BOUND_PHR3 == ohead.info1)) { + PICODBG_DEBUG(("outputting a bound of strength '%c' and type '%c' without duration constraints",ohead.info1, ohead.info2)); + ohead.len = 0; + } else { + picoos_uint32 pos = 0; + picoos_write_mem_pi_uint16(ocontent,&pos,sildur); + picoos_write_mem_pi_uint16(ocontent,&pos,sildur); + PICODBG_DEBUG(("outputting a bound of strength '%c' and type '%c' with duration constraints [%i,%i]",ohead.info1, ohead.info2,sildur, sildur)); + ohead.len = pos; + } + picodata_put_itemparts(&ohead, ocontent, ohead.len, + spho->outBuf, spho->outBufSize, &clen); + /* disable side bound */ + spho->headx[spho->outReadPos].boundstrength = 0; +} + +/** Set bound strength and sil dur. + * + * given the original bound strength 'orig_strength' and the fst-suggested bound strength 'fst_strength' + * and possibly being in a pending break state, calculate the resulting bound strength and set boundstrength + * and sildur of the current item (spho->headx[spho->outReadPos]) accordingly. + * if a boundstrength was set, also calculate the phrasetype and if necessary (and reachable), modify the phrase type + * of the previous phrase boundary. + * + * @param spho + * @param orig_strength + * @param orig_type + * @param fst_strength + */ +static void setSideBound(spho_subobj_t * spho, picoos_uint8 orig_strength, picoos_uint8 orig_type, picoos_uint8 fst_strength) { + picoos_uint8 strength; + + /* insert modified bound according to transduction symbol, if any */ + if (PICODATA_ITEMINFO1_NA == orig_strength) { + /* no original/fst strength given */ + orig_strength = PICODATA_ITEMINFO1_BOUND_PHR0; + strength = PICODATA_ITEMINFO1_BOUND_PHR0; + } else { + strength = fstModifiedBoundStrength(orig_strength,fst_strength); + spho->headx[spho->outReadPos].boundstrength = strength; + spho->headx[spho->outReadPos].sildur = -1; + PICODBG_DEBUG(("setting bound strength to fst-suggested value %c (was %c)",strength, spho->headx[spho->outReadPos].boundstrength, spho->breakTime)); + } + + /* insert modified bound according to pending break, if any */ + if (spho->breakPending) { + /* the calculation is based on the fst-modified value (because this is what the customer wants to + * override) + */ + strength = breakModifiedBoundStrength(strength, spho->breakTime, (PICODATA_ITEMINFO1_BOUND_PHR1 == orig_strength)); + PICODBG_DEBUG(("setting bound strength to break-imposed value %c (was %c) and time to %i",strength, spho->headx[spho->outReadPos].boundstrength, spho->breakTime)); + spho->headx[spho->outReadPos].boundstrength = strength; + spho->headx[spho->outReadPos].sildur = spho->breakTime; + spho->breakPending = FALSE; + } + if (spho->headx[spho->outReadPos].boundstrength) { + /* we did set a bound strength, possibly promoting or demoting a boundary; now set the phrase type + * possibly also changing the phrase type of the previous phrase bound + */ + picoos_uint8 fromPhrase = ((PICODATA_ITEMINFO1_BOUND_PHR0 != orig_strength)); + picoos_uint8 toPhrase = ((PICODATA_ITEMINFO1_BOUND_PHR0 != strength)); + + PICODBG_DEBUG(("setting phrase type (wasPhrase=%i, isPhrase=%i)",fromPhrase,toPhrase)); + if (toPhrase) { + if (fromPhrase) { + spho->lastPhraseType = orig_type; + } else { /*promote */ + if (spho->activeStartPos <= spho->lastPhraseBoundPos) { + /* we still can change prev phrase bound */ + /* since a new phrase boundary is introduced, we have to 'invent' + * an additional phrase type here. For that, we have to use some of the + * knowledge that otherwise is handled in picoacph. + */ + spho->headx[spho->lastPhraseBoundPos].phrasetype + = PICODATA_ITEMINFO2_BOUNDTYPE_P; + } + } + spho->lastPhraseBoundPos = spho->outReadPos; + spho->headx[spho->lastPhraseBoundPos].phrasetype + = spho->lastPhraseType; + + } else { + spho->headx[spho->outReadPos].phrasetype = PICODATA_ITEMINFO2_NA; + if (fromPhrase) { /* demote */ + spho->lastPhraseType = orig_type; + if (spho->activeStartPos <= spho->lastPhraseBoundPos) { + /* we still can change prev phrase bound */ + spho->headx[spho->lastPhraseBoundPos].phrasetype + = spho->lastPhraseType; + } + } + } + } +} + + +/* ***********************************************************************/ +/* sphoStep function */ +/* ***********************************************************************/ + + +static picodata_step_result_t sphoStep(register picodata_ProcessingUnit this, + picoos_int16 mode, picoos_uint16 * numBytesOutput) +{ + + register spho_subobj_t *spho; + pico_status_t rv= PICO_OK; + picoos_uint16 blen; + picodata_itemhead_t ihead, ohead; + picoos_uint8 *icontent; + picoos_uint16 nextInPos; +#if defined(PICO_DEBUG) + picoos_char msgstr[SPHO_MSGSTR_SIZE]; +#endif + + /* used in FEED and FEED_SYM */ + picoos_uint16 clen; + picoos_int16 pos, sym, sylsym; + picoos_uint8 plane; + + /* used in BOUNDS */ + picoos_bool breakBefore, breakAfter; + + /* pico_status_t rvP= PICO_OK; */ + + picoos_uint16 curPos /*, nextPos */; + picoos_uint16 remHeadxSize, remCbufSize; + + + if (NULL == this || NULL == this->subObj) { + return PICODATA_PU_ERROR; + } + spho = (spho_subobj_t *) this->subObj; + + mode = mode; /* avoid warning "var not used in this function"*/ + + *numBytesOutput = 0; + while (1) { /* exit via return */ + PICODBG_INFO(("doing state %i, headxReadPos: %d, headxWritePos: %d", + spho->procState, spho->headxReadPos, spho->headxWritePos)); + + switch (spho->procState) { + + case SPHO_STEPSTATE_INIT: + /* **********************************************************************/ + /* INIT */ + /* **********************************************************************/ + PICODBG_DEBUG(("INIT")); + /* (re)set values for PARSE */ + spho->penultima = SPHO_POS_INVALID; + spho->activeEndPos = SPHO_POS_INVALID; + spho->headxReadPos = 0; + spho->phonReadPos = 0; + spho->phonWritePos = 0; + spho->lastPhraseType = PICODATA_ITEMINFO2_NA; + spho->lastPhraseBoundPos = -1; + + spho->procState = SPHO_STEPSTATE_COLLECT; + break; + + + case SPHO_STEPSTATE_COLLECT: + /* **********************************************************************/ + /* COLLECT */ + /* **********************************************************************/ + /* collect state: get items from charBuf and store in + * internal inBuf + */ + PICODBG_TRACE(("COLLECT")); + rv = PICO_OK; + remHeadxSize = spho->headxBufSize - spho->headxWritePos; + remCbufSize = spho->cbufBufSize - spho->cbufWritePos; + curPos = spho->headxWritePos; + while ((PICO_OK == rv) && (remHeadxSize > 0) && (remCbufSize > 0)) { + PICODBG_DEBUG(("COLLECT getting item at headxWritePos %i (remaining %i)",spho->headxWritePos, remHeadxSize)); + rv = picodata_cbGetItem(this->cbIn, spho->tmpbuf, PICODATA_MAX_ITEMSIZE, &blen); + if (PICO_OK == rv) { + rv = picodata_get_itemparts(spho->tmpbuf, + PICODATA_MAX_ITEMSIZE, &(spho->headx[spho->headxWritePos].head), + &(spho->cbuf[spho->cbufWritePos]), remCbufSize, &blen); + if (PICO_OK == rv) { + spho->headx[spho->headxWritePos].cind = spho->cbufWritePos; + spho->headx[spho->headxWritePos].boundstrength = 0; + spho->headxWritePos++; + remHeadxSize--; + spho->cbufWritePos += blen; + remCbufSize -= blen; + } + } + } + if ((PICO_OK == rv) && ((remHeadxSize <= 0) || (remCbufSize <= 0))) { + rv = PICO_EXC_BUF_OVERFLOW; + } + + /* in normal circumstances, rv is either PICO_EOF (no more items in cbIn) or PICO_BUF_OVERFLOW + * (if no more items fit into headx) */ + if ((PICO_EOF != rv) && (PICO_EXC_BUF_OVERFLOW != rv)) { + PICODBG_DEBUG(("COLLECT ** problem getting item, unhandled, rv: %i", rv)); + picoos_emRaiseException(this->common->em, rv, + NULL, NULL); + return PICODATA_PU_ERROR; + } + if (PICO_EOF == rv) { /* there are no more items available */ + if (curPos < spho->headxWritePos) { /* we did get some new items */ + PICODBG_DEBUG(("COLLECT read %i items", + spho->headxWritePos - curPos)); + spho->needMoreInput = FALSE; + } + if (spho->needMoreInput) { /* not enough items to proceed */ + PICODBG_DEBUG(("COLLECT need more data, returning IDLE")); + return PICODATA_PU_IDLE; + } else { + spho->procState = SPHO_STEPSTATE_PROCESS_PARSE; + /* uncomment next to split into two steps */ + /* return PICODATA_PU_ATOMIC; */ + } + } else { /* input buffer full */ + PICODBG_DEBUG(("COLLECT input buffer full")); + if (spho->needMoreInput) { /* forced output because we can't get more data */ + spho->needMoreInput = FALSE; + spho->force = TRUE; + } + spho->procState = SPHO_STEPSTATE_PROCESS_PARSE; + } + break; + + case SPHO_STEPSTATE_PROCESS_PARSE: + + /* **********************************************************************/ + /* PARSE: items -> input pos/phon pairs */ + /* **********************************************************************/ + + /* parse one item at a time */ + /* If + * - the item is a sentence end or + * - it is the last item and force=1 or + * - the phon buffer is full + * then set inReadPos to 0 and go to TRANSDUCE + * else advance by one item */ + + /* look at the current item */ + PICODBG_TRACE(("PARSE")); + if (spho->headxReadPos >= spho->headxWritePos) { + /* no more items in headx */ + if (spho->force) { + PICODBG_INFO(("no more items in headx but we are forced to transduce")); + + /* headx is full; we are forced to transduce before reaching the sentence end */ + spho->force = FALSE; + if (SPHO_POS_INVALID == spho->activeEndPos) { + spho->activeEndPos = spho->headxReadPos; + } + spho->procState = SPHO_STEPSTATE_PROCESS_TRANSDUCE; + } else { + /* we try to get more data */ + PICODBG_INFO(("no more items in headx, try to collect more")); + spho->needMoreInput = TRUE; + spho->procState = SPHO_STEPSTATE_COLLECT; + } + break; + } + + ihead = spho->headx[spho->headxReadPos].head; + icontent = spho->cbuf + spho->headx[spho->headxReadPos].cind; + + PICODBG_DEBUG(("PARSE looking at item %s",picodata_head_to_string(&ihead,msgstr,SPHO_MSGSTR_SIZE))); + /* treat header */ + if (PICODATA_ITEM_BOUND == ihead.type) { + /* see if it is a sentence end or termination boundary (flush) */ + if ((PICODATA_ITEMINFO1_BOUND_SEND == ihead.info1) + || (PICODATA_ITEMINFO1_BOUND_TERM == ihead.info1)) { + PICODBG_INFO(("PARSE found sentence end or term BOUND")); + + if (spho->sentenceStarted) { + /* its the end of the sentence */ + PICODBG_INFO(("PARSE found sentence end")); + spho->sentenceStarted = 0; + /* there is no need for a right context; move the active end to the end */ + /* add sentence termination phonemes */ + sphoAddTermPhonemes(spho, spho->headxReadPos); + spho->headxReadPos++; + spho->activeEndPos = spho->headxReadPos; + /* we may discard all information up to activeEndPos, after processing of last + * sentence part + */ + spho->penultima = spho->activeEndPos; + + /* transduce */ + spho->procState = SPHO_STEPSTATE_PROCESS_TRANSDUCE; + /* uncomment to split */ + /* return PICODATA_PU_BUSY; */ + break; + } else { + if (PICODATA_ITEMINFO1_BOUND_TERM == ihead.info1) { + /* its the end of input (flush) */ + PICODBG_INFO(("PARSE forwarding input end (flush)")); + /* copy item unmodified */ + picodata_put_itemparts(&ihead, + icontent, + ihead.len, + spho->outBuf, spho->outBufSize, + &clen); + + spho->headxReadPos++; + spho->activeEndPos = spho->headxReadPos; + spho->penultima = SPHO_POS_INVALID; + spho->feedFollowState = SPHO_STEPSTATE_SHIFT; + spho->procState = SPHO_STEPSTATE_FEED; + break; + } else { + /* this should never happen */ + /* eliminate bound */ + spho->headxReadPos++; + spho->activeEndPos = spho->headxReadPos; + spho->penultima = SPHO_POS_INVALID; + PICODBG_ERROR(("PARSE found a sentence end without a sentence start; eliminated")); + } + } + } else if (PICODATA_ITEMINFO1_BOUND_SBEG == ihead.info1) { + /* its the start of the sentence */ + PICODBG_INFO(("PARSE found sentence start")); + /* add sentence starting phoneme */ + sphoAddStartPhoneme(spho); + + spho->sentenceStarted = 1; + } + } + + if ((PICODATA_ITEM_WORDPHON == ihead.type) + || (PICODATA_ITEM_BOUND == ihead.type)) { + /* if it is a word or a bound try to extract phonemes */ + PICODBG_INFO(("PARSE found WORD phon or phrase BOUND")); + rv = sphoExtractPhonemes(this, spho, spho->headxReadPos, + TRUE /* convertAccents */, + &spho->suppressParseWordBound); + if (PICO_OK == rv) { + PICODBG_INFO(("PARSE successfully returned from phoneme extraction")); + /* replace activeEndPos if the new item is a word, or activeEndPos was not set yet, or + * activeEndPos was a bound */ + if ((spho->activeStartPos <= spho->headxReadPos) && ((PICODATA_ITEM_WORDPHON == ihead.type) + || (SPHO_POS_INVALID == spho->activeEndPos) + || (PICODATA_ITEM_BOUND == spho->headx[spho->activeEndPos].head.type))) { + PICODBG_INFO(("PARSE found new activeEndPos: %i,%i -> %i,%i", + spho->penultima,spho->activeEndPos,spho->activeEndPos,spho->headxReadPos)); + spho->penultima = spho->activeEndPos; + spho->activeEndPos = spho->headxReadPos; + } + + } else if (PICO_EXC_BUF_OVERFLOW == rv) { + /* phoneme buffer cannot take this item anymore; + if the phoneme buffer has some contents, we are forced to transduce before reaching the sentence end + else we skip the (too long word) */ + PICODBG_INFO(("PARSE returned from phoneme extraction with overflow, number of phonemes in phonBuf: %i; forced to TRANSDUCE", spho->phonWritePos)); + if ((SPHO_POS_INVALID == spho->activeEndPos) || (spho->activeStartPos == spho->activeEndPos)) { + spho->activeEndPos = spho->headxReadPos; + } + spho->procState = SPHO_STEPSTATE_PROCESS_TRANSDUCE; + break; + } else { + PICODBG_ERROR(("PARSE returned from phoneme extraction with exception %i",rv)); + return (picodata_step_result_t)picoos_emRaiseException(this->common->em, + PICO_ERR_OTHER, NULL, NULL); + } + } else { + PICODBG_INFO(("PARSE found other item, passing over")); + /* it is "other" item, ignore */ + } + /* set pos at next item */ + PICODBG_INFO(("PARSE going to next item: %i -> %i",spho->headxReadPos, spho->headxReadPos + 1)); + spho->headxReadPos++; + break; + + case SPHO_STEPSTATE_PROCESS_TRANSDUCE: + + /* **********************************************************************/ + /* TRANSDUCE: transduction input pos/phon pairs to output pos/phon pairs */ + /* **********************************************************************/ + PICODBG_DEBUG(("TRANSDUCE (%i-th of %i fsts",spho->curFst+1, spho->numFsts)); + + /* termination condition first */ + if (spho->curFst >= spho->numFsts) { + +#if defined(PICO_DEBUG) + { + PICODBG_INFO_CTX(); + PICODBG_INFO_MSG(("result of all transductions: ")); + PICOTRNS_PRINTSYMSEQ(this->voice->kbArray[PICOKNOW_KBID_DBG], spho->phonBufOut, spho->phonWritePos); + PICODBG_INFO_MSG(("\n")); + } +#endif + + /* reset for next transduction */ + spho->curFst = 0; + /* prepare BOUNDS */ + spho->outReadPos = 0; + spho->phonReadPos = 0; + + spho->procState = SPHO_STEPSTATE_PROCESS_BOUNDS; + break; + } + + /* transduce from phonBufIn to PhonBufOut */ + { + + picoos_uint32 nrSteps; +#if defined(PICO_DEBUG) + { + PICODBG_INFO_CTX(); + PICODBG_INFO_MSG(("spho trying to transduce: ")); + PICOTRNS_PRINTSYMSEQ(this->voice->kbArray[PICOKNOW_KBID_DBG], spho->phonBuf, spho->phonWritePos); + PICODBG_INFO_MSG(("\n")); + } +#endif + rv = picotrns_transduce(spho->fst[spho->curFst], FALSE, + picotrns_printSolution, spho->phonBuf, spho->phonWritePos, spho->phonBufOut, + &spho->phonWritePos, + 4*PICOTRNS_MAX_NUM_POSSYM, spho->altDescBuf, + spho->maxAltDescLen, &nrSteps); + if (PICO_OK == rv) { +#if defined(PICO_DEBUG) + { + PICODBG_INFO_CTX(); + PICODBG_INFO_MSG(("result of transduction: (output symbols: %i)", spho->phonWritePos)); + PICOTRNS_PRINTSYMSEQ(this->voice->kbArray[PICOKNOW_KBID_DBG], spho->phonBufOut, spho->phonWritePos); + PICODBG_INFO_MSG(("\n")); + } +#endif + PICODBG_TRACE(("number of steps done in tranduction: %i", nrSteps)); + } else { + picoos_emRaiseWarning(this->common->em, PICO_WARN_FALLBACK,NULL,(picoos_char *)"phon buffer full"); + } + } + /* eliminate deep epsilons */ + picotrns_eliminate_epsilons(spho->phonBufOut, spho->phonWritePos, spho->phonBuf, + &spho->phonWritePos,4*PICOTRNS_MAX_NUM_POSSYM); + + spho->curFst++; + + /* return PICODATA_PU_ATOMIC */ + break; + + + case SPHO_STEPSTATE_PROCESS_BOUNDS: + /* ************************************************************************/ + /* BOUNDS: combine input item with pos/phon pairs to insert/modify bounds */ + /* ************************************************************************/ + + PICODBG_INFO(("BOUNDS")); + + /* get the suppressRecombWordBound in the left context */ + spho->suppressRecombWordBound = FALSE; + while (spho->outReadPos < spho->activeStartPos) { + /* look at the current item */ + ihead = spho->headx[spho->outReadPos].head; + /* icontent = spho->cbuf + spho->headx[spho->outReadPos].cind; */ + PICODBG_INFO(("in position %i, looking at item %s",spho->outReadPos,picodata_head_to_string(&ihead,msgstr,SPHO_MSGSTR_SIZE))); + if (PICODATA_ITEM_BOUND == ihead.type) { + spho->suppressRecombWordBound = TRUE; + } else if (PICODATA_ITEM_WORDPHON == ihead.type) { + spho->suppressRecombWordBound = FALSE; + } + spho->outReadPos++; + } + /* spho->outReadPos point now to the active region */ + + /* advance the phone reading pos to the active range */ + spho->phonReadPos = 0; + while (SPHO_POSSYM_OK == (rv = getNextPosSym(spho, &pos, &sym, + spho->activeStartPos))) { + /* ignore */ + } + PICODBG_INFO(("skipping left context phones results in %s", (SPHO_POSSYM_OUT_OF_RANGE==rv) ? "OUT_OF_RANGE" : (SPHO_POSSYM_END ==rv) ? "END" : "OTHER")); + + /* + * Align input items with transduced phones and note bound stregth changes and break commands + */ + + while (spho->outReadPos < spho->activeEndPos) { + + /* look at the current item */ + ihead = spho->headx[spho->outReadPos].head; + icontent = spho->cbuf + spho->headx[spho->outReadPos].cind; + nextInPos = spho->outReadPos + 1; + /* */ + PICODBG_INFO(("in position %i, looking at item %s",spho->outReadPos,picodata_head_to_string(&ihead,msgstr,SPHO_MSGSTR_SIZE))); + + if ((PICODATA_ITEM_BOUND == ihead.type) + || ((PICODATA_ITEM_WORDPHON == ihead.type) + && (!spho->suppressRecombWordBound))) { + /* there was a boundary originally */ + picoos_uint8 orig_strength, orig_type; + if (PICODATA_ITEM_BOUND == ihead.type) { + orig_strength = ihead.info1; + orig_type = ihead.info2; + spho->suppressRecombWordBound = TRUE; + } else { + orig_strength = PICODATA_ITEMINFO1_BOUND_PHR0; + orig_type = PICODATA_ITEMINFO2_NA; + } + /* i expect a boundary phone here */ + /* consume FST bound phones, consider pending break and set the side-bound */ + PICODBG_INFO(("got BOUND or WORDPHON item and expects corresponding phone")); + rv = getNextPosSym(spho, &pos, &sym, nextInPos); + if (SPHO_POSSYM_OK != rv) { + PICODBG_ERROR(("unexpected symbol or unexpected end of phoneme list (%s)", (SPHO_POSSYM_OUT_OF_RANGE==rv) ? "OUT_OF_RANGE" : (SPHO_POSSYM_END ==rv) ? "END" :"OTHER")); + return (picodata_step_result_t)picoos_emRaiseException(this->common->em, + PICO_ERR_OTHER, NULL, NULL); + } + sym = picotrns_unplane(sym, &plane); + /* */ + PICODBG_ASSERT((PICOKFST_PLANE_PB_STRENGTHS == plane)); + + /* insert modified bound according to transduction and possibly pending break */ + setSideBound(spho, orig_strength, orig_type, + (picoos_uint8) sym); + } else if ((PICODATA_ITEM_CMD == ihead.type) + && (PICODATA_ITEMINFO1_CMD_SIL == ihead.info1)) { + /* it's a SIL (break) command */ + picoos_uint16 time; + picoos_uint32 pos = 0; + picoos_read_mem_pi_uint16(icontent, &pos, &time); + if (spho->breakPending) { + spho->breakTime += time; + } else { + spho->breakTime = time; + spho->breakPending = TRUE; + } + } else if ((PICODATA_ITEM_CMD == ihead.type) && (PICODATA_ITEMINFO1_CMD_PLAY == ihead.info1)) { + /* insert break of at least one ms */ + if (!spho->breakPending || (spho->breakTime <= 0)) { + spho->breakTime = SPHO_SMALLEST_SIL_DUR; + spho->breakPending = TRUE; + } + setSideBound(spho, PICODATA_ITEMINFO1_NA, + PICODATA_ITEMINFO2_NA, PICODATA_ITEMINFO1_NA); + /* force following break to be at least one ms */ + spho->breakTime = SPHO_SMALLEST_SIL_DUR; + spho->breakPending = TRUE; + } else if (breakStateInterrupting(&ihead, &breakBefore, &breakAfter)) { + + if (breakBefore &&(!spho->breakPending || (spho->breakTime <= 0))) { + spho->breakTime = SPHO_SMALLEST_SIL_DUR; + spho->breakPending = TRUE; + } + setSideBound(spho, PICODATA_ITEMINFO1_NA, + PICODATA_ITEMINFO2_NA, PICODATA_ITEMINFO1_NA); + + if (breakAfter) { + spho->breakTime = SPHO_SMALLEST_SIL_DUR; + spho->breakPending = TRUE; + } + if (PICODATA_ITEM_WORDPHON == ihead.type) { + spho->suppressRecombWordBound = FALSE; + } + } + + /* skip phones of that item */ + while (SPHO_POSSYM_OK == (rv = getNextPosSym(spho, &pos, + &sym, nextInPos))) { + /* ignore */ + } + spho->outReadPos++; + } + + /* reset for RECOMB */ + spho->outReadPos = 0; + spho->phonReadPos = 0; + spho->suppressRecombWordBound = FALSE; + + spho->procState = SPHO_STEPSTATE_PROCESS_RECOMB; + return PICODATA_PU_ATOMIC; + + break; + + case SPHO_STEPSTATE_PROCESS_RECOMB: + /* **********************************************************************/ + /* RECOMB: combine input item with pos/phon pairs to output item */ + /* **********************************************************************/ + + PICODBG_TRACE(("RECOMB")); + + /* default place to come after feed: here */ + spho->feedFollowState = SPHO_STEPSTATE_PROCESS_RECOMB; + + /* check termination condition first */ + if (spho->outReadPos >= spho->activeEndPos) { + PICODBG_DEBUG(("RECOMB reached active region's end at %i",spho->outReadPos)); + spho->procState = SPHO_STEPSTATE_SHIFT; + break; + } + + /* look at the current item */ + ihead = spho->headx[spho->outReadPos].head; + icontent = spho->cbuf + spho->headx[spho->outReadPos].cind; + + PICODBG_DEBUG(("RECOMB looking at item %s",picodata_head_to_string(&ihead,msgstr,SPHO_MSGSTR_SIZE))); + + nextInPos = spho->outReadPos + 1; + + PICODBG_DEBUG(("RECOMB treating item in headx at pos %i",spho->outReadPos)); + if (nextInPos <= spho->activeStartPos) { /* we're in the (passive) left context. Just skip it */ + PICODBG_DEBUG(("RECOMB skipping item in the left context (%i <= %i)",nextInPos, spho->activeStartPos)); + if (PICODATA_ITEM_BOUND == ihead.type) { + spho->suppressRecombWordBound = 1; + } else if (PICODATA_ITEM_WORDPHON == ihead.type) { + spho->suppressRecombWordBound = 0; + } + + /* consume possyms */ + while (SPHO_POSSYM_OK == (rv = getNextPosSym(spho,&pos,&sym,nextInPos))) { + /* ignore */ + } + if (rv == SPHO_POSSYM_INVALID) { + return (picodata_step_result_t)picoos_emRaiseException(this->common->em, + PICO_ERR_OTHER, NULL, NULL); + } + spho->outReadPos = nextInPos; + } else { /* active region */ + if (spho->headx[spho->outReadPos].boundstrength) { +/* ***************** "side-bound" *********************/ + /* copy to outbuf */ + putSideBoundToOutput(spho); + /* mark as processed */ + spho->headx[spho->outReadPos].boundstrength = 0; + /* output it */ + spho->procState = SPHO_STEPSTATE_FEED; + } else if (PICODATA_ITEM_BOUND == ihead.type) { +/* ***************** BOUND *********************/ + /* expect a boundary phone here */ + PICODBG_DEBUG(("RECOMB got BOUND item and expects corresponding phone")); + rv = getNextPosSym(spho, &pos, &sym, nextInPos); + if (SPHO_POSSYM_OK != rv) { + PICODBG_ERROR(("unexpected symbol or unexpected end of phoneme list")); + return (picodata_step_result_t)picoos_emRaiseException( + this->common->em, PICO_ERR_OTHER, NULL, + NULL); + } + sym = picotrns_unplane(sym, &plane); + /* */ + PICODBG_ASSERT((PICOKFST_PLANE_PB_STRENGTHS == plane)); + + spho->suppressRecombWordBound = TRUE; /* if word following, don't need word boundary */ + /* just consume item and come back here*/ + spho->outReadPos = nextInPos; + + } else if (PICODATA_ITEM_WORDPHON == ihead.type) { +/* ***************** WORDPHON *********************/ + spho->wordStarted = TRUE; + /* i expect a word boundary symbol in this range unless a phrase boundary was encountered before */ + if (spho->suppressRecombWordBound) { + PICODBG_DEBUG(("RECOMB got WORDPHON item but skips expecting BOUND")); + spho->suppressRecombWordBound = FALSE; + } else { + PICODBG_DEBUG(("RECOMB got WORDPHON item and expects corresponding bound phone")); + rv = getNextPosSym(spho, &pos, &sym, nextInPos); + if (SPHO_POSSYM_OK != rv) { + PICODBG_ERROR(("unexpected symbol or unexpected end of phoneme list")); + return (picodata_step_result_t)picoos_emRaiseException(this->common->em, + PICO_ERR_OTHER, NULL, NULL); + } + } + spho->procState = SPHO_STEPSTATE_PROCESS_SYL; + } else if ((PICODATA_ITEM_CMD == ihead.type) && (PICODATA_ITEMINFO1_CMD_SIL == ihead.info1)) { +/* ***************** BREAK COMMAND *********************/ + /* just consume and come back here */ + PICODBG_DEBUG(("RECOMB consuming item from inBuf %i -> %i",spho->outReadPos, nextInPos)); + spho->outReadPos = nextInPos; + } else { +/* ***************** OTHER *********************/ + /* just copy item */ + PICODBG_DEBUG(("RECOMB found other item, just copying")); + picodata_put_itemparts(&ihead, icontent, ihead.len, + spho->outBuf, spho->outBufSize, &clen); + PICODBG_DEBUG(("RECOMB consuming item from inBuf %i -> %i",spho->outReadPos, nextInPos)); + spho->outReadPos = nextInPos; + /* and output it */ + spho->procState = SPHO_STEPSTATE_FEED; + } /* if (ihead.type) */ + + } + + /* return PICODATA_PU_BUSY; */ + break; + + case SPHO_STEPSTATE_PROCESS_SYL: + /* **********************************************************************/ + /* SYL: combine input word item with pos/phon pairs to syl output item */ + /* **********************************************************************/ + + /* consume all transduced phonemes with pos in in the range [spho->outReadPos,nextInPos[ */ + PICODBG_DEBUG(("SYL")); + + spho->feedFollowState = SPHO_STEPSTATE_PROCESS_SYL; + + /* look at the current item */ + ihead = spho->headx[spho->outReadPos].head; + icontent = spho->cbuf + spho->headx[spho->outReadPos].cind; + nextInPos = spho->outReadPos + 1; + PICODBG_DEBUG(("SYL (1) treating item in headx at pos %i",spho->outReadPos)); + /* create syllable item in ohead (head) and sylBuf (contents) */ + ohead.type = PICODATA_ITEM_SYLLPHON; + + PICODBG_TRACE(("SYL expects accent at phonBuf[%i] = (%i,%i) (outReadPos=%i)", spho->phonReadPos, spho->phonBuf[spho->phonReadPos].pos, spho->phonBuf[spho->phonReadPos].sym,spho->outReadPos)); + rv = getNextPosSym(spho,&pos,&sym,nextInPos); + if (SPHO_POSSYM_OK != rv) { + PICODBG_ERROR(("unexpected symbol or unexpected end of phoneme list (%i)",rv)); + return (picodata_step_result_t)picoos_emRaiseException(this->common->em, PICO_ERR_OTHER, NULL, NULL); + } + ohead.info2 = picotrns_unplane(sym, &plane); + PICODBG_ASSERT((PICOKFST_PLANE_ACCENTS == plane)); + PICODBG_DEBUG(("SYL sets accent to %c", sym)); + + /* for the time being, we force to use POS so we can transduce all fsts in a row without reconsulting the items */ + PICODBG_TRACE(("SYL expects POS")); + PICODBG_DEBUG(("SYL (2) treating item in inBuf range [%i,%i[",spho->outReadPos,nextInPos)); + rv = getNextPosSym(spho,&pos,&sym,nextInPos); + if (SPHO_POSSYM_OK != rv) { + PICODBG_ERROR(("unexpected symbol or unexpected end of phoneme list")); + return (picodata_step_result_t)picoos_emRaiseException(this->common->em, PICO_ERR_OTHER, NULL, NULL); + } + if (spho->wordStarted) { + spho->wordStarted = FALSE; + ohead.info1 = picotrns_unplane(sym, &plane); + /* */ + PICODBG_ASSERT(PICOKFST_PLANE_POS == plane); + /* */ + PICODBG_DEBUG(("SYL setting POS to %c", ohead.info1)); + } else { + ohead.info1 = PICODATA_ITEMINFO1_NA; + } + + PICODBG_DEBUG(("SYL (3) treating item in inBuf range [%i,%i[",spho->outReadPos,nextInPos)); + /* get phonemes of that syllable; stop if syllable boundary or outside word */ + sylsym = (PICOKFST_PLANE_PHONEMES << 8) + + spho->syllSepId; + PICODBG_DEBUG(("collecting syllable phonemes before headx position %i",nextInPos)); + spho->sylWritePos = 0; + while (SPHO_POSSYM_OK == (rv = getNextPosSym(spho,&pos,&sym,nextInPos)) && (sym != sylsym)) { + spho->sylBuf[spho->sylWritePos++] = picotrns_unplane(sym, &plane); + /* */ + PICODBG_TRACE(("SYL adding phoneme to syllable: (pos %i,sym %i)[plane %i,sym %c]",pos,sym,plane,sym & 0xFF)); + PICODBG_ASSERT((PICOKFST_PLANE_PHONEMES == plane)); + } + PICODBG_DEBUG(("SYL (4) treating item in inBuf range [%i,%i[",spho->outReadPos,nextInPos)); + ohead.len = spho->sylWritePos; + if (SPHO_POS_INVALID == rv) { + PICODBG_ERROR(("unexpected symbol or unexpected end of phoneme list")); + return (picodata_step_result_t)picoos_emRaiseException(this->common->em, PICO_WARN_INCOMPLETE, NULL, NULL); + } else if ((SPHO_POSSYM_OUT_OF_RANGE == rv) || (SPHO_POSSYM_END == rv)) { + PICODBG_DEBUG(("SYL arrived at end of word and/or end of phon buffer, go to next word")); + spho->outReadPos = nextInPos; /* advance to next item */ + spho->feedFollowState = SPHO_STEPSTATE_PROCESS_RECOMB; /* go to RECOMB after feed */ + } else { + PICODBG_ASSERT((sym == sylsym)); + } + PICODBG_DEBUG(("SYL (5) treating item in inBuf range [%i,%i[",spho->outReadPos,nextInPos)); + + if (ohead.len > 0) { + /* prepare syllable output */ + picodata_put_itemparts(&ohead, spho->sylBuf, + PICODATA_BUFSIZE_DEFAULT, spho->outBuf, + spho->outBufSize, &clen); + + spho->procState = SPHO_STEPSTATE_FEED; + } else { /* skip feeding output of empty syllable */ + spho->procState = spho->feedFollowState; + } + break; + + case SPHO_STEPSTATE_FEED: + /* **********************************************************************/ + /* FEED: output output item and proceed to feedFollowState */ + /* **********************************************************************/ + + PICODBG_DEBUG(("FEED")); + + PICODBG_DEBUG(("FEED putting outBuf item into cb")); + + /*feeding items to PU output buffer*/ + rv = picodata_cbPutItem(this->cbOut, spho->outBuf, + spho->outBufSize, &clen); + + PICODATA_INFO_ITEM(this->voice->kbArray[PICOKNOW_KBID_DBG], + (picoos_uint8 *)"spho: ", + spho->outBuf, spho->outBufSize); + + if (PICO_EXC_BUF_OVERFLOW == rv) { + /* we have to redo this item */ + PICODBG_DEBUG(("FEED got overflow, returning ICODATA_PU_OUT_FULL")); + return PICODATA_PU_OUT_FULL; + } else if (PICO_OK == rv) { + *numBytesOutput += clen; + spho->procState = spho->feedFollowState; + PICODBG_DEBUG(("FEED ok, going back to procState %i", spho->procState)); + return PICODATA_PU_BUSY; + } else { + PICODBG_DEBUG(("FEED got exception %i when trying to output item",rv)); + spho->procState = spho->feedFollowState; + return (picodata_step_result_t)rv; + } + break; + + case SPHO_STEPSTATE_SHIFT: + /* **********************************************************************/ + /* SHIFT */ + /* **********************************************************************/ + /* If there exists a valid penultima, it should replace any left context (from 0 to activeStartPos) + * else discard the current active range (from activeStartPos to activeEndPos), leaving the current + * left context intact. Often, PARSE would move activeStartPos to 0, so that there is no left context + * after the shift. + */ + + PICODBG_DEBUG(("SHIFT")); + + if (spho->penultima != SPHO_POS_INVALID) { + picoos_int16 shift; + /* set penultima as new left context and set activeStartPos to the shifted activeEndPos */ + PICODBG_DEBUG(( + "SHIFT shifting penultima from %i to 0", + spho->penultima)); + shift = shift_range_left_1(spho, &spho->penultima, 0); + if (shift < 0) { + picoos_emRaiseException(this->common->em,PICO_ERR_OTHER,NULL,NULL); + return PICODATA_PU_ERROR; + } + spho->activeStartPos = spho->activeEndPos + - shift; + spho->lastPhraseBoundPos -= shift; + spho->suppressParseWordBound = FALSE; + spho->suppressRecombWordBound = FALSE; + + } else { + picoos_int16 shift; + picoos_bool lastPhraseBoundActive; + if (spho->activeStartPos == spho->activeEndPos) { + /* no items consumed; we have to abandon left context */ + spho->activeStartPos = 0; + } + lastPhraseBoundActive = (spho->lastPhraseBoundPos >= spho->activeStartPos); + /* dummy comment */ + PICODBG_DEBUG(("SHIFT shift active end from %i to %i", + spho->activeEndPos, spho->activeStartPos)); + shift = shift_range_left_1(spho, &spho->activeEndPos, spho->activeStartPos); + if (shift < 0) { + picoos_emRaiseException(this->common->em,PICO_ERR_OTHER,NULL,NULL); + return PICODATA_PU_ERROR; + } + if (lastPhraseBoundActive) { + spho->lastPhraseBoundPos -= shift; + } + } + + spho->procState = SPHO_STEPSTATE_INIT; + break; + + default: + picoos_emRaiseException(this->common->em, PICO_ERR_OTHER, NULL, NULL); + return PICODATA_PU_ERROR; + break; + + } /* switch (spho->procState) */ + + } /* while (1) */ + + /* should be never reached */ + picoos_emRaiseException(this->common->em, PICO_ERR_OTHER, NULL, NULL); + return PICODATA_PU_ERROR; +} + +#ifdef __cplusplus +} +#endif + +/* end picospho.c */ |