summaryrefslogtreecommitdiffstats
path: root/pico/lib/picosa.c
diff options
context:
space:
mode:
authorCharles Chen <clchen@google.com>2009-06-22 16:25:25 -0700
committerCharles Chen <clchen@google.com>2009-06-22 17:14:37 -0700
commit1284d937084a20b457c280259fff59391129509a (patch)
tree5630028284c450b56a56b187d9c99cf7ebcee9cc /pico/lib/picosa.c
parentf605ee98e5e03144c25a92af7e5d2a3ec33d375f (diff)
downloadexternal_svox-1284d937084a20b457c280259fff59391129509a.zip
external_svox-1284d937084a20b457c280259fff59391129509a.tar.gz
external_svox-1284d937084a20b457c280259fff59391129509a.tar.bz2
Moving PicoTts plugin under the pico directory of external/svox
Diffstat (limited to 'pico/lib/picosa.c')
-rw-r--r--pico/lib/picosa.c1738
1 files changed, 1738 insertions, 0 deletions
diff --git a/pico/lib/picosa.c b/pico/lib/picosa.c
new file mode 100644
index 0000000..3147c76
--- /dev/null
+++ b/pico/lib/picosa.c
@@ -0,0 +1,1738 @@
+/*
+ * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file picosa.c
+ *
+ * sentence analysis - POS disambiguation
+ *
+ * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
+ * All rights reserved.
+ *
+ * History:
+ * - 2009-04-20 -- initial version
+ *
+ */
+
+#include "picoos.h"
+#include "picodbg.h"
+#include "picobase.h"
+#include "picokdt.h"
+#include "picoklex.h"
+#include "picoktab.h"
+#include "picokfst.h"
+#include "picotrns.h"
+#include "picodata.h"
+#include "picosa.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+#if 0
+}
+#endif
+
+
+/* PU saStep states */
+#define SA_STEPSTATE_COLLECT 0
+#define SA_STEPSTATE_PROCESS_POSD 10
+#define SA_STEPSTATE_PROCESS_WPHO 11
+#define SA_STEPSTATE_PROCESS_TRNS_PARSE 12
+#define SA_STEPSTATE_PROCESS_TRNS_FST 13
+#define SA_STEPSTATE_FEED 2
+
+#define SA_MAX_ALTDESC_SIZE (30*(PICOTRNS_MAX_NUM_POSSYM + 2))
+
+#define SA_MSGSTR_SIZE 32
+
+/* subobject : SentAnaUnit
+ * shortcut : sa
+ * context size : one phrase, max. 30 non-PUNC items, for non-processed items
+ * one item if internal input empty
+ */
+
+/** @addtogroup picosa
+
+ internal buffers:
+
+ - headx: array for extended item heads of fixed size (head plus
+ index for content, plus two fields for boundary strength/type)
+
+ - cbuf1, cbuf2: buffers for item contents (referenced by index in
+ headx). Future: replace these two buffers by a single double-sided
+ buffer (double shrink-grow type)
+
+ 0. bottom up filling of items in headx and cbuf1
+
+ 1. POS disambiguation (right-to-left, top-to-bottom):
+ - number and sequence of items unchanged
+ - item content can only get smaller (reducing nr of results in WORDINDEX)
+ -> info stays in "headx, cbuf1" and changed in place \n
+ WORDGRAPH(POSes,NA)graph -> WORDGRAPH(POS,NA)graph \n
+ WORDINDEX(POSes,NA)POS1ind1...POSNindN -> WORDINDEX(POS,NA)POS|ind \n
+
+ 2. lex-index lookup and G2P (both directions possible, left-to-right done):
+ - number and sequence of items unchanged, item head info and content
+ changes
+ -> headx changed in place; cbuf1 to cbuf2 \n
+ WORDGRAPH(POS,NA)graph -> WORDPHON(POS,NA)phon \n
+ WORDINDEX(POS,NA)POS|ind -> WORDPHON(POS,NA)phon \n
+
+ 3. phrasing (right-to-left):
+
+ Previous (before introducing SBEG)\n
+ ----------------------------------
+ 1| 2| 3| 4| \n
+ e.g. from WP WP WP WP WP PUNC WP WP PUNC WP WP WP PUNC FLUSH \n
+ e.g. to BINIT WP WP WP BPHR3 WP WP BPHR1 WP WP BSEND WP WP WP BSEND BTERM \n
+ |1 |2 |3 |4 \n
+
+ 3-level bound state: to keep track of bound strength from end of
+ previous punc-phrase, then BOUND item output as first item
+ (strength from prev punc-phrase and type from current
+ punc-phrase).
+
+ trailing PUNC item bound states
+ INIT SEND PHR1
+ PUNC(SENTEND, T) B(I,T)>SEND B(S,T)>SEND B(P1,T)>SEND
+ PUNC(SENTEND, Q) B(I,Q)>SEND B(S,Q)>SEND B(P1,Q)>SEND
+ PUNC(SENTEND, E) B(I,E)>SEND B(S,E)>SEND B(P1,E)>SEND
+ PUNC(PHRASEEND, P) B(I,P)>PHR1 B(S,P)>PHR1 B(P1,P)>PHR1
+ PUNC(PHRASEEND, FORC) B(I,P)>PHR1 B(S,P)>PHR1 B(P1,P)>PHR1
+ PUNC(FLUSH, T) B(I,T).. B(S,T).. B(P1,T)..
+ B(T,NA) B(T,NA) B(T,NA)
+ >INIT >INIT >INIT
+
+ PHR2/3 case:
+ trailing PUNC item bound states
+ INIT SEND PHR1
+ PUNC(SENTEND, T) B(I,P)B(P,T)>SEND B(S,P)B(P,T)>SEND B(P1,P)B(P,T)>SEND
+ PUNC(SENTEND, Q) B(I,P)B(P,Q)>SEND B(S,P)B(P,Q)>SEND B(P1,P)B(P,Q)>SEND
+ PUNC(SENTEND, E) B(I,P)B(P,E)>SEND B(S,P)B(P,E)>SEND B(P1,P)B(P,E)>SEND
+ PUNC(PHRASEEND, P) B(I,P)B(P,P)>PHR1 B(S,P)B(P,P)>PHR1 B(P1,P)B(P,P)>PHR1
+ PUNC(PHREND, FORC) B(I,P)B(P,P)>PHR1 B(S,P)B(P,P)>PHR1 B(P1,P)B(P,P)>PHR1
+ PUNC(FLUSH, T) B(I,P)B(P,T).. B(S,T)B(P,T).. B(P1,T)B(P,T)..
+ B(T,NA) B(T,NA) B(T,NA)
+ >INIT >INIT >INIT
+
+ Current
+ --------
+ e.g. from WP WP WP WP WP PUNC WP WP PUNC WP WP WP PUNC FLUSH
+ e.g. to BSBEG WP WP WP BPHR3 WP WP BPHR1 WP WP BSEND BSBEG WP WP WP BSEND BTERM
+ |1 |2 |3 |4
+
+ 2-level bound state: The internal buffer contains one primary phrase (sometimes forced, if buffer
+ allmost full), with the trailing PUNCT item included (last item).
+ If the trailing PUNC is a a primary phrase separator, the
+ item is not output, but instead, the bound state is set to PPHR, so that the correct BOUND can
+ be output at the start of the next primary phrase.
+ Otherwise,
+ the item is converted to the corresponding BOUND and output. the bound state is set to SSEP,
+ so that a BOUND of type SBEG is output at the start of the next primary phrase.
+
+ trailing PUNC item bound states
+ SSEP PPHR
+ PUNC(SENTEND, X) B(B,X)>SSEP B(P1,X)>SSEP (X = T | Q | E)
+ PUNC(FLUSH, T) B(B,T)>SSEP* B(P1,T)>SSEP
+ PUNC(PHRASEEND, P) B(B,P)>PPHR B(P1,P)>PPHR
+ PUNC(PHRASEEND, FORC) B(B,P)>PPHR B(P1,P)>PPHR
+
+* If more than one sentence separators follow each other (e.g. SEND-FLUSH, SEND-SEND) then
+ all but the first will be treated as an (empty) phrase containing just this item.
+ If this (single) item is a flush, creation of SBEG is suppressed.
+
+
+ - dtphr phrasing tree (rather subphrasing tree it should be called)
+ determines
+ BOUND_PHR2
+ BOUND_PHR3
+ - boundary strenghts are determined for every word (except the
+ first one) from right-to-left. The boundary types mark the phrase
+ type of the phrase following the boundary.
+ - number of items actually changed (new BOUND items added): because
+ of fixed size without content, two fields are contained in headx
+ to indicate if a BOUND needs to be added to the LEFT of the item.
+ -> headx further extended with boundary strength and type info to
+ indicate that to the left of the headx ele a BOUND needs to be
+ inserted when outputting.
+
+ 4. accentuation:
+ - number of items unchanged, content unchanged, only head info changes
+ -> changed in place in headx
+*/
+
+
+typedef struct {
+ picodata_itemhead_t head;
+ picoos_uint16 cind;
+} picosa_headx_t;
+
+
+typedef struct sa_subobj {
+ picoos_uint8 procState; /* for next processing step decision */
+
+ picoos_uint8 inspaceok; /* flag: headx/cbuf1 has space for an item */
+ picoos_uint8 needsmoreitems; /* flag: need more items */
+ picoos_uint8 phonesTransduced; /* flag: */
+
+ picoos_uint8 tmpbuf[PICODATA_MAX_ITEMSIZE]; /* tmp. location for an item */
+
+ picosa_headx_t headx[PICOSA_MAXNR_HEADX];
+ picoos_uint16 headxBottom; /* bottom */
+ picoos_uint16 headxLen; /* length, 0 if empty */
+
+ picoos_uint8 cbuf1[PICOSA_MAXSIZE_CBUF];
+ picoos_uint16 cbuf1BufSize; /* actually allocated size */
+ picoos_uint16 cbuf1Len; /* length, 0 if empty */
+
+ picoos_uint8 cbuf2[PICOSA_MAXSIZE_CBUF];
+ picoos_uint16 cbuf2BufSize; /* actually allocated size */
+ picoos_uint16 cbuf2Len; /* length, 0 if empty */
+
+ picotrns_possym_t phonBufA[PICOTRNS_MAX_NUM_POSSYM+1];
+ picotrns_possym_t phonBufB[PICOTRNS_MAX_NUM_POSSYM+1];
+ picotrns_possym_t * phonBuf;
+ picotrns_possym_t * phonBufOut;
+ picoos_uint16 phonReadPos, phonWritePos; /* next pos to read from phonBufIn, next pos to write to phonBufIn */
+ picoos_uint16 nextReadPos; /* position of (potential) next item to read from */
+
+
+ /* buffer for internal calculation of transducer */
+ picotrns_AltDesc altDescBuf;
+ /* the number of AltDesc in the buffer */
+ picoos_uint16 maxAltDescLen;
+
+ /* tab knowledge base */
+ picoktab_Graphs tabgraphs;
+ picoktab_Phones tabphones;
+ picoktab_Pos tabpos;
+ picoktab_FixedIds fixedIds;
+
+ /* dtposd knowledge base */
+ picokdt_DtPosD dtposd;
+
+ /* dtg2p knowledge base */
+ picokdt_DtG2P dtg2p;
+
+ /* lex knowledge base */
+ picoklex_Lex lex;
+
+ /* ulex knowledge bases */
+ picoos_uint8 numUlex;
+ picoklex_Lex ulex[PICOKNOW_MAX_NUM_ULEX];
+
+ /* fst knowledge bases */
+ picoos_uint8 numFsts;
+ picokfst_FST fst[PICOKNOW_MAX_NUM_WPHO_FSTS];
+ picoos_uint8 curFst; /* the fst to be applied next */
+
+
+} sa_subobj_t;
+
+
+static pico_status_t saInitialize(register picodata_ProcessingUnit this) {
+ sa_subobj_t * sa;
+ picoos_uint16 i;
+ picokfst_FST fst;
+ picoknow_kb_id_t fstKbIds[PICOKNOW_MAX_NUM_WPHO_FSTS] = PICOKNOW_KBID_WPHO_ARRAY;
+ picoklex_Lex ulex;
+ picoknow_kb_id_t ulexKbIds[PICOKNOW_MAX_NUM_ULEX] = PICOKNOW_KBID_ULEX_ARRAY;
+
+ PICODBG_DEBUG(("calling"));
+
+ if (NULL == this || NULL == this->subObj) {
+ return picoos_emRaiseException(this->common->em,
+ PICO_ERR_NULLPTR_ACCESS, NULL, NULL);
+ }
+ sa = (sa_subobj_t *) this->subObj;
+
+ /* sa->common = this->common; */
+
+ sa->procState = SA_STEPSTATE_COLLECT;
+
+ sa->inspaceok = TRUE;
+ sa->needsmoreitems = TRUE;
+
+ sa->headxBottom = 0;
+ sa->headxLen = 0;
+ sa->cbuf1BufSize = PICOSA_MAXSIZE_CBUF;
+ sa->cbuf2BufSize = PICOSA_MAXSIZE_CBUF;
+ sa->cbuf1Len = 0;
+ sa->cbuf2Len = 0;
+
+ /* init headx, cbuf1, cbuf2 */
+ for (i = 0; i < PICOSA_MAXNR_HEADX; i++){
+ sa->headx[i].head.type = 0;
+ sa->headx[i].head.info1 = PICODATA_ITEMINFO1_NA;
+ sa->headx[i].head.info2 = PICODATA_ITEMINFO2_NA;
+ sa->headx[i].head.len = 0;
+ sa->headx[i].cind = 0;
+ }
+ for (i = 0; i < PICOSA_MAXSIZE_CBUF; i++) {
+ sa->cbuf1[i] = 0;
+ sa->cbuf2[i] = 0;
+ }
+
+
+ /* possym buffer */
+ sa->phonesTransduced = FALSE;
+ sa->phonBuf = sa->phonBufA;
+ sa->phonBufOut = sa->phonBufB;
+ sa->phonReadPos = 0;
+ sa->phonWritePos = 0;
+ sa->nextReadPos = 0;
+
+ /* kb fst[] */
+ sa->numFsts = 0;
+ for (i = 0; i<PICOKNOW_MAX_NUM_WPHO_FSTS; i++) {
+ fst = picokfst_getFST(this->voice->kbArray[fstKbIds[i]]);
+ if (NULL != fst) {
+ sa->fst[sa->numFsts++] = fst;
+ }
+ }
+ sa->curFst = 0;
+ PICODBG_DEBUG(("got %i fsts", sa->numFsts));
+ /* kb fixedIds */
+ sa->fixedIds = picoktab_getFixedIds(this->voice->kbArray[PICOKNOW_KBID_FIXED_IDS]);
+
+ /* kb tabgraphs */
+ sa->tabgraphs =
+ picoktab_getGraphs(this->voice->kbArray[PICOKNOW_KBID_TAB_GRAPHS]);
+ if (sa->tabgraphs == NULL) {
+ return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
+ NULL, NULL);
+ }
+ PICODBG_DEBUG(("got tabgraphs"));
+
+ /* kb tabphones */
+ sa->tabphones =
+ picoktab_getPhones(this->voice->kbArray[PICOKNOW_KBID_TAB_PHONES]);
+ if (sa->tabphones == NULL) {
+ return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
+ NULL, NULL);
+ }
+ PICODBG_DEBUG(("got tabphones"));
+
+#ifdef PICO_DEBU
+ {
+ picoos_uint16 itmp;
+ for (itmp = 0; itmp < 256; itmp++) {
+ if (picoktab_hasVowelProp(sa->tabphones, itmp)) {
+ PICODBG_DEBUG(("tabphones hasVowel: %d", itmp));
+ }
+ if (picoktab_hasDiphthProp(sa->tabphones, itmp)) {
+ PICODBG_DEBUG(("tabphones hasDiphth: %d", itmp));
+ }
+ if (picoktab_hasGlottProp(sa->tabphones, itmp)) {
+ PICODBG_DEBUG(("tabphones hasGlott: %d", itmp));
+ }
+ if (picoktab_hasNonsyllvowelProp(sa->tabphones, itmp)) {
+ PICODBG_DEBUG(("tabphones hasNonsyllvowel: %d", itmp));
+ }
+ if (picoktab_hasSyllconsProp(sa->tabphones, itmp)) {
+ PICODBG_DEBUG(("tabphones hasSyllcons: %d", itmp));
+ }
+ if (picoktab_isPrimstress(sa->tabphones, itmp)) {
+ PICODBG_DEBUG(("tabphones isPrimstress: %d", itmp));
+ }
+ if (picoktab_isSecstress(sa->tabphones, itmp)) {
+ PICODBG_DEBUG(("tabphones isSecstress: %d", itmp));
+ }
+ if (picoktab_isSyllbound(sa->tabphones, itmp)) {
+ PICODBG_DEBUG(("tabphones isSyllbound: %d", itmp));
+ }
+ if (picoktab_isPause(sa->tabphones, itmp)) {
+ PICODBG_DEBUG(("tabphones isPause: %d", itmp));
+ }
+ }
+
+ PICODBG_DEBUG(("tabphones primstressID: %d",
+ picoktab_getPrimstressID(sa->tabphones)));
+ PICODBG_DEBUG(("tabphones secstressID: %d",
+ picoktab_getSecstressID(sa->tabphones)));
+ PICODBG_DEBUG(("tabphones syllboundID: %d",
+ picoktab_getSyllboundID(sa->tabphones)));
+ PICODBG_DEBUG(("tabphones pauseID: %d",
+ picoktab_getPauseID(sa->tabphones)));
+ }
+#endif
+
+ /* kb tabpos */
+ sa->tabpos =
+ picoktab_getPos(this->voice->kbArray[PICOKNOW_KBID_TAB_POS]);
+ if (sa->tabpos == NULL) {
+ return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
+ NULL, NULL);
+ }
+ PICODBG_DEBUG(("got tabpos"));
+
+ /* kb dtposd */
+ sa->dtposd = picokdt_getDtPosD(this->voice->kbArray[PICOKNOW_KBID_DT_POSD]);
+ if (sa->dtposd == NULL) {
+ return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
+ NULL, NULL);
+ }
+ PICODBG_DEBUG(("got dtposd"));
+
+ /* kb dtg2p */
+ sa->dtg2p = picokdt_getDtG2P(this->voice->kbArray[PICOKNOW_KBID_DT_G2P]);
+ if (sa->dtg2p == NULL) {
+ return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
+ NULL, NULL);
+ }
+ PICODBG_DEBUG(("got dtg2p"));
+
+ /* kb lex */
+ sa->lex = picoklex_getLex(this->voice->kbArray[PICOKNOW_KBID_LEX_MAIN]);
+ if (sa->lex == NULL) {
+ return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
+ NULL, NULL);
+ }
+ PICODBG_DEBUG(("got lex"));
+
+ /* kb ulex[] */
+ sa->numUlex = 0;
+ for (i = 0; i<PICOKNOW_MAX_NUM_ULEX; i++) {
+ ulex = picoklex_getLex(this->voice->kbArray[ulexKbIds[i]]);
+ if (NULL != ulex) {
+ sa->ulex[sa->numUlex++] = ulex;
+ }
+ }
+ PICODBG_DEBUG(("got %i user lexica", sa->numUlex));
+
+ return PICO_OK;
+}
+
+static picodata_step_result_t saStep(register picodata_ProcessingUnit this,
+ picoos_int16 mode,
+ picoos_uint16 *numBytesOutput);
+
+static pico_status_t saTerminate(register picodata_ProcessingUnit this) {
+ return PICO_OK;
+}
+
+static pico_status_t saSubObjDeallocate(register picodata_ProcessingUnit this,
+ picoos_MemoryManager mm) {
+ sa_subobj_t * sa;
+ if (NULL != this) {
+ sa = (sa_subobj_t *) this->subObj;
+ picotrns_deallocate_alt_desc_buf(mm,&sa->altDescBuf);
+ picoos_deallocate(mm, (void *) &this->subObj);
+ }
+ return PICO_OK;
+}
+
+
+picodata_ProcessingUnit picosa_newSentAnaUnit(picoos_MemoryManager mm,
+ picoos_Common common,
+ picodata_CharBuffer cbIn,
+ picodata_CharBuffer cbOut,
+ picorsrc_Voice voice) {
+ picodata_ProcessingUnit this;
+ sa_subobj_t * sa;
+ this = picodata_newProcessingUnit(mm, common, cbIn, cbOut, voice);
+ if (this == NULL) {
+ return NULL;
+ }
+
+ this->initialize = saInitialize;
+ PICODBG_DEBUG(("set this->step to saStep"));
+ this->step = saStep;
+ this->terminate = saTerminate;
+ this->subDeallocate = saSubObjDeallocate;
+
+ this->subObj = picoos_allocate(mm, sizeof(sa_subobj_t));
+ if (this->subObj == NULL) {
+ picoos_deallocate(mm, (void *)&this);
+ picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, NULL, NULL);
+ return NULL;
+ }
+
+ sa = (sa_subobj_t *) this->subObj;
+
+ sa->altDescBuf = picotrns_allocate_alt_desc_buf(mm, SA_MAX_ALTDESC_SIZE, &sa->maxAltDescLen);
+ if (NULL == sa->altDescBuf) {
+ picotrns_deallocate_alt_desc_buf(mm,&sa->altDescBuf);
+ picoos_deallocate(mm, (void *)&sa);
+ picoos_deallocate(mm, (void *)&this);
+ picoos_emRaiseException(common->em,PICO_EXC_OUT_OF_MEM, NULL, NULL);
+ }
+
+
+ saInitialize(this);
+ return this;
+}
+
+
+/* ***********************************************************************/
+/* PROCESS_POSD disambiguation functions */
+/* ***********************************************************************/
+
+/* find next POS to the right of 'ind' and return its POS and index */
+static picoos_uint8 saPosDItemSeqGetPosRight(register picodata_ProcessingUnit this,
+ register sa_subobj_t *sa,
+ const picoos_uint16 ind,
+ const picoos_uint16 top,
+ picoos_uint16 *rightind) {
+ picoos_uint8 val;
+ picoos_int32 i;
+
+ val = PICOKDT_EPSILON;
+ for (i = ind + 1; ((val == PICOKDT_EPSILON) && (i < top)); i++) {
+ if ((sa->headx[i].head.type == PICODATA_ITEM_WORDGRAPH) ||
+ (sa->headx[i].head.type == PICODATA_ITEM_WORDINDEX) ||
+ (sa->headx[i].head.type == PICODATA_ITEM_WORDPHON) ) {
+ val = sa->headx[i].head.info1;
+ }
+ }
+ *rightind = i - 1;
+ return val;
+}
+
+
+/* left-to-right, for each WORDGRAPH/WORDINDEX/WORDPHON do posd */
+static pico_status_t saDisambPos(register picodata_ProcessingUnit this,
+ register sa_subobj_t *sa) {
+ picokdt_classify_result_t dtres;
+ picoos_uint8 half_nratt_posd = PICOKDT_NRATT_POSD >> 1;
+ picoos_uint16 valbuf[PICOKDT_NRATT_POSD]; /* only [0..half_nratt_posd] can be >2^8 */
+ picoos_uint16 prevout; /* direct dt output (hist.) or POS of prev word */
+ picoos_uint16 lastprev3; /* last index of POS(es) found to the left */
+ picoos_uint16 curPOS; /* POS(es) of current word */
+ picoos_int32 first; /* index of first item with POS(es) */
+ picoos_int32 ci;
+ picoos_uint8 okay; /* two uses: processing okay and lexind resovled */
+ picoos_uint8 i;
+ picoos_uint16 inval;
+ picoos_uint16 fallback;
+
+ /* set initial values */
+ okay = TRUE;
+ prevout = PICOKDT_HISTORY_ZERO;
+ curPOS = PICODATA_ITEMINFO1_ERR;
+ first = 0;
+
+ while ((first < sa->headxLen) &&
+ (sa->headx[first].head.type != PICODATA_ITEM_WORDGRAPH) &&
+ (sa->headx[first].head.type != PICODATA_ITEM_WORDINDEX) &&
+ (sa->headx[first].head.type != PICODATA_ITEM_WORDPHON)) {
+ first++;
+ }
+ if (first >= sa->headxLen) {
+ /* phrase not containing an item with POSes info, e.g. single flush */
+ PICODBG_DEBUG(("no item with POSes found"));
+ return PICO_OK;
+ }
+
+ lastprev3 = first;
+
+ for (i = 0; i <= half_nratt_posd; i++) {
+ valbuf[i] = PICOKDT_HISTORY_ZERO;
+ }
+ /* set POS(es) of current word, will be shifted afterwards */
+ valbuf[half_nratt_posd+1] = sa->headx[first].head.info1;
+ for (i = half_nratt_posd+2; i < PICOKDT_NRATT_POSD; i++) {
+ /* find next POS to the right and set valbuf[i] */
+ valbuf[i] = saPosDItemSeqGetPosRight(this, sa, lastprev3, sa->headxLen, &lastprev3);
+ }
+
+ PICODBG_TRACE(("headxLen: %d", sa->headxLen));
+
+ /* process from left to right all items in headx */
+ for (ci = first; ci < sa->headxLen; ci++) {
+ okay = TRUE;
+
+ PICODBG_TRACE(("iter: %d, type: %c", ci, sa->headx[ci].head.type));
+
+ /* if not (WORDGRAPH or WORDINDEX) */
+ if ((sa->headx[ci].head.type != PICODATA_ITEM_WORDGRAPH) &&
+ (sa->headx[ci].head.type != PICODATA_ITEM_WORDINDEX) &&
+ (sa->headx[ci].head.type != PICODATA_ITEM_WORDPHON)) {
+ continue;
+ }
+
+ PICODBG_TRACE(("iter: %d, curPOS: %d", ci, sa->headx[ci].head.info1));
+
+ /* no continue so far => at [ci] we have a WORDGRAPH / WORDINDEX item */
+ /* shift all elements one position to the left */
+ /* shift predicted values (history) */
+ for (i=1; i<half_nratt_posd; i++) {
+ valbuf[i-1] = valbuf[i];
+ }
+ /* insert previously predicted value (now history) */
+ valbuf[half_nratt_posd-1] = prevout;
+ /* shift not yet predicted values */
+ for (i=half_nratt_posd+1; i<PICOKDT_NRATT_POSD; i++) {
+ valbuf[i-1] = valbuf[i];
+ }
+ /* find next POS to the right and set valbuf[PICOKDT_NRATT_POSD-1] */
+ valbuf[PICOKDT_NRATT_POSD-1] = saPosDItemSeqGetPosRight(this, sa, lastprev3, sa->headxLen, &lastprev3);
+
+ /* just to be on the safe side; the following should never happen */
+ if (sa->headx[ci].head.info1 != valbuf[half_nratt_posd]) {
+ PICODBG_WARN(("syncing POS"));
+ picoos_emRaiseWarning(this->common->em, PICO_WARN_INVECTOR,
+ NULL, NULL);
+ valbuf[half_nratt_posd] = sa->headx[ci].head.info1;
+ }
+
+ curPOS = valbuf[half_nratt_posd];
+
+ /* Check if POS disambiguation not needed */
+ if (picoktab_isUniquePos(sa->tabpos, (picoos_uint8) curPOS)) {
+ /* not needed */
+ inval = 0;
+ fallback = 0;
+ if (!picokdt_dtPosDreverseMapOutFixed(sa->dtposd, curPOS,
+ &prevout, &fallback)) {
+ if (fallback) {
+ prevout = fallback;
+
+ } else {
+ PICODBG_ERROR(("problem doing reverse output mapping"));
+ prevout = curPOS;
+ }
+ }
+ PICODBG_DEBUG(("keeping: %d", sa->headx[ci].head.info1));
+ continue;
+ }
+
+ /* assuming PICOKDT_NRATT_POSD == 7 */
+ PICODBG_DEBUG(("%d: [%d %d %d %d %d %d %d]",
+ ci, valbuf[0], valbuf[1], valbuf[2],
+ valbuf[3], valbuf[4], valbuf[5], valbuf[6]));
+
+ /* no continue so far => POS disambiguation needed */
+ /* construct input vector, which is set in dtposd */
+ if (!picokdt_dtPosDconstructInVec(sa->dtposd, valbuf)) {
+ /* error constructing invec */
+ PICODBG_WARN(("problem with invec"));
+ picoos_emRaiseWarning(this->common->em, PICO_WARN_INVECTOR,
+ NULL, NULL);
+ okay = FALSE;
+ }
+ /* classify */
+ if (okay && (!picokdt_dtPosDclassify(sa->dtposd, &prevout))) {
+ /* error doing classification */
+ PICODBG_WARN(("problem classifying"));
+ picoos_emRaiseWarning(this->common->em, PICO_WARN_CLASSIFICATION,
+ NULL, NULL);
+ okay = FALSE;
+ }
+ /* decompose */
+ if (okay && (!picokdt_dtPosDdecomposeOutClass(sa->dtposd, &dtres))) {
+ /* error decomposing */
+ PICODBG_WARN(("problem decomposing"));
+ picoos_emRaiseWarning(this->common->em, PICO_WARN_OUTVECTOR,
+ NULL, NULL);
+ okay = FALSE;
+ }
+ if (okay && dtres.set) {
+ PICODBG_DEBUG(("in: %d, out: %d", valbuf[3], dtres.class));
+ } else {
+ PICODBG_WARN(("problem disambiguating POS"));
+ dtres.class = PICODATA_ITEMINFO1_ERR;
+ }
+
+ if (dtres.class > 255) {
+ PICODBG_WARN(("dt result outside valid range, setting pos to ERR"));
+ dtres.class = PICODATA_ITEMINFO1_ERR;
+ }
+
+ sa->headx[ci].head.info1 = (picoos_uint8)dtres.class;
+ if (sa->headx[ci].head.type == PICODATA_ITEM_WORDINDEX) {
+ /* find pos/ind entry in cbuf matching unique,
+ disambiguated POS, adapt current headx cind/len
+ accordingly */
+ PICODBG_DEBUG(("select phon based on POS disambiguation"));
+ okay = FALSE;
+ for (i = 0; i < sa->headx[ci].head.len; i += PICOKLEX_POSIND_SIZE) {
+ PICODBG_DEBUG(("comparing POS at cind + %d", i));
+ if (picoktab_isPartOfPosGroup(sa->tabpos,
+ (picoos_uint8)dtres.class,
+ sa->cbuf1[sa->headx[ci].cind + i])) {
+ PICODBG_DEBUG(("found match for entry %d",
+ i/PICOKLEX_POSIND_SIZE + 1));
+ sa->headx[ci].cind += i;
+ okay = TRUE;
+ break;
+ }
+ }
+ /* not finding a match is possible if posd predicts a POS that
+ is not part of any of the input POSes -> no warning */
+#if defined(PICO_DEBUG)
+ if (!okay) {
+ PICODBG_DEBUG(("no match found, selecting 1st entry"));
+ }
+#endif
+ sa->headx[ci].head.len = PICOKLEX_POSIND_SIZE;
+ }
+ }
+ return PICO_OK;
+}
+
+
+/* ***********************************************************************/
+/* PROCESS_WPHO functions, copy, lexindex, and g2p */
+/* ***********************************************************************/
+
+/* ************** copy ***************/
+
+static pico_status_t saCopyItemContent1to2(register picodata_ProcessingUnit this,
+ register sa_subobj_t *sa,
+ picoos_uint16 ind) {
+ picoos_uint16 i;
+ picoos_uint16 cind1;
+
+ /* set headx.cind, and copy content, head unchanged */
+ cind1 = sa->headx[ind].cind;
+ sa->headx[ind].cind = sa->cbuf2Len;
+
+ /* check cbufLen */
+ if (sa->headx[ind].head.len > (sa->cbuf2BufSize - sa->cbuf2Len)) {
+ sa->headx[ind].head.len = sa->cbuf2BufSize - sa->cbuf2Len;
+ PICODBG_WARN(("phones skipped"));
+ picoos_emRaiseWarning(this->common->em,
+ PICO_WARN_INCOMPLETE, NULL, NULL);
+ if (sa->headx[ind].head.len == 0) {
+ sa->headx[ind].cind = 0;
+ }
+ }
+
+ for (i = 0; i < sa->headx[ind].head.len; i++) {
+ sa->cbuf2[sa->cbuf2Len] = sa->cbuf1[cind1 + i];
+ sa->cbuf2Len++;
+ }
+
+ PICODBG_DEBUG(("%c item, len: %d",
+ sa->headx[ind].head.type, sa->headx[ind].head.len));
+
+ return PICO_OK;
+}
+
+
+/* ************** lexindex ***************/
+
+static pico_status_t saLexIndLookup(register picodata_ProcessingUnit this,
+ register sa_subobj_t *sa,
+ picoklex_Lex lex,
+ picoos_uint16 ind) {
+ picoos_uint8 pos;
+ picoos_uint8 *phones;
+ picoos_uint8 plen;
+ picoos_uint16 i;
+
+ if (picoklex_lexIndLookup(lex, &(sa->cbuf1[sa->headx[ind].cind + 1]),
+ PICOKLEX_IND_SIZE, &pos, &phones, &plen)) {
+ sa->headx[ind].cind = sa->cbuf2Len;
+
+ /* check cbufLen */
+ if (plen > (sa->cbuf2BufSize - sa->cbuf2Len)) {
+ plen = sa->cbuf2BufSize - sa->cbuf2Len;
+ PICODBG_WARN(("phones skipped"));
+ picoos_emRaiseWarning(this->common->em,
+ PICO_WARN_INCOMPLETE, NULL, NULL);
+ if (plen == 0) {
+ sa->headx[ind].cind = 0;
+ }
+ }
+
+ /* set item head, info1, info2 unchanged */
+ sa->headx[ind].head.type = PICODATA_ITEM_WORDPHON;
+ sa->headx[ind].head.len = plen;
+
+ for (i = 0; i < plen; i++) {
+ sa->cbuf2[sa->cbuf2Len] = phones[i];
+ sa->cbuf2Len++;
+ }
+
+ PICODBG_DEBUG(("%c item, pos: %d, plen: %d",
+ PICODATA_ITEM_WORDPHON, pos, plen));
+
+ } else {
+ PICODBG_WARN(("lexIndLookup problem"));
+ picoos_emRaiseWarning(this->common->em, PICO_WARN_PU_IRREG_ITEM,
+ NULL, NULL);
+ }
+ return PICO_OK;
+}
+
+
+
+/* ************** g2p ***************/
+
+
+/* Name : saGetNvowel
+ Function: returns vowel info in a word or word seq
+ Input : sInChar the grapheme string to be converted in phoneme
+ inLen number of bytes in grapheme buffer
+ inPos start position of current grapheme (0..inLen-1)
+ Output : nVow number of vowels in the word
+ nVord vowel order in the word
+ Returns : TRUE: processing successful; FALSE: errors
+*/
+static picoos_uint8 saGetNrVowel(register picodata_ProcessingUnit this,
+ register sa_subobj_t *sa,
+ const picoos_uint8 *sInChar,
+ const picoos_uint16 inLen,
+ const picoos_uint8 inPos,
+ picoos_uint8 *nVow,
+ picoos_uint8 *nVord) {
+ picoos_uint32 nCount;
+ picoos_uint32 pos;
+ picoos_uint8 cstr[PICOBASE_UTF8_MAXLEN + 1];
+
+ /*defaults*/
+ *nVow = 0;
+ *nVord = 0;
+ /*1:check wether the current char is a vowel*/
+ pos = inPos;
+ if (!picobase_get_next_utf8char(sInChar, inLen, &pos, cstr) ||
+ !picoktab_hasVowellikeProp(sa->tabgraphs, cstr, PICOBASE_UTF8_MAXLEN)) {
+ return FALSE;
+ }
+ /*2:count number of vowels in current word and find vowel order*/
+ for (nCount = 0; nCount < inLen; ) {
+ if (!picobase_get_next_utf8char(sInChar, inLen, &nCount, cstr)) {
+ return FALSE;
+ }
+ if (picoktab_hasVowellikeProp(sa->tabgraphs, cstr,
+ PICOBASE_UTF8_MAXLEN)) {
+ (*nVow)++;
+ if (nCount == pos) {
+ (*nVord) = (*nVow);
+ }
+ }
+ }
+ return TRUE;
+}
+
+
+/* do g2p for a full word, right-to-left */
+static picoos_uint8 saDoG2P(register picodata_ProcessingUnit this,
+ register sa_subobj_t *sa,
+ const picoos_uint8 *graph,
+ const picoos_uint8 graphlen,
+ const picoos_uint8 pos,
+ picoos_uint8 *phones,
+ const picoos_uint16 phonesmaxlen,
+ picoos_uint16 *plen) {
+ picoos_uint16 outNp1Ch; /*last 3 outputs produced*/
+ picoos_uint16 outNp2Ch;
+ picoos_uint16 outNp3Ch;
+ picoos_uint8 nPrimary;
+ picoos_uint8 nCount;
+ picoos_uint32 utfpos;
+ picoos_uint16 nOutVal;
+ picoos_uint8 okay;
+ picoos_uint16 phonesind;
+ picoos_uint8 nrvow;
+ picoos_uint8 ordvow;
+ picokdt_classify_vecresult_t dtresv;
+ picoos_uint16 i;
+
+ *plen = 0;
+ okay = TRUE;
+
+ /* use sa->tmpbuf[PICOSA_MAXITEMSIZE] to temporarly store the
+ phones which are predicted in reverse order. Once all are
+ available put them in phones in usuable order. phonesind is
+ used to fille item in reverse order starting at the end of
+ tmpbuf. */
+ phonesind = PICOSA_MAXITEMSIZE - 1;
+
+ /* prepare the data for loop operations */
+ outNp1Ch = PICOKDT_HISTORY_ZERO;
+ outNp2Ch = PICOKDT_HISTORY_ZERO;
+ outNp3Ch = PICOKDT_HISTORY_ZERO;
+
+ /* inner loop */
+ nPrimary = 0;
+
+ /* ************************************************/
+ /* go backward grapheme by grapheme, it's utf8... */
+ /* ************************************************/
+
+ /* set start nCount to position of start of last utfchar */
+ /* ! watch out! somethimes starting at 1, sometimes at 0,
+ ! sometimes counting per byte, sometimes per UTF8 char */
+ /* nCount is (start position + 1) of utf8 char */
+ utfpos = graphlen;
+ if (picobase_get_prev_utf8charpos(graph, 0, &utfpos)) {
+ nCount = utfpos + 1;
+ } else {
+ /* should not occurr */
+ PICODBG_ERROR(("invalid utf8 string, graphlen: %d", graphlen));
+ return FALSE;
+ }
+
+ while (nCount > 0) {
+ PICODBG_TRACE(("right-to-left g2p, count: %d", nCount));
+ okay = TRUE;
+
+ if (!saGetNrVowel(this, sa, graph, graphlen, nCount-1, &nrvow,
+ &ordvow)) {
+ nrvow = 0;
+ ordvow = 0;
+ }
+
+ /* prepare input vector, set inside tree object invec,
+ * g2pBuildVector will call the constructInVec tree method */
+ if (!picokdt_dtG2PconstructInVec(sa->dtg2p,
+ graph, /*grapheme start*/
+ graphlen, /*grapheme length*/
+ nCount-1, /*grapheme current position*/
+ pos, /*Word POS*/
+ nrvow, /*nr vowels if vowel, 0 else */
+ ordvow, /*ord of vowel if vowel, 0 el*/
+ &nPrimary, /*primary stress flag*/
+ outNp1Ch, /*Right phoneme context +1*/
+ outNp2Ch, /*Right phoneme context +2*/
+ outNp3Ch)) { /*Right phon context +3*/
+ /*Errors in preparing the input vector : skip processing*/
+ PICODBG_WARN(("problem with invec"));
+ picoos_emRaiseWarning(this->common->em, PICO_WARN_INVECTOR,
+ NULL, NULL);
+ okay = FALSE;
+ }
+
+ /* classify using the invec in the tree object and save the direct
+ tree output also in the tree object */
+ if (okay && (!picokdt_dtG2Pclassify(sa->dtg2p, &nOutVal))) {
+ /* error doing classification */
+ PICODBG_WARN(("problem classifying"));
+ picoos_emRaiseWarning(this->common->em, PICO_WARN_CLASSIFICATION,
+ NULL, NULL);
+ okay = FALSE;
+ }
+
+ /* decompose the invec in the tree object and return result in dtresv */
+ if (okay && (!picokdt_dtG2PdecomposeOutClass(sa->dtg2p, &dtresv))) {
+ /* error decomposing */
+ PICODBG_WARN(("problem decomposing"));
+ picoos_emRaiseWarning(this->common->em, PICO_WARN_OUTVECTOR,
+ NULL, NULL);
+ okay = FALSE;
+ }
+
+ if (okay) {
+ if ((dtresv.nr == 0) || (dtresv.classvec[0] == PICOKDT_EPSILON)) {
+ /* no phones to be added */
+ PICODBG_TRACE(("epsilon, no phone added %c", graph[nCount-1]));
+ ;
+ } else {
+ /* add decomposed output to tmpbuf, reverse order */
+ for (i = dtresv.nr; ((((PICOSA_MAXITEMSIZE - 1) -
+ phonesind)<phonesmaxlen) &&
+ (i > 0)); ) {
+ i--;
+ PICODBG_TRACE(("%c %d",graph[nCount-1],dtresv.classvec[i]));
+ if (dtresv.classvec[i] > 255) {
+ PICODBG_WARN(("dt result outside valid range, "
+ "skipping phone"));
+ continue;
+ }
+ sa->tmpbuf[phonesind--] = (picoos_uint8)dtresv.classvec[i];
+ if (!nPrimary) {
+ if (picoktab_isPrimstress(sa->tabphones,
+ (picoos_uint8)dtresv.classvec[i])) {
+ nPrimary = 1;
+ }
+ }
+ (*plen)++;
+ }
+ if (i > 0) {
+ PICODBG_WARN(("phones skipped"));
+ picoos_emRaiseWarning(this->common->em,
+ PICO_WARN_INCOMPLETE, NULL, NULL);
+ }
+ }
+ }
+
+ /*shift tree output history and update*/
+ outNp3Ch = outNp2Ch;
+ outNp2Ch = outNp1Ch;
+ outNp1Ch = nOutVal;
+
+ /* go backward one utf8 char */
+ /* nCount is in +1 domain */
+ if (nCount <= 1) {
+ /* end of str */
+ nCount = 0;
+ } else {
+ utfpos = nCount - 1;
+ if (!picobase_get_prev_utf8charpos(graph, 0, &utfpos)) {
+ /* should not occur */
+ PICODBG_ERROR(("invalid utf8 string, utfpos: %d", utfpos));
+ return FALSE;
+ } else {
+ nCount = utfpos + 1;
+ }
+ }
+ }
+
+ /* a must be: (PICOSA_MAXITEMSIZE-1) - phonesind == *plen */
+ /* now that we have all phone IDs, copy in correct order to phones */
+ /* phonesind point to next free slot in the reverse domainn,
+ ie. inc first */
+ phonesind++;
+ for (i = 0; i < *plen; i++, phonesind++) {
+ phones[i] = sa->tmpbuf[phonesind];
+ }
+ return TRUE;
+}
+
+
+/* item in headx[ind]/cbuf1, out: modified headx and cbuf2 */
+
+static pico_status_t saGraphemeToPhoneme(register picodata_ProcessingUnit this,
+ register sa_subobj_t *sa,
+ picoos_uint16 ind) {
+ picoos_uint16 plen;
+
+ PICODBG_TRACE(("starting g2p"));
+
+ if (saDoG2P(this, sa, &(sa->cbuf1[sa->headx[ind].cind]),
+ sa->headx[ind].head.len, sa->headx[ind].head.info1,
+ &(sa->cbuf2[sa->cbuf2Len]), (sa->cbuf2BufSize - sa->cbuf2Len),
+ &plen)) {
+
+ /* check of cbuf2Len done in saDoG2P, phones skipped if needed */
+ if (plen > 255) {
+ PICODBG_WARN(("maximum number of phones exceeded (%d), skipping",
+ plen));
+ plen = 255;
+ }
+
+ /* set item head, info1, info2 unchanged */
+ sa->headx[ind].head.type = PICODATA_ITEM_WORDPHON;
+ sa->headx[ind].head.len = (picoos_uint8)plen;
+ sa->headx[ind].cind = sa->cbuf2Len;
+ sa->cbuf2Len += plen;
+ PICODBG_DEBUG(("%c item, plen: %d",
+ PICODATA_ITEM_WORDPHON, plen));
+ } else {
+ PICODBG_WARN(("problem doing g2p"));
+ picoos_emRaiseWarning(this->common->em, PICO_WARN_PU_IRREG_ITEM,
+ NULL, NULL);
+ }
+ return PICO_OK;
+}
+
+
+/* ***********************************************************************/
+/* extract phonemes of an item into a phonBuf */
+/* ***********************************************************************/
+
+static pico_status_t saAddPhoneme(register sa_subobj_t *sa, picoos_uint16 pos, picoos_uint16 sym) {
+ /* picoos_uint8 plane, unshifted; */
+
+ /* just for debuging */
+ /*
+ unshifted = picotrns_unplane(sym,&plane);
+ PICODBG_DEBUG(("adding %i/%i (%c on plane %i) at phonBuf[%i]",pos,sym,unshifted,plane,sa->phonWritePos));
+ */
+ if (PICOTRNS_MAX_NUM_POSSYM <= sa->phonWritePos) {
+ /* not an error! */
+ PICODBG_DEBUG(("couldn't add because phon buffer full"));
+ return PICO_EXC_BUF_OVERFLOW;
+ } else {
+ sa->phonBuf[sa->phonWritePos].pos = pos;
+ sa->phonBuf[sa->phonWritePos].sym = sym;
+ sa->phonWritePos++;
+ return PICO_OK;
+ }
+}
+
+/*
+static pico_status_t saAddStartPhoneme(register sa_subobj_t *sa) {
+ return saAddPhoneme(sa, PICOTRNS_POS_IGNORE,
+ (PICOKFST_PLANE_INTERN << 8) + sa->fixedIds->phonStartId);
+}
+
+
+static pico_status_t saAddTermPhoneme(register sa_subobj_t *sa) {
+ return saAddPhoneme(sa, PICOTRNS_POS_IGNORE,
+ (PICOKFST_PLANE_INTERN << 8) + sa->fixedIds->phonTermId);
+}
+
+*/
+
+static pico_status_t saExtractPhonemes(register picodata_ProcessingUnit this,
+ register sa_subobj_t *sa, picoos_uint16 pos,
+ picodata_itemhead_t* head, const picoos_uint8* content)
+{
+ pico_status_t rv= PICO_OK;
+ picoos_uint8 i;
+ picoos_int16 fstSymbol;
+#if defined(PICO_DEBUG)
+ picoos_char msgstr[SA_MSGSTR_SIZE];
+#endif
+
+ PICODBG_TRACE(("doing item %s",
+ picodata_head_to_string(head,msgstr,SA_MSGSTR_SIZE)));
+ /*
+ Items considered in a transduction are WORDPHON item. its starting offset within the inBuf is given as
+ 'pos'.
+ Elements that go into the transduction receive "their" position in the buffer.
+ */
+ sa->phonWritePos = 0;
+ /* WORDPHON(POS,WACC)phon */
+ rv = saAddPhoneme(sa, PICOTRNS_POS_IGNORE,
+ (PICOKFST_PLANE_INTERN << 8) + sa->fixedIds->phonStartId);
+ for (i = 0; i < head->len; i++) {
+ fstSymbol = /* (PICOKFST_PLANE_PHONEMES << 8) + */content[i];
+ /* */
+ PICODBG_TRACE(("adding phoneme %c",fstSymbol));
+ rv = saAddPhoneme(sa, pos+PICODATA_ITEM_HEADSIZE+i, fstSymbol);
+ }
+ rv = saAddPhoneme(sa, PICOTRNS_POS_IGNORE,
+ (PICOKFST_PLANE_INTERN << 8) + sa->fixedIds->phonTermId);
+ sa->nextReadPos = pos + PICODATA_ITEM_HEADSIZE + head->len;
+ return rv;
+}
+
+
+#define SA_POSSYM_OK 0
+#define SA_POSSYM_OUT_OF_RANGE 1
+#define SA_POSSYM_END 2
+#define SA_POSSYM_INVALID -3
+/* *readPos is the next position in phonBuf to be read, and *writePos is the first position not to be read (may be outside
+ * buf).
+ * 'rangeEnd' is the first possym position outside the desired range.
+ * Possible return values:
+ * SA_POSSYM_OK : 'pos' and 'sym' are set to the read possym, *readPos is advanced
+ * SA_POSSYM_OUT_OF_RANGE : pos is out of range. 'pos' is set to that of the read possym, 'sym' is undefined
+ * SA_POSSYM_UNDERFLOW : no more data in buf. 'pos' is set to PICOTRNS_POS_INVALID, 'sym' is undefined
+ * SA_POSSYM_INVALID : "strange" pos. 'pos' is set to PICOTRNS_POS_INVALID, 'sym' is undefined
+ */
+static pico_status_t getNextPosSym(sa_subobj_t * sa, picoos_int16 * pos, picoos_int16 * sym,
+ picoos_int16 rangeEnd) {
+ /* skip POS_IGNORE */
+ while ((sa->phonReadPos < sa->phonWritePos) && (PICOTRNS_POS_IGNORE == sa->phonBuf[sa->phonReadPos].pos)) {
+ PICODBG_DEBUG(("ignoring phone at sa->phonBuf[%i] because it has pos==IGNORE",sa->phonReadPos));
+ sa->phonReadPos++;
+ }
+ if ((sa->phonReadPos < sa->phonWritePos)) {
+ *pos = sa->phonBuf[sa->phonReadPos].pos;
+ if ((PICOTRNS_POS_INSERT == *pos) || ((0 <= *pos) && (*pos < rangeEnd))) {
+ *sym = sa->phonBuf[sa->phonReadPos++].sym;
+ return SA_POSSYM_OK;
+ } else if (*pos < 0){ /* *pos is "strange" (e.g. POS_INVALID) */
+ return SA_POSSYM_INVALID;
+ } else {
+ return SA_POSSYM_OUT_OF_RANGE;
+ }
+ } else {
+ /* no more possyms to read */
+ *pos = PICOTRNS_POS_INVALID;
+ return SA_POSSYM_END;
+ }
+}
+
+
+
+
+/* ***********************************************************************/
+/* saStep function */
+/* ***********************************************************************/
+
+/*
+complete phrase processed in one step, if not fast enough -> rework
+
+init, collect into internal buffer, process, and then feed to
+output buffer
+
+init state: INIT ext ext
+state trans: in hc1 hc2 out
+
+INIT | putItem = 0 0 +1 | BUSY -> COLL (put B-SBEG item,
+ set do-init to false)
+
+ inspace-ok-hc1
+ needs-more-items-(phrase-or-flush)
+COLL1 |getItems -n +n 0 1 | ATOMIC -> PPOSD (got items,
+ if flush set do-init)
+COLL2 |getItems -n +n 1 0 | ATOMIC -> PPOSD (got items, forced)
+COLL3 |getItems -n +n 1 1 | IDLE (got items, need more)
+COLL4 |getItems = = 1 1 | IDLE (got no items)
+
+PPOSD | posd = ~n~n | BUSY -> PWP (posd done)
+PWP | lex/g2p = ~n-n 0+n | BUSY -> PPHR (lex/g2p done)
+PPHR | phr = -n 0 +m=n | BUSY -> PACC (phr done, m>=n)
+PACC | acc = 0 0 ~m=n | BUSY -> FEED (acc done)
+
+ doinit-flag
+FEED | putItems 0 0 0 -m-n +m 0 | BUSY -> COLL (put items)
+FEED | putItems 0 0 0 -m-n +m 1 | BUSY -> INIT (put items)
+FEED | putItems 0 0 0 -d-d +d | OUT_FULL (put some items)
+*/
+
+static picodata_step_result_t saStep(register picodata_ProcessingUnit this,
+ picoos_int16 mode,
+ picoos_uint16 *numBytesOutput) {
+ register sa_subobj_t *sa;
+ pico_status_t rv = PICO_OK;
+ pico_status_t rvP = PICO_OK;
+ picoos_uint16 blen = 0;
+ picoos_uint16 clen = 0;
+ picoos_uint16 i;
+ picoklex_Lex lex;
+
+
+ if (NULL == this || NULL == this->subObj) {
+ return PICODATA_PU_ERROR;
+ }
+ sa = (sa_subobj_t *) this->subObj;
+ mode = mode; /* avoid warning "var not used in this function"*/
+ *numBytesOutput = 0;
+ while (1) { /* exit via return */
+ PICODBG_DEBUG(("doing state %i, hLen|c1Len|c2Len: %d|%d|%d",
+ sa->procState, sa->headxLen, sa->cbuf1Len,
+ sa->cbuf2Len));
+
+ switch (sa->procState) {
+
+ /* *********************************************************/
+ /* collect state: get item(s) from charBuf and store in
+ * internal buffers, need a complete punctuation-phrase
+ */
+ case SA_STEPSTATE_COLLECT:
+
+ while (sa->inspaceok && sa->needsmoreitems
+ && (PICO_OK ==
+ (rv = picodata_cbGetItem(this->cbIn, sa->tmpbuf,
+ PICOSA_MAXITEMSIZE, &blen)))) {
+ rvP = picodata_get_itemparts(sa->tmpbuf,
+ PICOSA_MAXITEMSIZE,
+ &(sa->headx[sa->headxLen].head),
+ &(sa->cbuf1[sa->cbuf1Len]),
+ sa->cbuf1BufSize-sa->cbuf1Len,
+ &clen);
+ if (rvP != PICO_OK) {
+ PICODBG_ERROR(("problem getting item parts"));
+ picoos_emRaiseException(this->common->em, rvP,
+ NULL, NULL);
+ return PICODATA_PU_ERROR;
+ }
+
+ /* if CMD(...FLUSH...) -> PUNC(...FLUSH...),
+ construct PUNC-FLUSH item in headx */
+ if ((sa->headx[sa->headxLen].head.type ==
+ PICODATA_ITEM_CMD) &&
+ (sa->headx[sa->headxLen].head.info1 ==
+ PICODATA_ITEMINFO1_CMD_FLUSH)) {
+ sa->headx[sa->headxLen].head.type =
+ PICODATA_ITEM_PUNC;
+ sa->headx[sa->headxLen].head.info1 =
+ PICODATA_ITEMINFO1_PUNC_FLUSH;
+ sa->headx[sa->headxLen].head.info2 =
+ PICODATA_ITEMINFO2_PUNC_SENT_T;
+ sa->headx[sa->headxLen].head.len = 0;
+ }
+
+ /* convert opening phoneme command to WORDPHON
+ * and assign user-POS XX to it (Bug 432) */
+ sa->headx[sa->headxLen].cind = sa->cbuf1Len;
+ /* maybe overwritten later */
+ if ((sa->headx[sa->headxLen].head.type ==
+ PICODATA_ITEM_CMD) &&
+ (sa->headx[sa->headxLen].head.info1 ==
+ PICODATA_ITEMINFO1_CMD_PHONEME)&&
+ (sa->headx[sa->headxLen].head.info2 ==
+ PICODATA_ITEMINFO2_CMD_START)) {
+ picoos_uint8 i;
+ picoos_uint8 wordsep = picoktab_getWordboundID(sa->tabphones);
+ PICODBG_INFO(("wordsep id is %i",wordsep));
+ sa->headx[sa->headxLen].head.type = PICODATA_ITEM_WORDPHON;
+ sa->headx[sa->headxLen].head.info1 = PICODATA_POS_XX;
+ sa->headx[sa->headxLen].head.info2 = PICODATA_ITEMINFO2_NA;
+ /* cut off additional words */
+ i = 0;
+ while ((i < sa->headx[sa->headxLen].head.len) && (wordsep != sa->cbuf1[sa->headx[sa->headxLen].cind+i])) {
+ PICODBG_INFO(("accepting phoneme %i",sa->cbuf1[sa->headx[sa->headxLen].cind+i]));
+
+ i++;
+ }
+ if (i < sa->headx[sa->headxLen].head.len) {
+ PICODBG_INFO(("cutting off superfluous phonetic words at %i",i));
+ sa->headx[sa->headxLen].head.len = i;
+ }
+ }
+
+ /* check/set needsmoreitems */
+ if (sa->headx[sa->headxLen].head.type ==
+ PICODATA_ITEM_PUNC) {
+ sa->needsmoreitems = FALSE;
+ }
+
+ /* check/set inspaceok, keep spare slot for forcing */
+ if ((sa->headxLen >= (PICOSA_MAXNR_HEADX - 2)) ||
+ ((sa->cbuf1BufSize - sa->cbuf1Len) <
+ PICOSA_MAXITEMSIZE)) {
+ sa->inspaceok = FALSE;
+ }
+
+ if (clen > 0) {
+ sa->headx[sa->headxLen].cind = sa->cbuf1Len;
+ sa->cbuf1Len += clen;
+ } else {
+ sa->headx[sa->headxLen].cind = 0;
+ }
+ sa->headxLen++;
+ }
+
+ if (!sa->needsmoreitems) {
+ /* 1, phrase buffered */
+ sa->procState = SA_STEPSTATE_PROCESS_POSD;
+ return PICODATA_PU_ATOMIC;
+ } else if (!sa->inspaceok) {
+ /* 2, forced phrase end */
+ /* at least one slot is still free, use it to
+ force a trailing PUNC item */
+ sa->headx[sa->headxLen].head.type = PICODATA_ITEM_PUNC;
+ sa->headx[sa->headxLen].head.info1 =
+ PICODATA_ITEMINFO1_PUNC_PHRASEEND;
+ sa->headx[sa->headxLen].head.info2 =
+ PICODATA_ITEMINFO2_PUNC_PHRASE_FORCED;
+ sa->headx[sa->headxLen].head.len = 0;
+ sa->needsmoreitems = FALSE; /* not really needed for now */
+ sa->headxLen++;
+ PICODBG_WARN(("forcing phrase end, added PUNC_PHRASEEND"));
+ picoos_emRaiseWarning(this->common->em,
+ PICO_WARN_FALLBACK, NULL,
+ (picoos_char *)"forced phrase end");
+ sa->procState = SA_STEPSTATE_PROCESS_POSD;
+ return PICODATA_PU_ATOMIC;
+ } else if (rv == PICO_EOF) {
+ /* 3, 4 */
+ return PICODATA_PU_IDLE;
+ } else if ((rv == PICO_EXC_BUF_UNDERFLOW) ||
+ (rv == PICO_EXC_BUF_OVERFLOW)) {
+ /* error, no valid item in cb (UNDER) */
+ /* or tmpbuf not large enough, not possible (OVER) */
+ /* no exception raised, left for ctrl to handle */
+ PICODBG_ERROR(("buffer under/overflow, rv: %d", rv));
+ return PICODATA_PU_ERROR;
+ } else {
+ /* error, only possible if cbGetItem implementation
+ changes without this function being adapted*/
+ PICODBG_ERROR(("untreated return value, rv: %d", rv));
+ return PICODATA_PU_ERROR;
+ }
+ break;
+
+
+ /* *********************************************************/
+ /* process posd state: process items in headx/cbuf1
+ * and change in place
+ */
+ case SA_STEPSTATE_PROCESS_POSD:
+ /* ensure there is an item in inBuf */
+ if (sa->headxLen > 0) {
+ /* we have a phrase in headx, cbuf1 (can be
+ single PUNC item without POS), do pos disamb */
+ if (PICO_OK != saDisambPos(this, sa)) {
+ picoos_emRaiseException(this->common->em,
+ PICO_ERR_OTHER, NULL, NULL);
+ return PICODATA_PU_ERROR;
+ }
+ sa->procState = SA_STEPSTATE_PROCESS_WPHO;
+
+ } else if (sa->headxLen == 0) { /* no items in inBuf */
+ PICODBG_WARN(("no items in inBuf"));
+ sa->procState = SA_STEPSTATE_COLLECT;
+ return PICODATA_PU_BUSY;
+ }
+
+#if defined (PICO_DEBUG)
+ if (1) {
+ picoos_uint8 i, j, ittype;
+ for (i = 0; i < sa->headxLen; i++) {
+ ittype = sa->headx[i].head.type;
+ PICODBG_INFO_CTX();
+ PICODBG_INFO_MSG(("sa-d: ("));
+ PICODBG_INFO_MSG(("'%c',", ittype));
+ if ((32 <= sa->headx[i].head.info1) &&
+ (sa->headx[i].head.info1 < 127) &&
+ (ittype != PICODATA_ITEM_WORDGRAPH) &&
+ (ittype != PICODATA_ITEM_WORDINDEX)) {
+ PICODBG_INFO_MSG(("'%c',",sa->headx[i].head.info1));
+ } else {
+ PICODBG_INFO_MSG(("%3d,", sa->headx[i].head.info1));
+ }
+ if ((32 <= sa->headx[i].head.info2) &&
+ (sa->headx[i].head.info2 < 127)) {
+ PICODBG_INFO_MSG(("'%c',",sa->headx[i].head.info2));
+ } else {
+ PICODBG_INFO_MSG(("%3d,", sa->headx[i].head.info2));
+ }
+ PICODBG_INFO_MSG(("%3d)", sa->headx[i].head.len));
+
+ for (j = 0; j < sa->headx[i].head.len; j++) {
+ if ((ittype == PICODATA_ITEM_WORDGRAPH) ||
+ (ittype == PICODATA_ITEM_CMD)) {
+ PICODBG_INFO_MSG(("%c",
+ sa->cbuf1[sa->headx[i].cind+j]));
+ } else {
+ PICODBG_INFO_MSG(("%4d",
+ sa->cbuf1[sa->headx[i].cind+j]));
+ }
+ }
+ PICODBG_INFO_MSG(("\n"));
+ }
+ }
+#endif
+
+ break;
+
+
+ /* *********************************************************/
+ /* process wpho state: process items in headx/cbuf1 and modify
+ * headx in place and fill cbuf2
+ */
+ case SA_STEPSTATE_PROCESS_WPHO:
+ /* ensure there is an item in inBuf */
+ if (sa->headxLen > 0) {
+ /* we have a phrase in headx, cbuf1 (can be single
+ PUNC item), do lex lookup, g2p, or copy */
+
+ /* check if cbuf2 is empty as it should be */
+ if (sa->cbuf2Len > 0) {
+ /* enforce emptyness */
+ PICODBG_WARN(("forcing empty cbuf2, discarding buf"));
+ picoos_emRaiseWarning(this->common->em,
+ PICO_WARN_PU_DISCARD_BUF,
+ NULL, NULL);
+ }
+
+ /* cbuf2 overflow avoided in saGrapheme*, saLexInd*,
+ saCopyItem*, phones skipped if needed */
+ for (i = 0; i < sa->headxLen; i++) {
+ switch (sa->headx[i].head.type) {
+ case PICODATA_ITEM_WORDGRAPH:
+ if (PICO_OK != saGraphemeToPhoneme(this, sa,
+ i)) {
+ /* not possible, phones skipped if needed */
+ picoos_emRaiseException(this->common->em,
+ PICO_ERR_OTHER,
+ NULL, NULL);
+ return PICODATA_PU_ERROR;
+ }
+ break;
+ case PICODATA_ITEM_WORDINDEX:
+ if (0 == sa->headx[i].head.info2) {
+ lex = sa->lex;
+ } else {
+ lex = sa->ulex[sa->headx[i].head.info2-1];
+ }
+ if (PICO_OK != saLexIndLookup(this, sa, lex, i)) {
+ /* not possible, phones skipped if needed */
+ picoos_emRaiseException(this->common->em,
+ PICO_ERR_OTHER,
+ NULL, NULL);
+ return PICODATA_PU_ERROR;
+ }
+ break;
+ default:
+ /* copy item unmodified, ie. headx untouched,
+ content from cbuf1 to cbuf2 */
+ if (PICO_OK != saCopyItemContent1to2(this, sa,
+ i)) {
+ /* not possible, phones skipped if needed */
+ picoos_emRaiseException(this->common->em,
+ PICO_ERR_OTHER,
+ NULL, NULL);
+ return PICODATA_PU_ERROR;
+ }
+ break;
+ }
+ }
+ /* set cbuf1 to empty */
+ sa->cbuf1Len = 0;
+ sa->procState = SA_STEPSTATE_PROCESS_TRNS_PARSE;
+
+ } else if (sa->headxLen == 0) { /* no items in inBuf */
+ PICODBG_WARN(("no items in inBuf"));
+ sa->procState = SA_STEPSTATE_COLLECT;
+ return PICODATA_PU_BUSY;
+ }
+
+#if defined (PICO_DEBUG)
+ if (1) {
+ picoos_uint8 i, j, ittype;
+ for (i = 0; i < sa->headxLen; i++) {
+ ittype = sa->headx[i].head.type;
+ PICODBG_INFO_CTX();
+ PICODBG_INFO_MSG(("sa-g: ("));
+ PICODBG_INFO_MSG(("'%c',", ittype));
+ if ((32 <= sa->headx[i].head.info1) &&
+ (sa->headx[i].head.info1 < 127) &&
+ (ittype != PICODATA_ITEM_WORDPHON)) {
+ PICODBG_INFO_MSG(("'%c',",sa->headx[i].head.info1));
+ } else {
+ PICODBG_INFO_MSG(("%3d,", sa->headx[i].head.info1));
+ }
+ if ((32 <= sa->headx[i].head.info2) &&
+ (sa->headx[i].head.info2 < 127)) {
+ PICODBG_INFO_MSG(("'%c',",sa->headx[i].head.info2));
+ } else {
+ PICODBG_INFO_MSG(("%3d,", sa->headx[i].head.info2));
+ }
+ PICODBG_INFO_MSG(("%3d)", sa->headx[i].head.len));
+
+ for (j = 0; j < sa->headx[i].head.len; j++) {
+ if ((ittype == PICODATA_ITEM_CMD)) {
+ PICODBG_INFO_MSG(("%c",
+ sa->cbuf2[sa->headx[i].cind+j]));
+ } else {
+ PICODBG_INFO_MSG(("%4d",
+ sa->cbuf2[sa->headx[i].cind+j]));
+ }
+ }
+ PICODBG_INFO_MSG(("\n"));
+ }
+ }
+#endif
+
+ break;
+
+
+ /* *********************************************************/
+ /* transduction parse state: extract phonemes of item in internal outBuf */
+ case SA_STEPSTATE_PROCESS_TRNS_PARSE:
+
+ PICODBG_DEBUG(("transduce item (bot, remain): (%d, %d)",
+ sa->headxBottom, sa->headxLen));
+
+ /* check for termination condition first */
+ if (0 == sa->headxLen) {
+ /* reset headx, cbuf2 */
+ sa->headxBottom = 0;
+ sa->cbuf2Len = 0;
+ /* reset collect state support variables */
+ sa->inspaceok = TRUE;
+ sa->needsmoreitems = TRUE;
+
+ sa->procState = SA_STEPSTATE_COLLECT;
+ return PICODATA_PU_BUSY;
+ }
+
+ sa->procState = SA_STEPSTATE_FEED;
+ /* copy item unmodified */
+ rv = picodata_put_itemparts(
+ &(sa->headx[sa->headxBottom].head),
+ &(sa->cbuf2[sa->headx[sa->headxBottom].cind]),
+ sa->headx[sa->headxBottom].head.len, sa->tmpbuf,
+ PICOSA_MAXITEMSIZE, &blen);
+
+ if (PICODATA_ITEM_WORDPHON == sa->headx[sa->headxBottom].head.type) {
+ PICODBG_DEBUG(("PARSE found WORDPHON"));
+ rv = saExtractPhonemes(this, sa, 0, &(sa->headx[sa->headxBottom].head),
+ &(sa->cbuf2[sa->headx[sa->headxBottom].cind]));
+ if (PICO_OK == rv) {
+ PICODBG_DEBUG(("PARSE successfully returned from phoneme extraction"));
+ sa->procState = SA_STEPSTATE_PROCESS_TRNS_FST;
+ } else {
+ PICODBG_WARN(("PARSE phone extraction returned exception %i, output WORDPHON untransduced",rv));
+ }
+ } else {
+ PICODBG_DEBUG(("PARSE found other item, just copying"));
+ }
+ if (SA_STEPSTATE_FEED == sa->procState) {
+ PICODATA_INFO_ITEM(this->voice->kbArray[PICOKNOW_KBID_DBG],
+ (picoos_uint8 *)"sa-p: ",
+ sa->tmpbuf, PICOSA_MAXITEMSIZE);
+
+ }
+
+ /* consume item */
+ sa->headxBottom++;
+ sa->headxLen--;
+
+ break;
+
+ /* *********************************************************/
+ /* transduce state: copy item in internal outBuf to tmpBuf and transduce */
+ case SA_STEPSTATE_PROCESS_TRNS_FST:
+
+
+
+
+
+ /* if no word-level FSTs: doing trivial syllabification instead */
+ if (0 == sa->numFsts) {
+ PICODBG_DEBUG(("doing trivial sylabification with %i phones", sa->phonWritePos));
+#if defined(PICO_DEBUG)
+ {
+ PICODBG_INFO_CTX();
+ PICODBG_INFO_MSG(("sa trying to trivially syllabify: "));
+ PICOTRNS_PRINTSYMSEQ(this->voice->kbArray[PICOKNOW_KBID_DBG], sa->phonBuf, sa->phonWritePos);
+ PICODBG_INFO_MSG(("\n"));
+ }
+#endif
+
+ picotrns_trivial_syllabify(sa->tabphones, sa->phonBuf,
+ sa->phonWritePos, sa->phonBufOut,
+ &sa->phonWritePos,PICOTRNS_MAX_NUM_POSSYM);
+ PICODBG_DEBUG(("returned from trivial sylabification with %i phones", sa->phonWritePos));
+#if defined(PICO_DEBUG)
+ {
+ PICODBG_INFO_CTX();
+ PICODBG_INFO_MSG(("sa returned from syllabification: "));
+ PICOTRNS_PRINTSYMSEQ(this->voice->kbArray[PICOKNOW_KBID_DBG], sa->phonBufOut, sa->phonWritePos);
+ PICODBG_INFO_MSG(("\n"));
+ }
+#endif
+
+ /* eliminate deep epsilons */
+ PICODBG_DEBUG(("doing epsilon elimination with %i phones", sa->phonWritePos));
+ picotrns_eliminate_epsilons(sa->phonBufOut,
+ sa->phonWritePos, sa->phonBuf,
+ &sa->phonWritePos,PICOTRNS_MAX_NUM_POSSYM);
+ PICODBG_DEBUG(("returning from epsilon elimination with %i phones", sa->phonWritePos));
+ sa->phonReadPos = 0;
+ sa->phonesTransduced = 1;
+ sa->procState = SA_STEPSTATE_FEED;
+ break;
+ }
+
+ /* there are word-level FSTs */
+ /* termination condition first */
+ if (sa->curFst >= sa->numFsts) {
+ /* reset for next transduction */
+ sa->curFst = 0;
+ sa->phonReadPos = 0;
+ sa->phonesTransduced = 1;
+ sa->procState = SA_STEPSTATE_FEED;
+ break;
+ }
+
+ /* transduce from phonBufIn to PhonBufOut */
+ {
+
+ picoos_uint32 nrSteps;
+#if defined(PICO_DEBUG)
+ {
+ PICODBG_INFO_CTX();
+ PICODBG_INFO_MSG(("sa trying to transduce: "));
+ PICOTRNS_PRINTSYMSEQ(this->voice->kbArray[PICOKNOW_KBID_DBG], sa->phonBuf, sa->phonWritePos);
+ PICODBG_INFO_MSG(("\n"));
+ }
+#endif
+ picotrns_transduce(sa->fst[sa->curFst], FALSE,
+ picotrns_printSolution, sa->phonBuf, sa->phonWritePos, sa->phonBufOut,
+ &sa->phonWritePos,
+ PICOTRNS_MAX_NUM_POSSYM, sa->altDescBuf,
+ sa->maxAltDescLen, &nrSteps);
+#if defined(PICO_DEBUG)
+ {
+ PICODBG_INFO_CTX();
+ PICODBG_INFO_MSG(("sa returned from transduction: "));
+ PICOTRNS_PRINTSYMSEQ(this->voice->kbArray[PICOKNOW_KBID_DBG], sa->phonBufOut, sa->phonWritePos);
+ PICODBG_INFO_MSG(("\n"));
+ }
+#endif
+ }
+
+
+
+ /*
+ The trasduction output will contain equivalent items i.e. (x,y') for each (x,y) plus inserted deep symbols (-1,d).
+ In case of deletions, (x,0) might also be omitted...
+ */
+ /* eliminate deep epsilons */
+ picotrns_eliminate_epsilons(sa->phonBufOut,
+ sa->phonWritePos, sa->phonBuf, &sa->phonWritePos,PICOTRNS_MAX_NUM_POSSYM);
+ sa->phonesTransduced = 1;
+
+ sa->curFst++;
+
+ return PICODATA_PU_ATOMIC;
+ /* break; */
+
+ /* *********************************************************/
+ /* feed state: copy item in internal outBuf to output charBuf */
+
+ case SA_STEPSTATE_FEED:
+
+ PICODBG_DEBUG(("FEED"));
+
+ if (sa->phonesTransduced) {
+ /* replace original phones by transduced */
+ picoos_uint16 phonWritePos = PICODATA_ITEM_HEADSIZE;
+ picoos_uint8 plane;
+ picoos_int16 sym, pos;
+ while (SA_POSSYM_OK == (rv = getNextPosSym(sa,&pos,&sym,sa->nextReadPos))) {
+ PICODBG_TRACE(("FEED inserting phoneme %c into inBuf[%i]",sym,phonWritePos));
+ sym = picotrns_unplane(sym, &plane);
+ PICODBG_ASSERT((PICOKFST_PLANE_PHONEMES == plane));
+ sa->tmpbuf[phonWritePos++] = (picoos_uint8) sym;
+ }
+ PICODBG_DEBUG(("FEED setting item length to %i",phonWritePos - PICODATA_ITEM_HEADSIZE));
+ picodata_set_itemlen(sa->tmpbuf,PICODATA_ITEM_HEADSIZE,phonWritePos - PICODATA_ITEM_HEADSIZE);
+ if (SA_POSSYM_INVALID == rv) {
+ PICODBG_ERROR(("FEED unexpected symbol or unexpected end of phoneme list"));
+ return (picodata_step_result_t)picoos_emRaiseException(this->common->em, PICO_WARN_INCOMPLETE, NULL, NULL);
+ }
+ sa->phonesTransduced = 0;
+
+ } /* if (sa->phonesTransduced) */
+
+
+ rvP = picodata_cbPutItem(this->cbOut, sa->tmpbuf,
+ PICOSA_MAXITEMSIZE, &clen);
+
+ *numBytesOutput += clen;
+
+ PICODBG_DEBUG(("put item, status: %d", rvP));
+
+ if (rvP == PICO_OK) {
+ } else if (rvP == PICO_EXC_BUF_OVERFLOW) {
+ /* try again next time */
+ PICODBG_DEBUG(("feeding overflow"));
+ return PICODATA_PU_OUT_FULL;
+ } else {
+ /* error, should never happen */
+ PICODBG_ERROR(("untreated return value, rvP: %d", rvP));
+ return PICODATA_PU_ERROR;
+ }
+
+ PICODATA_INFO_ITEM(this->voice->kbArray[PICOKNOW_KBID_DBG],
+ (picoos_uint8 *)"sana: ",
+ sa->tmpbuf, PICOSA_MAXITEMSIZE);
+
+ sa->procState = SA_STEPSTATE_PROCESS_TRNS_PARSE;
+ /* return PICODATA_PU_BUSY; */
+ break;
+
+ default:
+ break;
+ } /* switch */
+
+ } /* while */
+
+ /* should be never reached */
+ PICODBG_ERROR(("reached end of function"));
+ picoos_emRaiseException(this->common->em, PICO_ERR_OTHER, NULL, NULL);
+ return PICODATA_PU_ERROR;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+
+/* end */