diff options
Diffstat (limited to 'lib/picokdt.c')
-rw-r--r-- | lib/picokdt.c | 2642 |
1 files changed, 2642 insertions, 0 deletions
diff --git a/lib/picokdt.c b/lib/picokdt.c new file mode 100644 index 0000000..54e36ac --- /dev/null +++ b/lib/picokdt.c @@ -0,0 +1,2642 @@ +/* + * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @file picokdt.c + * + * knowledge handling for decision trees + * + * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland + * All rights reserved. + * + * History: + * - 2009-04-20 -- initial version + * + */ + +#include "picoos.h" +#include "picodbg.h" +#include "picobase.h" +#include "picoknow.h" +#include "picodata.h" +#include "picokdt.h" + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} +#endif + + +/* ************************************************************/ +/* decision tree */ +/* ************************************************************/ + +/** + * @addtogroup picokdt + * ---------------------------------------------------\n + * <b> Pico KDT support </b>\n + * ---------------------------------------------------\n + overview extended binary tree file: + - dt consists of optional attribute mapping tables and a non-empty + tree part + - using the attribute mapping tables an attribute value as used + throughout the TTS can be mapped to its smaller representation + used in the tree + - multi-byte values always little endian + + ------------------------------------------------------------------- + - bin-file, decision tree knowledge base in 
binary form + + - dt-kb = header inputmaptables outputmaptables tree + + + - header = INPMAPTABLEPOS2 OUTMAPTABLEPOS2 TREEPOS2 + + - INPMAPTABLEPOS2: two bytes, equals offset in number of bytes from + the start of kb to the start of input map tables, + may not be 0 + - OUTMAPTABLEPOS2: two bytes, equals offset in number of bytes from + the start of kb to the start of outtables, + may not be 0 + - TREEPOS2: two bytes, equals offset in number of bytes from the + start of kb to the start of the tree + + + - inputmaptables = maptables + - outputmaptables = maptables + - maptables = NRMAPTABLES1 {maptable}=NRMAPTABLES1 + - maptable = LENTABLE2 TABLETYPE1 ( bytemaptable + | wordmaptable + | graphinmaptable + | bytetovarmaptable ) + - bytemaptable (in or out, usage varies) = NRBYTES2 {BYTE1}=NRBYTES2 + - wordmaptable (in or out, usage varies) = NRWORDS2 {WORD2}=NRWORDS2 + - graphinmaptable (in only) = NRGRAPHS2 {GRAPH1:4}=NRGRAPHS2 + - bytetovarmaptable (out only) = NRINBYTES2 outvarsearchind + outvaroutputs + - outvarsearchind = {OUTVAROFFSET2}=NRINBYTES2 + - outvaroutputs = {VARVALID1:}=NRINBYTES2 + + - bytemaptable: fixed size, *Map*Fixed \n + - wordmaptable: fixed size, *Map*Fixed \n + - graphinmaptable: search value is variable size (UTF8 grapheme), \n + value to be mapped to is fixed size, one byte \n + - bytetovarmaptable: search value is fixed size, one byte, values \n + to be mapped to are of variable size (e.g. several \n + phones) \n + + - NRMAPTABLES1: one byte representing the number of map tables + - LENTABLE2: two bytes, equals offset to the next table (or next + part of kb, e.g. 
tree), + if LENTABLE2 = 3, and + TABLETYPE1 = EMPTY -> empty table, no mapping to be done + - TABLETYPE1: one byte, type of map table (byte, word, or graph=utf8) + - NRBYTES2: two bytes, number of bytes following in the table (one + would be okay, to simplify some implementation also set + to 2) + - BYTE1: one byte, the sequence is used to determine the values + being mapped to, starting with 0 + - NRWORDS2: two bytes, number of words (two bytes) following in the table + - WORD2: two bytes, the sequence is used to determine the values + being mapped to, starting with 0 + - NRGRAPHS2: two bytes, number of graphemes encoded in UTF8 following + in table + - GRAPH1:4: one to four bytes, UTF8 representation of a grapheme, the + sequence of graphemes is used to determine the value being + mapped to, starting with 0, the length information is + encoded in UTF8, no need for extra length info + - NRINBYTES2: two bytes, number of single byte IDs the tree can produce + - OUTVAROFFSET2: two bytes, offset from the start of the + outvaroutputs to the start of the following output + phone ID group, ie. the first outvaroffset is the + offset to the start of the second PHONEID + group. Using the previous outvaroffset (or the start + of the outvaroutputs) the start and length of the + PHONEID group can be determined and we can get the + sequence of output values we map the chunk value to + - VARVALID1:: one to several bytes, one byte each for an output phone ID + + - tree = treenodeinfos TREEBODYSIZE4 treebody + - treenodeinfos = NRVFIELDS1 vfields NRATTRIBUTES1 NRQFIELDS1 qfields + - vfields = {VFIELD1}=NRVFIELDS1 + - qfields = {QFIELD1}=NRATTRIBUTES1xNRQFIELDS1 + - treebody = "cf. 
code" + + - TREEBODYSIZE4: four bytes, size of treebody in number of bytes + - NRVFIELDS1: one byte, number of node properties in the following + vector (predefined and fixed sequence of properties) + - VFIELD1: number of bits used to represent a node property + - NRATTRIBUTES1: one byte, number of attributes (rows) in the + following matrix + - NRQFIELDS1: one byte, number (columns) of question-dependent node + properties per attribute in the following matrix + (predefined and fixed sequence of properties) + - QFIELD1: number of bits used to represent a question-dependent + property in the matrix + + + - Currently, + - NRVFIELDS1 is fixed at 2 for all trees, ie. + - vfields = 2 aVFIELD1 bVFIELD1 + - aVFIELD1: nr of bits for questions + - bVFIELD1: nr of bits for decisions + + - NRQFIELDS1 is fixed at 5 for all trees, ie. \n + - qfields = NRATTRIBUTES1 5 aQFIELD1 bQFIELD1 cQFIELD1 dQFIELD1 eQFIELD1 \n + - aQFIELD1: nr of bits for fork count \n + - bQFIELD1: nr of bits for start position for subsets \n + - cQFIELD1: nr of bits for group size \n + - dQFIELD1: nr of bits for offset to reach output \n + - eQFIELD1: nr of bits for threshold (if continuous node) \n +*/ + + +/* ************************************************************/ +/* decision tree data defines */ +/* may not be changed with current implementation */ +/* ************************************************************/ + +/* maptables fields */ +#define PICOKDT_MTSPOS_NRMAPTABLES 0 + +/* position of first byte of first maptable (for omt the only table */ +#define PICOKDT_MTPOS_START 1 + +/* maptable fields */ +#define PICOKDT_MTPOS_LENTABLE 0 +#define PICOKDT_MTPOS_TABLETYPE 2 +#define PICOKDT_MTPOS_NUMBER 3 +#define PICOKDT_MTPOS_MAPSTART 5 + +/* treenodeinfos fields */ +#define PICOKDT_NIPOS_NRVFIELDS 0 +#define PICOKDT_NIPOS_NRATTS 3 +#define PICOKDT_NIPOS_NRQFIELDS 4 + +/* fixed treenodeinfos number of fields */ +#define PICOKDT_NODEINFO_NRVFIELDS 2 +#define PICOKDT_NODEINFO_NRQFIELDS 5 + +/* 
fixed number of bits used */ +#define PICOKDT_NODETYPE_NRBITS 2 +#define PICOKDT_SUBSETTYPE_NRBITS 2 +#define PICOKDT_ISDECIDE_NRBITS 1 + +/* number of inpmaptables for each tree. Since we have a possibly + empty input map table for each att, currently these values must be + equal to PICOKDT_NRATT* */ +typedef enum { + PICOKDT_NRINPMT_POSP = 12, + PICOKDT_NRINPMT_POSD = 7, + PICOKDT_NRINPMT_G2P = 16, + PICOKDT_NRINPMT_PHR = 8, + PICOKDT_NRINPMT_ACC = 13, + PICOKDT_NRINPMT_PAM = 60 +} kdt_nrinpmaptables_t; + +/* number of outmaptables for each tree, at least one, possibly empty, + output map table for each tree */ +typedef enum { + PICOKDT_NROUTMT_POSP = 1, + PICOKDT_NROUTMT_POSD = 1, + PICOKDT_NROUTMT_G2P = 1, + PICOKDT_NROUTMT_PHR = 1, + PICOKDT_NROUTMT_ACC = 1, + PICOKDT_NROUTMT_PAM = 1 +} kdt_nroutmaptables_t; + +/* maptable types */ +typedef enum { + PICOKDT_MTTYPE_EMPTY = 0, + PICOKDT_MTTYPE_BYTE = 1, + PICOKDT_MTTYPE_WORD = 2, + PICOKDT_MTTYPE_GRAPH = 3, + PICOKDT_MTTYPE_BYTETOVAR = 4 +} kdt_mttype_t; + + +/* ************************************************************/ +/* decision tree types and loading */ +/* ************************************************************/ +/* object : Dt*KnowledgeBase + * shortcut : kdt* + * derived from : picoknow_KnowledgeBase + */ + +/* subobj shared by all decision trees */ +typedef struct { + picokdt_kdttype_t type; + picoos_uint8 *inpmaptable; + picoos_uint8 *outmaptable; + picoos_uint8 *tree; + picoos_uint32 beg_offset[128]; /* for efficiency */ + + /* tree-internal details for faster processing */ + picoos_uint8 *vfields; + picoos_uint8 *qfields; + picoos_uint8 nrattributes; + picoos_uint8 *treebody; + /*picoos_uint8 nrvfields;*/ /* fix PICOKDT_NODEINFO_NRVFIELDS */ + /*picoos_uint8 nrqfields;*/ /* fix PICOKDT_NODEINFO_NRQFIELDS */ + + /* direct output vector (no output mapping) */ + picoos_uint8 dset; /* TRUE if class set, FALSE otherwise */ + picoos_uint16 dclass; +} kdt_subobj_t; + +/* subobj specific for each 
decision tree type */ +typedef struct { + kdt_subobj_t dt; + picoos_uint16 invec[PICOKDT_NRATT_POSP]; /* input vector */ + picoos_uint8 inveclen; /* nr of ele set in invec; must be =nrattributes */ +} kdtposp_subobj_t; + +typedef struct { + kdt_subobj_t dt; + picoos_uint16 invec[PICOKDT_NRATT_POSD]; /* input vector */ + picoos_uint8 inveclen; /* nr of ele set in invec; must be =nrattributes */ +} kdtposd_subobj_t; + +typedef struct { + kdt_subobj_t dt; + picoos_uint16 invec[PICOKDT_NRATT_G2P]; /* input vector */ + picoos_uint8 inveclen; /* nr of ele set in invec; must be =nrattributes */ +} kdtg2p_subobj_t; + +typedef struct { + kdt_subobj_t dt; + picoos_uint16 invec[PICOKDT_NRATT_PHR]; /* input vector */ + picoos_uint8 inveclen; /* nr of ele set in invec; must be =nrattributes */ +} kdtphr_subobj_t; + +typedef struct { + kdt_subobj_t dt; + picoos_uint16 invec[PICOKDT_NRATT_ACC]; /* input vector */ + picoos_uint8 inveclen; /* nr of ele set in invec; must be =nrattributes */ +} kdtacc_subobj_t; + +typedef struct { + kdt_subobj_t dt; + picoos_uint16 invec[PICOKDT_NRATT_PAM]; /* input vector */ + picoos_uint8 inveclen; /* nr of ele set in invec; must be =nrattributes */ +} kdtpam_subobj_t; + + +static pico_status_t kdtDtInitialize(register picoknow_KnowledgeBase this, + picoos_Common common, + kdt_subobj_t *dtp) { + picoos_uint16 inppos; + picoos_uint16 outpos; + picoos_uint16 treepos; + picoos_uint32 curpos = 0, pos; + picoos_uint16 lentable; + picoos_uint16 i; + picoos_uint8 imtnr; + + PICODBG_DEBUG(("start")); + + /* get inmap, outmap, tree offsets */ + if ((PICO_OK == picoos_read_mem_pi_uint16(this->base, &curpos, &inppos)) + && (PICO_OK == picoos_read_mem_pi_uint16(this->base, &curpos, &outpos)) + && (PICO_OK == picoos_read_mem_pi_uint16(this->base, &curpos, + &treepos))) { + + /* all pos are mandatory, verify */ + if (inppos && outpos && treepos) { + dtp->inpmaptable = this->base + inppos; + dtp->outmaptable = this->base + outpos; + dtp->tree = this->base + 
treepos; + /* precalc beg offset table */ + imtnr=dtp->inpmaptable[0]; + pos=1; + dtp->beg_offset[0] = 1; + for (i = 0; i < imtnr; i++) { + lentable = ((picoos_uint16)(dtp->inpmaptable[pos+1])) << 8 | + dtp->inpmaptable[pos]; + pos += lentable; + dtp->beg_offset[i+1] = pos; + } + } else { + dtp->inpmaptable = NULL; + dtp->outmaptable = NULL; + dtp->tree = NULL; + PICODBG_ERROR(("invalid kb position info")); + return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT, + NULL, NULL); + } + + /* nr of outmaptables is equal 1 for all trees, verify */ + if (dtp->outmaptable[PICOKDT_MTSPOS_NRMAPTABLES] != 1) { + PICODBG_ERROR(("wrong number of outmaptables")); + return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT, + NULL, NULL); + } + + /* check if this is an empty table, ie. len == 3 */ + if ((dtp->outmaptable[PICOKDT_MTPOS_START + PICOKDT_MTPOS_LENTABLE] + == 3) + && (dtp->outmaptable[PICOKDT_MTPOS_START + PICOKDT_MTPOS_LENTABLE + + 1] == 0)) { + /* verify that this is supposed to be an empty table and + set outmaptable to NULL if so */ + if (dtp->outmaptable[PICOKDT_MTPOS_START + PICOKDT_MTPOS_TABLETYPE] + == PICOKDT_MTTYPE_EMPTY) { + dtp->outmaptable = NULL; + } else { + PICODBG_ERROR(("table length vs. 
type problem")); + return picoos_emRaiseException(common->em, + PICO_EXC_FILE_CORRUPT, + NULL, NULL); + } + } + + dtp->vfields = dtp->tree + 1; + dtp->qfields = dtp->tree + PICOKDT_NODEINFO_NRVFIELDS + 3; + dtp->nrattributes = dtp->tree[PICOKDT_NIPOS_NRATTS]; + dtp->treebody = dtp->qfields + 4 + + (dtp->nrattributes * PICOKDT_NODEINFO_NRQFIELDS); /* TREEBODYSIZE4*/ + + /*dtp->nrvfields = dtp->tree[PICOKDT_NIPOS_NRVFIELDS]; <- is fix */ + /*dtp->nrqfields = dtp->tree[PICOKDT_NIPOS_NRQFIELDS]; <- is fix */ + /* verify that nrvfields ad nrqfields are correct */ + if ((PICOKDT_NODEINFO_NRVFIELDS != dtp->tree[PICOKDT_NIPOS_NRVFIELDS]) || + (PICOKDT_NODEINFO_NRQFIELDS != dtp->tree[PICOKDT_NIPOS_NRQFIELDS])) { + PICODBG_ERROR(("problem with nr of vfields (%d) or qfields (%d)", + dtp->tree[PICOKDT_NIPOS_NRVFIELDS], + dtp->tree[PICOKDT_NIPOS_NRQFIELDS])); + return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT, + NULL, NULL); + } + dtp->dset = 0; + dtp->dclass = 0; + PICODBG_DEBUG(("tree init: nratt: %d, posomt: %d, postree: %d", + dtp->nrattributes, (dtp->outmaptable - dtp->inpmaptable), + (dtp->tree - dtp->inpmaptable))); + return PICO_OK; + } else { + PICODBG_ERROR(("problem reading kb in memory")); + return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT, + NULL, NULL); + } +} + + +static pico_status_t kdtDtCheck(register picoknow_KnowledgeBase this, + picoos_Common common, + kdt_subobj_t *dtp, + kdt_nratt_t nratt, + kdt_nrinpmaptables_t nrinpmt, + kdt_nroutmaptables_t nroutmt, + kdt_mttype_t mttype) { + /* check nr attributes */ + /* check nr inpmaptables */ + /* check nr outmaptables */ + /* check outmaptable is word type */ + if ((nratt != dtp->nrattributes) + || (dtp->inpmaptable == NULL) + || (dtp->outmaptable == NULL) + || (dtp->inpmaptable[PICOKDT_MTSPOS_NRMAPTABLES] != nrinpmt) + || (dtp->outmaptable[PICOKDT_MTSPOS_NRMAPTABLES] != nroutmt) + || (dtp->outmaptable[PICOKDT_MTPOS_START+PICOKDT_MTPOS_TABLETYPE] + != mttype)) { + 
PICODBG_ERROR(("check failed, nratt %d, nrimt %d, nromt %d, omttype %d", + dtp->nrattributes, + dtp->inpmaptable[PICOKDT_MTSPOS_NRMAPTABLES], + dtp->outmaptable[PICOKDT_MTSPOS_NRMAPTABLES], + dtp->outmaptable[PICOKDT_MTPOS_START + + PICOKDT_MTPOS_TABLETYPE])); + return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT, + NULL, NULL); + } + return PICO_OK; +} + + + +static pico_status_t kdtPosPInitialize(register picoknow_KnowledgeBase this, + picoos_Common common) { + pico_status_t status; + kdtposp_subobj_t *dtposp; + kdt_subobj_t *dt; + picoos_uint8 i; + + if (NULL == this || NULL == this->subObj) { + return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING, + NULL, NULL); + } + dtposp = (kdtposp_subobj_t *)this->subObj; + dt = &(dtposp->dt); + dt->type = PICOKDT_KDTTYPE_POSP; + if ((status = kdtDtInitialize(this, common, dt)) != PICO_OK) { + return status; + } + if ((status = kdtDtCheck(this, common, dt, PICOKDT_NRATT_POSP, + PICOKDT_NRINPMT_POSP, PICOKDT_NROUTMT_POSP, + PICOKDT_MTTYPE_WORD)) != PICO_OK) { + return status; + } + + /* init specialized subobj part */ + for (i = 0; i < PICOKDT_NRATT_POSP; i++) { + dtposp->invec[i] = 0; + } + dtposp->inveclen = 0; + PICODBG_DEBUG(("posp tree initialized")); + return PICO_OK; +} + + +static pico_status_t kdtPosDInitialize(register picoknow_KnowledgeBase this, + picoos_Common common) { + pico_status_t status; + kdtposd_subobj_t *dtposd; + kdt_subobj_t *dt; + picoos_uint8 i; + + if (NULL == this || NULL == this->subObj) { + return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING, + NULL, NULL); + } + dtposd = (kdtposd_subobj_t *)this->subObj; + dt = &(dtposd->dt); + dt->type = PICOKDT_KDTTYPE_POSD; + if ((status = kdtDtInitialize(this, common, dt)) != PICO_OK) { + return status; + } + if ((status = kdtDtCheck(this, common, dt, PICOKDT_NRATT_POSD, + PICOKDT_NRINPMT_POSD, PICOKDT_NROUTMT_POSD, + PICOKDT_MTTYPE_WORD)) != PICO_OK) { + return status; + } + + /* init spezialized subobj part */ + for (i = 
0; i < PICOKDT_NRATT_POSD; i++) { + dtposd->invec[i] = 0; + } + dtposd->inveclen = 0; + PICODBG_DEBUG(("posd tree initialized")); + return PICO_OK; +} + + +static pico_status_t kdtG2PInitialize(register picoknow_KnowledgeBase this, + picoos_Common common) { + pico_status_t status; + kdtg2p_subobj_t *dtg2p; + kdt_subobj_t *dt; + picoos_uint8 i; + + if (NULL == this || NULL == this->subObj) { + return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING, + NULL, NULL); + } + dtg2p = (kdtg2p_subobj_t *)this->subObj; + dt = &(dtg2p->dt); + dt->type = PICOKDT_KDTTYPE_G2P; + if ((status = kdtDtInitialize(this, common, dt)) != PICO_OK) { + return status; + } + + if ((status = kdtDtCheck(this, common, dt, PICOKDT_NRATT_G2P, + PICOKDT_NRINPMT_G2P, PICOKDT_NROUTMT_G2P, + PICOKDT_MTTYPE_BYTETOVAR)) != PICO_OK) { + return status; + } + + /* init spezialized subobj part */ + for (i = 0; i < PICOKDT_NRATT_G2P; i++) { + dtg2p->invec[i] = 0; + } + dtg2p->inveclen = 0; + PICODBG_DEBUG(("g2p tree initialized")); + return PICO_OK; +} + + +static pico_status_t kdtPhrInitialize(register picoknow_KnowledgeBase this, + picoos_Common common) { + pico_status_t status; + kdtphr_subobj_t *dtphr; + kdt_subobj_t *dt; + picoos_uint8 i; + + if (NULL == this || NULL == this->subObj) { + return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING, + NULL, NULL); + } + dtphr = (kdtphr_subobj_t *)this->subObj; + dt = &(dtphr->dt); + dt->type = PICOKDT_KDTTYPE_PHR; + if ((status = kdtDtInitialize(this, common,dt)) != PICO_OK) { + return status; + } + + if ((status = kdtDtCheck(this, common, dt, PICOKDT_NRATT_PHR, + PICOKDT_NRINPMT_PHR, PICOKDT_NROUTMT_PHR, + PICOKDT_MTTYPE_WORD)) != PICO_OK) { + return status; + } + + /* init spezialized subobj part */ + for (i = 0; i < PICOKDT_NRATT_PHR; i++) { + dtphr->invec[i] = 0; + } + dtphr->inveclen = 0; + PICODBG_DEBUG(("phr tree initialized")); + return PICO_OK; +} + + +static pico_status_t kdtAccInitialize(register picoknow_KnowledgeBase this, + 
picoos_Common common) { + pico_status_t status; + kdtacc_subobj_t *dtacc; + kdt_subobj_t *dt; + picoos_uint8 i; + + if (NULL == this || NULL == this->subObj) { + return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING, + NULL, NULL); + } + dtacc = (kdtacc_subobj_t *)this->subObj; + dt = &(dtacc->dt); + dt->type = PICOKDT_KDTTYPE_ACC; + if ((status = kdtDtInitialize(this, common, dt)) != PICO_OK) { + return status; + } + + if ((status = kdtDtCheck(this, common, dt, PICOKDT_NRATT_ACC, + PICOKDT_NRINPMT_ACC, PICOKDT_NROUTMT_ACC, + PICOKDT_MTTYPE_WORD)) != PICO_OK) { + return status; + } + + /* init spezialized subobj part */ + for (i = 0; i < PICOKDT_NRATT_ACC; i++) { + dtacc->invec[i] = 0; + } + dtacc->inveclen = 0; + PICODBG_DEBUG(("acc tree initialized")); + return PICO_OK; +} + + +static pico_status_t kdtPamInitialize(register picoknow_KnowledgeBase this, + picoos_Common common) { + pico_status_t status; + kdtpam_subobj_t *dtpam; + kdt_subobj_t *dt; + picoos_uint8 i; + + if (NULL == this || NULL == this->subObj) { + return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING, + NULL, NULL); + } + dtpam = (kdtpam_subobj_t *)this->subObj; + dt = &(dtpam->dt); + dt->type = PICOKDT_KDTTYPE_PAM; + if ((status = kdtDtInitialize(this, common, dt)) != PICO_OK) { + return status; + } + + if ((status = kdtDtCheck(this, common, dt, PICOKDT_NRATT_PAM, + PICOKDT_NRINPMT_PAM, PICOKDT_NROUTMT_PAM, + PICOKDT_MTTYPE_WORD)) != PICO_OK) { + return status; + } + + /* init spezialized subobj part */ + for (i = 0; i < PICOKDT_NRATT_PAM; i++) { + dtpam->invec[i] = 0; + } + dtpam->inveclen = 0; + PICODBG_DEBUG(("pam tree initialized")); + return PICO_OK; +} + + +static pico_status_t kdtSubObjDeallocate(register picoknow_KnowledgeBase this, + picoos_MemoryManager mm) { + if (NULL != this) { + picoos_deallocate(mm, (void *) &this->subObj); + } + return PICO_OK; +} + + +/* we don't offer a specialized constructor for a *KnowledgeBase but + * instead a "specializer" of an allready 
existing generic + * picoknow_KnowledgeBase */ + +pico_status_t picokdt_specializeDtKnowledgeBase(picoknow_KnowledgeBase this, + picoos_Common common, + const picokdt_kdttype_t kdttype) { + pico_status_t status; + + if (NULL == this) { + return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING, + NULL, NULL); + } + this->subDeallocate = kdtSubObjDeallocate; + switch (kdttype) { + case PICOKDT_KDTTYPE_POSP: + this->subObj = picoos_allocate(common->mm,sizeof(kdtposp_subobj_t)); + if (NULL == this->subObj) { + return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, + NULL, NULL); + } + status = kdtPosPInitialize(this, common); + break; + case PICOKDT_KDTTYPE_POSD: + this->subObj = picoos_allocate(common->mm,sizeof(kdtposd_subobj_t)); + if (NULL == this->subObj) { + return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, + NULL, NULL); + } + status = kdtPosDInitialize(this, common); + break; + case PICOKDT_KDTTYPE_G2P: + this->subObj = picoos_allocate(common->mm,sizeof(kdtg2p_subobj_t)); + if (NULL == this->subObj) { + return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, + NULL, NULL); + } + status = kdtG2PInitialize(this, common); + break; + case PICOKDT_KDTTYPE_PHR: + this->subObj = picoos_allocate(common->mm,sizeof(kdtphr_subobj_t)); + if (NULL == this->subObj) { + return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, + NULL, NULL); + } + status = kdtPhrInitialize(this, common); + break; + case PICOKDT_KDTTYPE_ACC: + this->subObj = picoos_allocate(common->mm,sizeof(kdtacc_subobj_t)); + if (NULL == this->subObj) { + return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, + NULL, NULL); + } + status = kdtAccInitialize(this, common); + break; + case PICOKDT_KDTTYPE_PAM: + this->subObj = picoos_allocate(common->mm,sizeof(kdtpam_subobj_t)); + if (NULL == this->subObj) { + return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, + NULL, NULL); + } + status = kdtPamInitialize(this, common); + break; + default: + 
return picoos_emRaiseException(common->em, PICO_ERR_OTHER, + NULL, NULL); + } + + if (status != PICO_OK) { + picoos_deallocate(common->mm, (void *) &this->subObj); + return picoos_emRaiseException(common->em, status, NULL, NULL); + } + return PICO_OK; +} + + +/* ************************************************************/ +/* decision tree getDt* */ +/* ************************************************************/ + +picokdt_DtPosP picokdt_getDtPosP(picoknow_KnowledgeBase this) { + return ((NULL == this) ? NULL : ((picokdt_DtPosP) this->subObj)); +} + +picokdt_DtPosD picokdt_getDtPosD(picoknow_KnowledgeBase this) { + return ((NULL == this) ? NULL : ((picokdt_DtPosD) this->subObj)); +} + +picokdt_DtG2P picokdt_getDtG2P (picoknow_KnowledgeBase this) { + return ((NULL == this) ? NULL : ((picokdt_DtG2P) this->subObj)); +} + +picokdt_DtPHR picokdt_getDtPHR (picoknow_KnowledgeBase this) { + return ((NULL == this) ? NULL : ((picokdt_DtPHR) this->subObj)); +} + +picokdt_DtACC picokdt_getDtACC (picoknow_KnowledgeBase this) { + return ((NULL == this) ? NULL : ((picokdt_DtACC) this->subObj)); +} + +picokdt_DtPAM picokdt_getDtPAM (picoknow_KnowledgeBase this) { + return ((NULL == this) ? NULL : ((picokdt_DtPAM) this->subObj)); +} + + + +/* ************************************************************/ +/* decision tree support functions, tree */ +/* ************************************************************/ + + +typedef enum { + eQuestion = 0, /* index to #bits to identify question */ + eDecide = 1 /* index to #bits to identify decision */ +} kdt_vfields_ind_t; + +typedef enum { + eForkCount = 0, /* index to #bits for number of forks */ + eBitNo = 1, /* index to #bits for index of 1st element */ + eBitCount = 2, /* index to #bits for size of the group */ + eJump = 3, /* index to #bits for offset to reach output node */ + eCut = 4 /* for contin. 
node: #bits for threshold checked */ +} kdt_qfields_ind_t; + +typedef enum { + eNTerminal = 0, + eNBinary = 1, + eNContinuous = 2, + eNDiscrete = 3 +} kdt_nodetypes_t; + +typedef enum { + eOneValue = 0, + eTwoValues = 1, + eWithoutBitMask = 2, + eBitMask = 3 +} kdt_subsettypes_t; + + +/* Name : kdt_jump + Function: maps the iJump offset to byte + bit coordinates + Input : iJump absolute bit offset (0..(nr-bytes-treebody)*8) + Output : iByteNo the first byte containing the bits to extract + (0..(nr-bytes-treebody)) + iBitNo the first bit to be extracted (0..7) + Returns : void + Notes : updates the iByteNo + iBitNo fields +*/ +static void kdt_jump(const picoos_uint32 iJump, + picoos_uint32 *iByteNo, + picoos_int8 *iBitNo) { + picoos_uint32 iByteSize; + + iByteSize = (iJump / 8 ); + *iBitNo = (iJump - (iByteSize * 8)) + (7 - *iBitNo); + *iByteNo += iByteSize; + if (*iBitNo >= 8) { + (*iByteNo)++; + *iBitNo = 15 - *iBitNo; + } else { + *iBitNo = 7 - *iBitNo; + } +} + + +/* replaced inline for speedup */ +/* Name : kdtIsVal + Function: Returns the binary value of the bit pointed to by iByteNo, iBitNo + Input : iByteNo ofsset to the byte containing the bits to extract + (0..sizeof(treebody)) + iBitNo ofsset to the first bit to be extracted (0..7) + Returns : 0/1 depending on the bit pointed to +*/ +/* +static picoos_uint8 kdtIsVal(register kdt_subobj_t *this, + picoos_uint32 iByteNo, + picoos_int8 iBitNo) { + return ((this->treebody[iByteNo] & ((1)<<iBitNo)) > 0); +} +*/ + + +/* @todo : consider replacing inline for speedup */ + +/* Name : kdtGetQFieldsVal (was: m_QuestDependentFields) + Function: gets a byte from qfields + Input : this handle to a dt subobj + attind index of the attribute + qind index of the byte to be read + Returns : the requested byte + Notes : check that attind < this->nrattributes needed before calling + this function! 
+*/ +static picoos_uint8 kdtGetQFieldsVal(register kdt_subobj_t *this, + const picoos_uint8 attind, + const kdt_qfields_ind_t qind) { + /* check of qind done in initialize and (for some compilers) with typing */ + /* check of attind needed before calling this function */ + return this->qfields[(attind * PICOKDT_NODEINFO_NRQFIELDS) + qind]; +} + + +/* Name : kdtGetShiftVal (was: get_shift_value) + Function: returns the (treebody) value pointed to by iByteNo, iBitNo, + and with size iSize + Input : this reference to the processing unit struct + iSize number of bits to be extracted (0..N) + iByteNo ofsset to the byte containing the bits to extract + (0..sizeof(treebody)) + iBitNo ofsset to the first bit to be extracted (0..7) + Returns : the value requested (if size==0 --> 0 is returned) +*/ +/* +static picoos_uint32 orig_kdtGetShiftVal(register kdt_subobj_t *this, + const picoos_int16 iSize, + picoos_uint32 *iByteNo, + picoos_int8 *iBitNo) { + picoos_uint32 iVal; + picoos_int16 i; + + iVal = 0; + for (i = iSize-1; i >= 0; i--) { + if ( (this->treebody[*iByteNo] & ((1)<<(*iBitNo))) > 0) { + iVal |= ( (1) << i ); + } + (*iBitNo)--; + if (*iBitNo < 0) { + *iBitNo = 7; + (*iByteNo)++; + } + } + return iVal; +} +*/ +/* refactor */ +static picoos_uint32 kdtGetShiftVal(register kdt_subobj_t *this, + const picoos_int16 iSize, picoos_uint32 *iByteNo, picoos_int8 *iBitNo) +{ + picoos_uint32 v, b, iVal; + picoos_int16 i, j, len; + picoos_uint8 val; + + if (iSize < 4) { + iVal = 0; + for (i = iSize - 1; i >= 0; i--) { + /* no check that *iByteNo is within valid treebody range */ + if ((this->treebody[*iByteNo] & ((1) << (*iBitNo))) > 0) { + iVal |= ((1) << i); + } + (*iBitNo)--; + if (*iBitNo < 0) { + *iBitNo = 7; + (*iByteNo)++; + } + } + return iVal; + } + + b = *iByteNo; + j = *iBitNo; + len = iSize; + *iBitNo = j - iSize; + v = 0; + while (*iBitNo < 0) { + *iBitNo += 8; + (*iByteNo)++; + } + + val = this->treebody[b++]; + if (j < 7) { + switch (j) { + case 0: + val &= 0x01; 
+ break; + case 1: + val &= 0x03; + break; + case 2: + val &= 0x07; + break; + case 3: + val &= 0x0f; + break; + case 4: + val &= 0x1f; + break; + case 5: + val &= 0x3f; + break; + case 6: + val &= 0x7f; + break; + } + } + len -= j + 1; + if (len < 0) { + val >>= -len; + } + v = val; + while (len > 0) { + if (len >= 8) { + j = 8; + } else { + j = len; + } + v <<= j; + val = this->treebody[b++]; + if (j < 8) { + switch (j) { + case 1: + val &= 0x80; + val >>= 7; + break; + case 2: + val &= 0xc0; + val >>= 6; + break; + case 3: + val &= 0xe0; + val >>= 5; + break; + case 4: + val &= 0xf0; + val >>= 4; + break; + case 5: + val &= 0xf8; + val >>= 3; + break; + case 6: + val &= 0xfc; + val >>= 2; + break; + case 7: + val &= 0xfe; + val >>= 1; + break; + } + } + v |= val; + len -= j; + } + return v; +} + + +/* Name : kdtAskTree + Function: Tree Traversal routine + Input : iByteNo ofsset to the first byte containing the bits + to extract (0..sizeof(treebody)) + iBitNo ofsset to the first bit to be extracted (0..7) + Returns : >0 continue, no solution yet found + =0 solution found + <0 error, no solution found + Notes : +*/ +static picoos_int8 kdtAskTree(register kdt_subobj_t *this, + picoos_uint16 *invec, + const kdt_nratt_t invecmax, + picoos_uint32 *iByteNo, + picoos_int8 *iBitNo) { + picoos_uint32 iNodeType; + picoos_uint8 iQuestion; + picoos_int32 iVal; + picoos_int32 iForks; + picoos_int32 iID; + + picoos_int32 iCut, iSubsetType, iBitPos, iBitCount, iPos, iJump, iDecision; + picoos_int32 i; + picoos_char iIsDecide; + + PICODBG_TRACE(("start")); + + /* get node type, value should be in kdt_nodetype_t range */ + iNodeType = kdtGetShiftVal(this, PICOKDT_NODETYPE_NRBITS, iByteNo, iBitNo); + PICODBG_TRACE(("iNodeType: %d", iNodeType)); + + /* get attribute to be used in question, check if in range, and get val */ + /* check of vfields argument done in initialize */ + iQuestion = kdtGetShiftVal(this, this->vfields[eQuestion], iByteNo, iBitNo); + if ((iQuestion < 
this->nrattributes) && (iQuestion < invecmax)) { + iVal = invec[iQuestion]; + } else { + this->dset = FALSE; + PICODBG_TRACE(("invalid question")); + return -1; /* iQuestion invalid */ + } + iForks = 0; + iID = -1; + PICODBG_TRACE(("iQuestion: %d", iQuestion)); + + switch (iNodeType) { + case eNBinary: { + iForks = 2; + iID = iVal; + break; + } + case eNContinuous: { + iForks = 2; + iID = 1; + iCut = kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion, eCut), + iByteNo, iBitNo); /*read the threshold*/ + if (iVal <= iCut) { + iID = 0; + } + break; + } + case eNDiscrete: { + iForks = + kdtGetShiftVal(this, + kdtGetQFieldsVal(this, iQuestion, eForkCount), + iByteNo, iBitNo); + + for (i = 0; i < iForks-1; i++) { + iSubsetType = + kdtGetShiftVal(this, PICOKDT_SUBSETTYPE_NRBITS, + iByteNo, iBitNo); + + switch (iSubsetType) { + case eOneValue: { + if (iID > -1) { + kdt_jump(kdtGetQFieldsVal(this, iQuestion, eBitNo), + iByteNo, iBitNo); + break; + } + iBitPos = + kdtGetShiftVal(this, + kdtGetQFieldsVal(this, iQuestion, + eBitNo), + iByteNo, iBitNo); + if (iVal == iBitPos) { + iID = i; + } + break; + } + case eTwoValues: { + if (iID > -1) { + kdt_jump((kdtGetQFieldsVal(this, iQuestion, eBitNo) + + kdtGetQFieldsVal(this, iQuestion, eBitCount)), + iByteNo, iBitNo); + break; + } + + iBitPos = + kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion, + eBitNo), + iByteNo, iBitNo); + iBitCount = + kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion, + eBitCount), + iByteNo, iBitNo); + if ((iVal == iBitPos) || (iVal == iBitCount)) { + iID = i; + } + break; + } + case eWithoutBitMask: { + if (iID > -1) { + kdt_jump((kdtGetQFieldsVal(this, iQuestion, eBitNo) + + kdtGetQFieldsVal(this, iQuestion, eBitCount)), + iByteNo, iBitNo); + break; + } + + iBitPos = + kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion, + eBitNo), + iByteNo, iBitNo); + iBitCount = + kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion, + eBitCount), + iByteNo, iBitNo); + if ((iVal >= iBitPos) && (iVal < 
(iBitPos + iBitCount))) { + iID = i; + } + break; + } + case eBitMask: { + iBitPos = 0; + if (iID > -1) { + kdt_jump(kdtGetQFieldsVal(this, iQuestion, eBitNo), + iByteNo, iBitNo); + } else { + iBitPos = + kdtGetShiftVal(this, + kdtGetQFieldsVal(this, iQuestion, + eBitNo), + iByteNo, iBitNo); + } + + iBitCount = + kdtGetShiftVal(this, + kdtGetQFieldsVal(this, iQuestion, + eBitCount), + iByteNo, iBitNo); + if (iID > -1) { + kdt_jump(iBitCount, iByteNo, iBitNo); + break; + } + + if ((iVal >= iBitPos) && (iVal < (iBitPos + iBitCount))) { + iPos = iVal - iBitPos; + kdt_jump((iVal - iBitPos), iByteNo, iBitNo); + /* if (kdtIsVal(this, *iByteNo, *iBitNo))*/ + if ((this->treebody[*iByteNo] & ((1)<<(*iBitNo))) > 0) { + iID = i; + } + kdt_jump((iBitCount - (iVal-iBitPos)), iByteNo, iBitNo); + } else { + kdt_jump(iBitCount, iByteNo, iBitNo); + } + break; + }/*end case eBitMask*/ + }/*end switch (iSubsetType)*/ + }/*end for ( i = 0; i < iForks-1; i++ ) */ + + /*default tree branch*/ + if (-1 == iID) { + iID = iForks-1; + } + break; + }/*end case eNDiscrete*/ + }/*end switch (iNodeType)*/ + + for (i = 0; i < iForks; i++) { + iIsDecide = kdtGetShiftVal(this, PICOKDT_ISDECIDE_NRBITS, iByteNo, iBitNo); + + PICODBG_TRACE(("doing forks: %d", i)); + + if (!iIsDecide) { + if (iID == i) { + iJump = + kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion, eJump), + iByteNo, iBitNo); + kdt_jump(iJump, iByteNo, iBitNo); + this->dset = FALSE; + return 1; /* to be continued, no solution yet found */ + } else { + kdt_jump(kdtGetQFieldsVal(this, iQuestion, eJump), + iByteNo, iBitNo); + } + } else { + if (iID == i) { + /* check of vfields argument done in initialize */ + iDecision = kdtGetShiftVal(this, this->vfields[eDecide], + iByteNo, iBitNo); + this->dclass = iDecision; + this->dset = TRUE; + return 0; /* solution found */ + } else { + /* check of vfields argument done in initialize */ + kdt_jump(this->vfields[eDecide], iByteNo, iBitNo); + } + }/*end if (!iIsDecide)*/ + }/*end for (i = 0; i 
< iForks; i++ )*/ + + this->dset = FALSE; + PICODBG_TRACE(("problem determining class")); + return -1; /* solution not found, problem determining a class */ +} + + + +/* ************************************************************/ +/* decision tree support functions, mappings */ +/* ************************************************************/ + + +/* size==1 -> MapInByte, size==2 -> MapInWord, + size determined from table type contained in kb. + if the inmaptable is empty, outval = inval */ + +static picoos_uint8 kdtMapInFixed(const kdt_subobj_t *dt, + const picoos_uint8 imtnr, + const picoos_uint16 inval, + picoos_uint16 *outval, + picoos_uint16 *outfallbackval) { + picoos_uint8 size; + picoos_uint32 pos; + picoos_uint16 lentable; + picoos_uint16 posbound; + picoos_uint16 i; + + *outval = 0; + *outfallbackval = 0; + + size = 0; + pos = 0; + + /* check what can be checked */ + if (imtnr >= dt->inpmaptable[pos++]) { /* outside tablenr range? */ + PICODBG_ERROR(("check failed: nrtab: %d, imtnr: %d", + dt->inpmaptable[pos-1], imtnr)); + return FALSE; + } + + /* go forward to the needed tablenr */ + if (imtnr > 0) { + pos = dt->beg_offset[imtnr]; + } + + /* get length */ + lentable = ((picoos_uint16)(dt->inpmaptable[pos+1])) << 8 | + dt->inpmaptable[pos]; + posbound = pos + lentable; + pos += 2; + + /* check type of table and set size */ + if (dt->inpmaptable[pos] == PICOKDT_MTTYPE_EMPTY) { + /* empty table no mapping needed */ + PICODBG_TRACE(("empty table: %d", imtnr)); + *outval = inval; + return TRUE; + } else if (dt->inpmaptable[pos] == PICOKDT_MTTYPE_BYTE) { + size = 1; + } else if (dt->inpmaptable[pos] == PICOKDT_MTTYPE_WORD) { + size = 2; + } else { + /* wrong table type */ + PICODBG_ERROR(("wrong table type %d", dt->inpmaptable[pos])); + return FALSE; + } + pos++; + + /* set fallback value in case of failed mapping, and set upper bound pos */ + *outfallbackval = ((picoos_uint16)(dt->inpmaptable[pos+1])) << 8 | + dt->inpmaptable[pos]; + pos += 2; + + /* size 
must be 1 or 2 here, keep 'redundant' so save time */ + if (size == 1) { + for (i = 0; (i < *outfallbackval) && (pos < posbound); i++) { + if (inval == dt->inpmaptable[pos]) { + *outval = i; + PICODBG_TRACE(("s1 %d in %d -> out %d", imtnr, inval, *outval)); + return TRUE; + } + pos++; + } + } else if (size == 2) { + posbound--; + for (i = 0; (i < *outfallbackval) && (pos < posbound); i++) { + if (inval == (((picoos_uint16)(dt->inpmaptable[pos+1])) << 8 | + dt->inpmaptable[pos])) { + *outval = i; + PICODBG_TRACE(("s2 %d in %d -> out %d", imtnr, inval, *outval)); + return TRUE; + } + pos += 2; + } + } else { + /* impossible size */ + PICODBG_ERROR(("wrong size %d", size)); + return FALSE; + } + + PICODBG_DEBUG(("no mapping found, fallback: %d", *outfallbackval)); + return FALSE; +} + + +static picoos_uint8 kdtMapInGraph(const kdt_subobj_t *dt, + const picoos_uint8 imtnr, + const picoos_uint8 *inval, + const picoos_uint8 invalmaxlen, + picoos_uint16 *outval, + picoos_uint16 *outfallbackval) { + picoos_uint8 ilen; + picoos_uint8 tlen; + picoos_uint8 cont; + picoos_uint32 pos; + picoos_uint16 lentable; + picoos_uint16 posbound; + picoos_uint16 i; + picoos_uint8 j; + + *outfallbackval = 0; + + pos = 0; + /* check what can be checked */ + if ((imtnr >= dt->inpmaptable[pos++]) || /* outside tablenr range? */ + (invalmaxlen == 0) || /* too short? */ + ((ilen = picobase_det_utf8_length(inval[0])) == 0) || /* invalid? */ + (ilen > invalmaxlen)) { /* not accessible? 
*/ + PICODBG_ERROR(("check failed: nrtab: %d, imtnr: %d, invalmaxlen: %d, " + "ilen: %d", + dt->inpmaptable[pos-1], imtnr, invalmaxlen, ilen)); + return FALSE; + } + + /* go forward to the needed tablenr */ + for (i = 0; i < imtnr; i++) { + lentable = ((picoos_uint16)(dt->inpmaptable[pos+1])) << 8 | + dt->inpmaptable[pos]; + pos += lentable; + } + + /* get length and check type of inpmaptable */ + lentable = ((picoos_uint16)(dt->inpmaptable[pos+1])) << 8 | + dt->inpmaptable[pos]; + posbound = pos + lentable; + pos += 2; + +#if defined(PICO_DEBUG) + if (1) { + int id; + PICODBG_TRACE(("imtnr %d", imtnr)); + for (id = pos-2; id < posbound; id++) { + PICODBG_TRACE(("imtbyte pos %d, %c %d", id - (pos-2), + dt->inpmaptable[id], dt->inpmaptable[id])); + } + } +#endif + + /* check type of table */ + if (dt->inpmaptable[pos] != PICOKDT_MTTYPE_GRAPH) { + /* empty table does not make sense for graph */ + /* wrong table type */ + PICODBG_ERROR(("wrong table type")); + return FALSE; + } + pos++; + + /* set fallback value in case of failed mapping, and set upper bound pos */ + *outfallbackval = ((picoos_uint16)(dt->inpmaptable[pos+1])) << 8 | + dt->inpmaptable[pos]; + pos += 2; + + /* sequential search */ + for (i = 0; (i < *outfallbackval) && (pos < posbound); i++) { + tlen = picobase_det_utf8_length(dt->inpmaptable[pos]); + if ((pos + tlen) > posbound) { + PICODBG_ERROR(("trying outside imt, posb: %d, pos: %d, tlen: %d", + posbound, pos, tlen)); + return FALSE; + } + if (ilen == tlen) { + cont = TRUE; + for (j = 0; cont && (j < ilen); j++) { + if (dt->inpmaptable[pos + j] != inval[j]) { + cont = FALSE; + } + } + if (cont && (j == ilen)) { /* match found */ + *outval = i; + PICODBG_TRACE(("found mapval, posb %d, pos %d, i %d, tlen %d", + posbound, pos, i, tlen)); + return TRUE; + } + } + pos += tlen; + } + PICODBG_DEBUG(("outside imt %d, posb/pos/i: %d/%d/%d, fallback: %d", + imtnr, posbound, pos, i, *outfallbackval)); + return FALSE; +} + + +/* size==1 -> MapOutByte, size==2 
-> MapOutWord */ +static picoos_uint8 kdtMapOutFixed(const kdt_subobj_t *dt, + const picoos_uint16 inval, + picoos_uint16 *outval) { + picoos_uint8 size; + picoos_uint16 nr; + + /* no check of lentable vs. nr in initialize done */ + + size = 0; + + /* type */ + nr = dt->outmaptable[PICOKDT_MTPOS_START + PICOKDT_MTPOS_TABLETYPE]; + + /* check type of table and set size */ + if (nr == PICOKDT_MTTYPE_EMPTY) { + /* empty table no mapping needed */ + PICODBG_TRACE(("empty table")); + *outval = inval; + return TRUE; + } else if (nr == PICOKDT_MTTYPE_BYTE) { + size = 1; + } else if (nr == PICOKDT_MTTYPE_WORD) { + size = 2; + } else { + /* wrong table type */ + PICODBG_ERROR(("wrong table type %d", nr)); + return FALSE; + } + + /* number of mapvalues */ + nr = ((picoos_uint16)(dt->outmaptable[PICOKDT_MTPOS_START + + PICOKDT_MTPOS_NUMBER + 1])) << 8 + | dt->outmaptable[PICOKDT_MTPOS_START + PICOKDT_MTPOS_NUMBER]; + + if (inval < nr) { + if (size == 1) { + *outval = dt->outmaptable[PICOKDT_MTPOS_START + + PICOKDT_MTPOS_MAPSTART + (size * inval)]; + } else { + *outval = ((picoos_uint16)(dt->outmaptable[PICOKDT_MTPOS_START + + PICOKDT_MTPOS_MAPSTART + (size * inval) + 1])) << 8 + | dt->outmaptable[PICOKDT_MTPOS_START + + PICOKDT_MTPOS_MAPSTART + (size * inval)]; + } + return TRUE; + } else { + *outval = 0; + return FALSE; + } +} + + +/* size==1 -> ReverseMapOutByte, size==2 -> ReverseMapOutWord */ +/* outmaptable also used to map from decoded tree output domain to + direct tree output domain */ +static picoos_uint8 kdtReverseMapOutFixed(const kdt_subobj_t *dt, + const picoos_uint16 inval, + picoos_uint16 *outval, + picoos_uint16 *outfallbackval) { + picoos_uint8 size; + picoos_uint32 pos; + picoos_uint16 lentable; + picoos_uint16 posbound; + picoos_uint16 i; + + /* no check of lentable vs. 
nr in initialize done */ + + size = 0; + pos = 0; + *outval = 0; + *outfallbackval = 0; + + if (dt->outmaptable == NULL) { + /* empty table no mapping needed */ + PICODBG_TRACE(("empty table")); + *outval = inval; + return TRUE; + } + + /* check what can be checked */ + if (dt->outmaptable[pos++] != 1) { /* only one omt possible */ + PICODBG_ERROR(("check failed: nrtab: %d", dt->outmaptable[pos-1])); + return FALSE; + } + + /* get length */ + lentable = ((picoos_uint16)(dt->outmaptable[pos+1])) << 8 | + dt->outmaptable[pos]; + posbound = pos + lentable; + pos += 2; + + /* check type of table and set size */ + /* if (dt->outmaptable[pos] == PICOKDT_MTTYPE_EMPTY), in + ...Initialize the omt is set to NULL if not existing, checked + above */ + + if (dt->outmaptable[pos] == PICOKDT_MTTYPE_BYTE) { + size = 1; + } else if (dt->outmaptable[pos] == PICOKDT_MTTYPE_WORD) { + size = 2; + } else { + /* wrong table type */ + PICODBG_ERROR(("wrong table type %d", dt->outmaptable[pos])); + return FALSE; + } + pos++; + + /* set fallback value in case of failed mapping, and set upper bound pos */ + *outfallbackval = ((picoos_uint16)(dt->outmaptable[pos+1])) << 8 | + dt->outmaptable[pos]; + pos += 2; + + /* size must be 1 or 2 here, keep 'redundant' so save time */ + if (size == 1) { + for (i = 0; (i < *outfallbackval) && (pos < posbound); i++) { + if (inval == dt->outmaptable[pos]) { + *outval = i; + PICODBG_TRACE(("s1 inval %d -> outval %d", inval, *outval)); + return TRUE; + } + pos++; + } + } else if (size == 2) { + posbound--; + for (i = 0; (i < *outfallbackval) && (pos < posbound); i++) { + if (inval == (((picoos_uint16)(dt->outmaptable[pos+1])) << 8 | + dt->outmaptable[pos])) { + *outval = i; + PICODBG_TRACE(("s2 inval %d -> outval %d", inval, *outval)); + return TRUE; + } + pos += 2; + } + } else { + /* impossible size */ + PICODBG_ERROR(("wrong size %d", size)); + return FALSE; + } + + PICODBG_DEBUG(("no mapping found, fallback: %d", *outfallbackval)); + return FALSE; +} + 
+ +picoos_uint8 picokdt_dtPosDreverseMapOutFixed(const picokdt_DtPosD this, + const picoos_uint16 inval, + picoos_uint16 *outval, + picoos_uint16 *outfallbackval) { + + kdtposd_subobj_t * dtposd = (kdtposd_subobj_t *)this; + kdt_subobj_t * dt = &(dtposd->dt); + return kdtReverseMapOutFixed(dt,inval, outval, outfallbackval); +} + +/* not yet impl. size==1 -> MapOutByteToVar, + fix: size==2 -> MapOutWordToVar */ +static picoos_uint8 kdtMapOutVar(const kdt_subobj_t *dt, + const picoos_uint16 inval, + picoos_uint8 *nr, + picoos_uint16 *outval, + const picoos_uint16 outvalmaxlen) { + picoos_uint16 pos; + picoos_uint16 off2ind; + picoos_uint16 lentable; + picoos_uint16 nrinbytes; + picoos_uint8 size; + picoos_uint16 offset1; + picoos_uint16 i; + + if (dt->outmaptable == NULL) { + /* empty table not possible */ + PICODBG_ERROR(("no table found")); + return FALSE; + } + + /* nr of tables == 1 already checked in *Initialize, no need here, go + directly to position 1 */ + pos = 1; + + /* get length of table */ + lentable = (((picoos_uint16)(dt->outmaptable[pos + 1])) << 8 | + dt->outmaptable[pos]); + pos += 2; + + /* check table type */ + if (dt->outmaptable[pos] != PICOKDT_MTTYPE_BYTETOVAR) { + /* wrong table type */ + PICODBG_ERROR(("wrong table type %d", dt->outmaptable[pos])); + return FALSE; + } + size = 2; + pos++; + + /* get nr of ele in maptable (= nr of possible invals) */ + nrinbytes = (((picoos_uint16)(dt->outmaptable[pos+1])) << 8 | + dt->outmaptable[pos]); + pos += 2; + + /* check what's checkable */ + if (nrinbytes == 0) { + PICODBG_ERROR(("table with length zero")); + return FALSE; + } else if (inval >= nrinbytes) { + PICODBG_ERROR(("inval %d outside valid range %d", inval, nrinbytes)); + return FALSE; + } + + PICODBG_TRACE(("inval %d, lentable %d, nrinbytes %d, pos %d", inval, + lentable, nrinbytes, pos)); + + /* set off2ind to the position of the start of offset2-val */ + /* offset2 points to start of next ele */ + off2ind = pos + (size*inval); + + /* get 
number of output values, offset2 - offset1 */ + if (inval == 0) { + offset1 = 0; + } else { + offset1 = (((picoos_uint16)(dt->outmaptable[off2ind - 1])) << 8 | + dt->outmaptable[off2ind - 2]); + } + *nr = (((picoos_uint16)(dt->outmaptable[off2ind + 1])) << 8 | + dt->outmaptable[off2ind]) - offset1; + + PICODBG_TRACE(("offset1 %d, nr %d, pos %d", offset1, *nr, pos)); + + /* set pos to position of 1st value being mapped to */ + pos += (size * nrinbytes) + offset1; + + if ((pos + *nr - 1) > lentable) { + /* outside table, should not happen */ + PICODBG_ERROR(("problem with table index, pos %d, nr %d, len %d", + pos, *nr, lentable)); + return FALSE; + } + if (*nr > outvalmaxlen) { + /* not enough space in outval */ + PICODBG_ERROR(("overflow in outval, %d > %d", *nr, outvalmaxlen)); + return FALSE; + } + + /* finally, copy outmap result to outval */ + for (i = 0; i < *nr; i++) { + outval[i] = dt->outmaptable[pos++]; + } + return TRUE; +} + + + +/* ************************************************************/ +/* decision tree POS prediction (PosP) functions */ +/* ************************************************************/ + +/* number of prefix and suffix graphemes used to construct the input vector */ +#define KDT_POSP_NRGRAPHPREFATT 4 +#define KDT_POSP_NRGRAPHSUFFATT 6 +#define KDT_POSP_NRGRAPHATT 10 + +/* positions of specgraph and nrgraphs attributes */ +#define KDT_POSP_SPECGRAPHATTPOS 10 +#define KDT_POSP_NRGRAPHSATTPOS 11 + + +/* construct PosP input vector + + PosP invec: 12 elements + + prefix 0-3 prefix graphemes (encoded using tree inpmaptable 0-3) + suffix 4-9 suffix graphemes (encoded using tree inpmaptable 4-9) + isspecchar 10 is a special grapheme (e.g. hyphen) inside the word (0/1)? + nr-utf-graphs 11 number of graphemes (ie. UTF8 chars) + + if there are less than 10 graphemes, each grapheme is used only + once, with the suffix having higher priority, ie. 
elements 0-9 are + filled as follows: + + #graph + 1 0 0 0 0 0 0 0 0 0 1 + 2 0 0 0 0 0 0 0 0 1 2 + 3 0 0 0 0 0 0 0 1 2 3 + 4 0 0 0 0 0 0 1 2 3 4 + 5 0 0 0 0 0 1 2 3 4 5 + 6 0 0 0 0 1 2 3 4 5 6 + 7 1 0 0 0 2 3 4 5 6 7 + 8 1 2 0 0 3 4 5 6 7 8 + 9 1 2 3 0 4 5 6 7 8 9 + 10 1 2 3 4 5 6 7 8 9 10 + 11 1 2 3 4 6 7 8 9 10 11 + ... + + 1-6: Fill chbuf + 7-10: front to invec 1st part, remove front, add rear + >10: remove front, add rear + no more graph -> + while chbuflen>0: + add rear to the last empty slot in 2nd part of invec, remove rear +*/ + + +picoos_uint8 picokdt_dtPosPconstructInVec(const picokdt_DtPosP this, + const picoos_uint8 *graph, + const picoos_uint16 graphlen, + const picoos_uint8 specgraphflag) { + kdtposp_subobj_t *dtposp; + + /* utf8 circular char buffer, used as restricted input deque */ + /* 2nd part of graph invec has KDT_POSP_NRGRAPHSUFFATT elements, */ + /* max of UTF8_MAXLEN bytes per utf8 char */ + picoos_uint8 chbuf[KDT_POSP_NRGRAPHSUFFATT][PICOBASE_UTF8_MAXLEN]; + picoos_uint8 chbrear; /* next free pos */ + picoos_uint8 chbfront; /* next read pos */ + picoos_uint8 chblen; /* empty=0; full=KDT_POSP_NRGRAPHSUFFATT */ + + picoos_uint16 poscg; /* position of current graph (= utf8 char) */ + picoos_uint16 lencg = 0; /* length of current grapheme */ + picoos_uint16 nrutfg; /* number of utf graphemes */ + picoos_uint8 invecpos; /* next element to add in invec */ + picoos_uint16 fallback; /* fallback value for failed graph encodings */ + picoos_uint8 i; + + dtposp = (kdtposp_subobj_t *)this; + chbrear = 0; + chbfront = 0; + chblen = 0; + poscg = 0; + nrutfg = 0; + invecpos = 0; + + PICODBG_DEBUG(("graphlen %d", graphlen)); + + /* not needed, since all elements are set + for (i = 0; i < PICOKDT_NRATT_POSP; i++) { + dtposp->invec[i] = '\x63'; + } + */ + + dtposp->inveclen = 0; + + while ((poscg < graphlen) && + ((lencg = picobase_det_utf8_length(graph[poscg])) > 0)) { + if (chblen >= KDT_POSP_NRGRAPHSUFFATT) { /* chbuf full */ + if (invecpos < 
KDT_POSP_NRGRAPHPREFATT) { /* prefix not full */ + /* att-encode front utf graph and add in invec */ + if (!kdtMapInGraph(&(dtposp->dt), invecpos, + chbuf[chbfront], PICOBASE_UTF8_MAXLEN, + &(dtposp->invec[invecpos]), + &fallback)) { + if (fallback) { + dtposp->invec[invecpos] = fallback; + } else { + return FALSE; + } + } + invecpos++; + } + /* remove front utf graph */ + chbfront++; + chbfront %= KDT_POSP_NRGRAPHSUFFATT; + chblen--; + } + /* add current utf graph to chbuf */ + for (i=0; i<lencg; i++) { + chbuf[chbrear][i] = graph[poscg++]; + } + if (i < PICOBASE_UTF8_MAXLEN) { + chbuf[chbrear][i] = '\0'; + } + chbrear++; + chbrear %= KDT_POSP_NRGRAPHSUFFATT; + chblen++; + /* increase utf graph count */ + nrutfg++; + } + + if ((lencg == 0) || (chblen == 0)) { + return FALSE; + } else if (chblen > 0) { + + while (invecpos < KDT_POSP_NRGRAPHPREFATT) { /* fill up prefix */ + if (!kdtMapInGraph(&(dtposp->dt), invecpos, + PICOKDT_OUTSIDEGRAPH_DEFSTR, + PICOKDT_OUTSIDEGRAPH_DEFLEN, + &(dtposp->invec[invecpos]), &fallback)) { + if (fallback) { + dtposp->invec[invecpos] = fallback; + } else { + return FALSE; + } + } + invecpos++; + } + + for (i = (KDT_POSP_NRGRAPHATT - 1); + i >= KDT_POSP_NRGRAPHPREFATT; i--) { + if (chblen > 0) { + if (chbrear == 0) { + chbrear = KDT_POSP_NRGRAPHSUFFATT - 1; + } else { + chbrear--; + } + if (!kdtMapInGraph(&(dtposp->dt), i, chbuf[chbrear], + PICOBASE_UTF8_MAXLEN, + &(dtposp->invec[i]), &fallback)) { + if (fallback) { + dtposp->invec[i] = fallback; + } else { + return FALSE; + } + } + chblen--; + } else { + if (!kdtMapInGraph(&(dtposp->dt), i, + PICOKDT_OUTSIDEGRAPH_DEFSTR, + PICOKDT_OUTSIDEGRAPH_DEFLEN, + &(dtposp->invec[i]), &fallback)) { + if (fallback) { + dtposp->invec[i] = fallback; + } else { + return FALSE; + } + } + } + } + + /* set isSpecChar attribute, reuse var i */ + i = (specgraphflag ? 
1 : 0); + if (!kdtMapInFixed(&(dtposp->dt), KDT_POSP_SPECGRAPHATTPOS, i, + &(dtposp->invec[KDT_POSP_SPECGRAPHATTPOS]), + &fallback)) { + if (fallback) { + dtposp->invec[KDT_POSP_SPECGRAPHATTPOS] = fallback; + } else { + return FALSE; + } + } + + /* set nrGraphs attribute */ + if (!kdtMapInFixed(&(dtposp->dt), KDT_POSP_NRGRAPHSATTPOS, nrutfg, + &(dtposp->invec[KDT_POSP_NRGRAPHSATTPOS]), + &fallback)) { + if (fallback) { + dtposp->invec[KDT_POSP_NRGRAPHSATTPOS] = fallback; + } else { + return FALSE; + } + } + PICODBG_DEBUG(("posp-invec: [%d,%d,%d,%d|%d,%d,%d,%d,%d,%d|%d|%d]", + dtposp->invec[0], dtposp->invec[1], dtposp->invec[2], + dtposp->invec[3], dtposp->invec[4], dtposp->invec[5], + dtposp->invec[6], dtposp->invec[7], dtposp->invec[8], + dtposp->invec[9], dtposp->invec[10], + dtposp->invec[11], dtposp->invec[12])); + dtposp->inveclen = PICOKDT_NRINPMT_POSP; + return TRUE; + } + + return FALSE; +} + + +picoos_uint8 picokdt_dtPosPclassify(const picokdt_DtPosP this) { + picoos_uint32 iByteNo; + picoos_int8 iBitNo; + picoos_int8 rv; + kdtposp_subobj_t *dtposp; + kdt_subobj_t *dt; + + dtposp = (kdtposp_subobj_t *)this; + dt = &(dtposp->dt); + iByteNo = 0; + iBitNo = 7; + while ((rv = kdtAskTree(dt, dtposp->invec, PICOKDT_NRATT_POSP, + &iByteNo, &iBitNo)) > 0) { + PICODBG_TRACE(("asking tree")); + } + PICODBG_DEBUG(("done: %d", dt->dclass)); + return ((rv == 0) && dt->dset); +} + + +picoos_uint8 picokdt_dtPosPdecomposeOutClass(const picokdt_DtPosP this, + picokdt_classify_result_t *dtres) { + kdtposp_subobj_t *dtposp; + picoos_uint16 val; + + dtposp = (kdtposp_subobj_t *)this; + + if (dtposp->dt.dset && + kdtMapOutFixed(&(dtposp->dt), dtposp->dt.dclass, &val)) { + dtres->set = TRUE; + dtres->class = val; + return TRUE; + } else { + dtres->set = FALSE; + return FALSE; + } +} + + + +/* ************************************************************/ +/* decision tree POS disambiguation (PosD) functions */ +/* ************************************************************/ + 
+ +picoos_uint8 picokdt_dtPosDconstructInVec(const picokdt_DtPosD this, + const picoos_uint16 * input) { + kdtposd_subobj_t *dtposd; + picoos_uint8 i; + picoos_uint16 fallback = 0; + + dtposd = (kdtposd_subobj_t *)this; + dtposd->inveclen = 0; + + PICODBG_DEBUG(("in: [%d,%d,%d|%d|%d,%d,%d]", + input[0], input[1], input[2], + input[3], input[4], input[5], + input[6])); + for (i = 0; i < PICOKDT_NRATT_POSD; i++) { + + /* do the imt mapping for all inval */ + if (!kdtMapInFixed(&(dtposd->dt), i, input[i], + &(dtposd->invec[i]), &fallback)) { + if (fallback) { + PICODBG_DEBUG(("*** using fallback for input mapping: %i -> %i", input[i], fallback)); + dtposd->invec[i] = fallback; + } else { + PICODBG_ERROR(("problem doing input mapping")); + return FALSE; + } + } + } + + PICODBG_DEBUG(("out: [%d,%d,%d|%d|%d,%d,%d]", + dtposd->invec[0], dtposd->invec[1], dtposd->invec[2], + dtposd->invec[3], dtposd->invec[4], dtposd->invec[5], + dtposd->invec[6])); + dtposd->inveclen = PICOKDT_NRINPMT_POSD; + return TRUE; +} + + +picoos_uint8 picokdt_dtPosDclassify(const picokdt_DtPosD this, + picoos_uint16 *treeout) { + picoos_uint32 iByteNo; + picoos_int8 iBitNo; + picoos_int8 rv; + kdtposd_subobj_t *dtposd; + kdt_subobj_t *dt; + + dtposd = (kdtposd_subobj_t *)this; + dt = &(dtposd->dt); + iByteNo = 0; + iBitNo = 7; + while ((rv = kdtAskTree(dt, dtposd->invec, PICOKDT_NRATT_POSD, + &iByteNo, &iBitNo)) > 0) { + PICODBG_TRACE(("asking tree")); + } + PICODBG_DEBUG(("done: %d", dt->dclass)); + if ((rv == 0) && dt->dset) { + *treeout = dt->dclass; + return TRUE; + } else { + return FALSE; + } +} + + +/* decompose the tree output and return the class in dtres + dtres: POS classification result + returns: TRUE if okay, FALSE otherwise +*/ +picoos_uint8 picokdt_dtPosDdecomposeOutClass(const picokdt_DtPosD this, + picokdt_classify_result_t *dtres) { + kdtposd_subobj_t *dtposd; + picoos_uint16 val; + + dtposd = (kdtposd_subobj_t *)this; + + if (dtposd->dt.dset && + kdtMapOutFixed(&(dtposd->dt), 
dtposd->dt.dclass, &val)) { + dtres->set = TRUE; + dtres->class = val; + return TRUE; + } else { + dtres->set = FALSE; + return FALSE; + } +} + + + +/* ************************************************************/ +/* decision tree grapheme-to-phoneme (G2P) functions */ +/* ************************************************************/ + + +/* get the nr'th (starting at 0) utf char in utfgraph */ +static picoos_uint8 kdtGetUTF8char(const picoos_uint8 *utfgraph, + const picoos_uint16 graphlen, + const picoos_uint16 nr, + picoos_uint8 *utf8char) { + picoos_uint16 i; + picoos_uint32 pos; + + pos = 0; + for (i = 0; i < nr; i++) { + if (!picobase_get_next_utf8charpos(utfgraph, graphlen, &pos)) { + return FALSE; + } + } + return picobase_get_next_utf8char(utfgraph, graphlen, &pos, utf8char); +} + +/* determine the utfchar count (starting at 1) of the utfchar starting at pos */ +static picoos_uint16 kdtGetUTF8Nr(const picoos_uint8 *utfgraph, + const picoos_uint16 graphlen, + const picoos_uint16 pos) { + picoos_uint32 postmp; + picoos_uint16 count; + + count = 0; + postmp = 0; + while ((postmp <= pos) && (count < graphlen)) { + if (!picobase_get_next_utf8charpos(utfgraph, graphlen, &postmp)) { + PICODBG_ERROR(("invalid utf8 string, count: %d, pos: %d, post: %d", + count, pos, postmp)); + return count + 1; + } + count++; + } + return count; +} + + +picoos_uint8 picokdt_dtG2PconstructInVec(const picokdt_DtG2P this, + const picoos_uint8 *graph, + const picoos_uint16 graphlen, + const picoos_uint8 count, + const picoos_uint8 pos, + const picoos_uint8 nrvow, + const picoos_uint8 ordvow, + picoos_uint8 *primstressflag, + const picoos_uint16 phonech1, + const picoos_uint16 phonech2, + const picoos_uint16 phonech3) { + kdtg2p_subobj_t *dtg2p; + picoos_uint16 fallback = 0; + picoos_uint8 iAttr; + picoos_uint8 utf8char[PICOBASE_UTF8_MAXLEN + 1]; + picoos_uint16 inval; + picoos_int16 cinv; + picoos_uint8 retval; + picoos_int32 utfgraphlen; + picoos_uint16 utfcount; + + dtg2p = 
(kdtg2p_subobj_t *)this; + retval = TRUE; + inval = 0; + + PICODBG_TRACE(("in: [%d,%d,%d|%d,%d|%d|%d,%d,%d]", graphlen, count, pos, + nrvow, ordvow, *primstressflag, phonech1, phonech2, + phonech3)); + + dtg2p->inveclen = 0; + + /* many speed-ups possible */ + + /* graph attributes */ + /* count > = <= count + iAttr lowbound eow upbound delta + 0 4 4 graphlen 5 + 1 3 3 graphlen 4 + 2 2 2 graphlen 3 + 3 1 1 graphlen 2 + 4 0 - graphlen 1 + + 5 0 graphlen graphlen-1 0 + 6 0 graphlen-1 graphlen-2 -1 + 7 0 graphlen-2 graphlen-3 -2 + 8 0 graphlen-3 graphlen-4 -3 + */ + + /* graph attributes left (context -4/-3/-2/-1) and current, MapInGraph */ + + utfgraphlen = picobase_utf8_length(graph, graphlen); + if (utfgraphlen <= 0) { + utfgraphlen = 0; + } + utfcount = kdtGetUTF8Nr(graph, graphlen, count); + + cinv = 4; + for (iAttr = 0; iAttr < 5; iAttr++) { + if ((utfcount > cinv) && (utfcount <= utfgraphlen)) { + +/* utf8char[0] = graph[count - cinv - 1];*/ + if (!kdtGetUTF8char(graph, graphlen, utfcount-cinv-1, + utf8char)) { + PICODBG_WARN(("problem getting UTF char %d", utfcount-cinv-1)); + utf8char[0] = PICOKDT_OUTSIDEGRAPH_DEFCH; + utf8char[1] = '\0'; + } + } else { + if ((utfcount == cinv) && (iAttr != 4)) { + utf8char[0] = PICOKDT_OUTSIDEGRAPH_EOW_DEFCH; + } else { + utf8char[0] = PICOKDT_OUTSIDEGRAPH_DEFCH; + } + utf8char[1] = '\0'; + } + + if (!kdtMapInGraph(&(dtg2p->dt), iAttr, + utf8char, PICOBASE_UTF8_MAXLEN, + &(dtg2p->invec[iAttr]), + &fallback)) { + if (fallback) { + dtg2p->invec[iAttr] = fallback; + } else { + PICODBG_WARN(("setting attribute %d to zero", iAttr)); + dtg2p->invec[iAttr] = 0; + retval = FALSE; + } + } + PICODBG_TRACE(("invec %d %c", iAttr, utf8char[0])); + cinv--; + } + + /* graph attributes right (context 1/2/3/4), MapInGraph */ + cinv = utfgraphlen; + for (iAttr = 5; iAttr < 9; iAttr++) { + if ((utfcount > 0) && (utfcount <= (cinv - 1))) { +/* utf8char[0] = graph[count + graphlen - cinv];*/ + if (!kdtGetUTF8char(graph, graphlen, 
utfcount+utfgraphlen-cinv, + utf8char)) { + PICODBG_WARN(("problem getting UTF char %d", + utfcount+utfgraphlen-cinv-1)); + utf8char[0] = PICOKDT_OUTSIDEGRAPH_DEFCH; + utf8char[1] = '\0'; + } + } else { + if (utfcount == cinv) { + utf8char[0] = PICOKDT_OUTSIDEGRAPH_EOW_DEFCH; + utf8char[1] = '\0'; + } else { + utf8char[0] = PICOKDT_OUTSIDEGRAPH_DEFCH; + utf8char[1] = '\0'; + } + } + if (!kdtMapInGraph(&(dtg2p->dt), iAttr, + utf8char, PICOBASE_UTF8_MAXLEN, + &(dtg2p->invec[iAttr]), + &fallback)) { + if (fallback) { + dtg2p->invec[iAttr] = fallback; + } else { + PICODBG_WARN(("setting attribute %d to zero", iAttr)); + dtg2p->invec[iAttr] = 0; + retval = FALSE; + } + } + PICODBG_TRACE(("invec %d %c", iAttr, utf8char[0])); + cinv--; + } + + /* other attributes, MapInFixed */ + for (iAttr = 9; iAttr < PICOKDT_NRATT_G2P; iAttr++) { + switch (iAttr) { + case 9: /* word POS, Fix1 */ + inval = pos; + break; + case 10: /* nr of vowel-like graphs in word, if vowel, Fix2 */ + inval = nrvow; + break; + case 11: /* order of current vowel-like graph in word, Fix2 */ + inval = ordvow; + break; + case 12: /* primary stress mark, Fix2 */ + if (*primstressflag == 1) { + /*already set previously*/ + inval = 1; + } else { + inval = 0; + } + break; + case 13: /* phone chunk right context +1, Hist */ + inval = phonech1; + break; + case 14: /* phone chunk right context +2, Hist */ + inval = phonech2; + break; + case 15: /* phone chunk right context +3, Hist */ + inval = phonech3; + break; + } + + PICODBG_TRACE(("invec %d %d", iAttr, inval)); + + if (!kdtMapInFixed(&(dtg2p->dt), iAttr, inval, + &(dtg2p->invec[iAttr]), &fallback)) { + if (fallback) { + dtg2p->invec[iAttr] = fallback; + } else { + PICODBG_WARN(("setting attribute %d to zero", iAttr)); + dtg2p->invec[iAttr] = 0; + retval = FALSE; + } + } + } + + PICODBG_TRACE(("out: [%d,%d%,%d,%d|%d|%d,%d,%d,%d|%d,%d,%d,%d|" + "%d,%d,%d]", dtg2p->invec[0], dtg2p->invec[1], + dtg2p->invec[2], dtg2p->invec[3], dtg2p->invec[4], + 
dtg2p->invec[5], dtg2p->invec[6], dtg2p->invec[7], + dtg2p->invec[8], dtg2p->invec[9], dtg2p->invec[10], + dtg2p->invec[11], dtg2p->invec[12], dtg2p->invec[13], + dtg2p->invec[14], dtg2p->invec[15])); + + dtg2p->inveclen = PICOKDT_NRINPMT_G2P; + return retval; +} + + + + +picoos_uint8 picokdt_dtG2Pclassify(const picokdt_DtG2P this, + picoos_uint16 *treeout) { + picoos_uint32 iByteNo; + picoos_int8 iBitNo; + picoos_int8 rv; + kdtg2p_subobj_t *dtg2p; + kdt_subobj_t *dt; + + dtg2p = (kdtg2p_subobj_t *)this; + dt = &(dtg2p->dt); + iByteNo = 0; + iBitNo = 7; + while ((rv = kdtAskTree(dt, dtg2p->invec, PICOKDT_NRATT_G2P, + &iByteNo, &iBitNo)) > 0) { + PICODBG_TRACE(("asking tree")); + } + PICODBG_TRACE(("done: %d", dt->dclass)); + if ((rv == 0) && dt->dset) { + *treeout = dt->dclass; + return TRUE; + } else { + return FALSE; + } +} + + + +picoos_uint8 picokdt_dtG2PdecomposeOutClass(const picokdt_DtG2P this, + picokdt_classify_vecresult_t *dtvres) { + kdtg2p_subobj_t *dtg2p; + + dtg2p = (kdtg2p_subobj_t *)this; + + if (dtg2p->dt.dset && + kdtMapOutVar(&(dtg2p->dt), dtg2p->dt.dclass, &(dtvres->nr), + dtvres->classvec, PICOKDT_MAXSIZE_OUTVEC)) { + return TRUE; + } else { + dtvres->nr = 0; + return FALSE; + } + return TRUE; +} + + + +/* ************************************************************/ +/* decision tree phrasing (PHR) functions */ +/* ************************************************************/ + +picoos_uint8 picokdt_dtPHRconstructInVec(const picokdt_DtPHR this, + const picoos_uint8 pre2, + const picoos_uint8 pre1, + const picoos_uint8 src, + const picoos_uint8 fol1, + const picoos_uint8 fol2, + const picoos_uint16 nrwordspre, + const picoos_uint16 nrwordsfol, + const picoos_uint16 nrsyllsfol) { + kdtphr_subobj_t *dtphr; + picoos_uint8 i; + picoos_uint16 inval = 0; + picoos_uint16 fallback = 0; + + dtphr = (kdtphr_subobj_t *)this; + PICODBG_DEBUG(("in: [%d,%d|%d|%d,%d|%d,%d,%d]", + pre2, pre1, src, fol1, fol2, + nrwordspre, nrwordsfol, nrsyllsfol)); + 
dtphr->inveclen = 0; + + for (i = 0; i < PICOKDT_NRATT_PHR; i++) { + switch (i) { + case 0: inval = pre2; break; + case 1: inval = pre1; break; + case 2: inval = src; break; + case 3: inval = fol1; break; + case 4: inval = fol2; break; + case 5: inval = nrwordspre; break; + case 6: inval = nrwordsfol; break; + case 7: inval = nrsyllsfol; break; + default: + PICODBG_ERROR(("size mismatch")); + return FALSE; + break; + } + + /* do the imt mapping for all inval */ + if (!kdtMapInFixed(&(dtphr->dt), i, inval, + &(dtphr->invec[i]), &fallback)) { + if (fallback) { + dtphr->invec[i] = fallback; + } else { + PICODBG_ERROR(("problem doing input mapping")); + return FALSE; + } + } + } + + PICODBG_DEBUG(("out: [%d,%d|%d|%d,%d|%d,%d,%d]", + dtphr->invec[0], dtphr->invec[1], dtphr->invec[2], + dtphr->invec[3], dtphr->invec[4], dtphr->invec[5], + dtphr->invec[6], dtphr->invec[7])); + dtphr->inveclen = PICOKDT_NRINPMT_PHR; + return TRUE; +} + + +picoos_uint8 picokdt_dtPHRclassify(const picokdt_DtPHR this) { + picoos_uint32 iByteNo; + picoos_int8 iBitNo; + picoos_int8 rv; + kdtphr_subobj_t *dtphr; + kdt_subobj_t *dt; + + dtphr = (kdtphr_subobj_t *)this; + dt = &(dtphr->dt); + iByteNo = 0; + iBitNo = 7; + while ((rv = kdtAskTree(dt, dtphr->invec, PICOKDT_NRATT_PHR, + &iByteNo, &iBitNo)) > 0) { + PICODBG_TRACE(("asking tree")); + } + PICODBG_DEBUG(("done: %d", dt->dclass)); + return ((rv == 0) && dt->dset); +} + + +picoos_uint8 picokdt_dtPHRdecomposeOutClass(const picokdt_DtPHR this, + picokdt_classify_result_t *dtres) { + kdtphr_subobj_t *dtphr; + picoos_uint16 val; + + dtphr = (kdtphr_subobj_t *)this; + + if (dtphr->dt.dset && + kdtMapOutFixed(&(dtphr->dt), dtphr->dt.dclass, &val)) { + dtres->set = TRUE; + dtres->class = val; + return TRUE; + } else { + dtres->set = FALSE; + return FALSE; + } +} + + + +/* ************************************************************/ +/* decision tree phono-acoustical model (PAM) functions */ +/* 
************************************************************/ + +picoos_uint8 picokdt_dtPAMconstructInVec(const picokdt_DtPAM this, + const picoos_uint8 *vec, + const picoos_uint8 veclen) { + kdtpam_subobj_t *dtpam; + picoos_uint8 i; + picoos_uint16 fallback = 0; + + dtpam = (kdtpam_subobj_t *)this; + + PICODBG_TRACE(("in0: %d %d %d %d %d %d %d %d %d %d", + vec[0], vec[1], vec[2], vec[3], vec[4], + vec[5], vec[6], vec[7], vec[8], vec[9])); + PICODBG_TRACE(("in1: %d %d %d %d %d %d %d %d %d %d", + vec[10], vec[11], vec[12], vec[13], vec[14], + vec[15], vec[16], vec[17], vec[18], vec[19])); + PICODBG_TRACE(("in2: %d %d %d %d %d %d %d %d %d %d", + vec[20], vec[21], vec[22], vec[23], vec[24], + vec[25], vec[26], vec[27], vec[28], vec[29])); + PICODBG_TRACE(("in3: %d %d %d %d %d %d %d %d %d %d", + vec[30], vec[31], vec[32], vec[33], vec[34], + vec[35], vec[36], vec[37], vec[38], vec[39])); + PICODBG_TRACE(("in4: %d %d %d %d %d %d %d %d %d %d", + vec[40], vec[41], vec[42], vec[43], vec[44], + vec[45], vec[46], vec[47], vec[48], vec[49])); + PICODBG_TRACE(("in5: %d %d %d %d %d %d %d %d %d %d", + vec[50], vec[51], vec[52], vec[53], vec[54], + vec[55], vec[56], vec[57], vec[58], vec[59])); + + dtpam->inveclen = 0; + + /* check veclen */ + if (veclen != PICOKDT_NRINPMT_PAM) { + PICODBG_ERROR(("wrong number of input vector elements")); + return FALSE; + } + + for (i = 0; i < PICOKDT_NRATT_PAM; i++) { + + /* do the imt mapping for all vec eles */ + if (!kdtMapInFixed(&(dtpam->dt), i, vec[i], + &(dtpam->invec[i]), &fallback)) { + if (fallback) { + dtpam->invec[i] = fallback; + } else { + PICODBG_ERROR(("problem doing input mapping, %d %d", i,vec[i])); + return FALSE; + } + } + } + + PICODBG_TRACE(("in0: %d %d %d %d %d %d %d %d %d %d", + dtpam->invec[0], dtpam->invec[1], dtpam->invec[2], + dtpam->invec[3], dtpam->invec[4], dtpam->invec[5], + dtpam->invec[6], dtpam->invec[7], dtpam->invec[8], + dtpam->invec[9])); + PICODBG_TRACE(("in1: %d %d %d %d %d %d %d %d %d %d", + 
dtpam->invec[10], dtpam->invec[11], dtpam->invec[12], + dtpam->invec[13], dtpam->invec[14], dtpam->invec[15], + dtpam->invec[16], dtpam->invec[17], dtpam->invec[18], + dtpam->invec[19])); + PICODBG_TRACE(("in2: %d %d %d %d %d %d %d %d %d %d", + dtpam->invec[20], dtpam->invec[21], dtpam->invec[22], + dtpam->invec[23], dtpam->invec[24], dtpam->invec[25], + dtpam->invec[26], dtpam->invec[27], dtpam->invec[28], + dtpam->invec[29])); + PICODBG_TRACE(("in3: %d %d %d %d %d %d %d %d %d %d", + dtpam->invec[30], dtpam->invec[31], dtpam->invec[32], + dtpam->invec[33], dtpam->invec[34], dtpam->invec[35], + dtpam->invec[36], dtpam->invec[37], dtpam->invec[38], + dtpam->invec[39])); + PICODBG_TRACE(("in4: %d %d %d %d %d %d %d %d %d %d", + dtpam->invec[40], dtpam->invec[41], dtpam->invec[42], + dtpam->invec[43], dtpam->invec[44], dtpam->invec[45], + dtpam->invec[46], dtpam->invec[47], dtpam->invec[48], + dtpam->invec[49])); + PICODBG_TRACE(("in5: %d %d %d %d %d %d %d %d %d %d", + dtpam->invec[50], dtpam->invec[51], dtpam->invec[52], + dtpam->invec[53], dtpam->invec[54], dtpam->invec[55], + dtpam->invec[56], dtpam->invec[57], dtpam->invec[58], + dtpam->invec[59])); + + dtpam->inveclen = PICOKDT_NRINPMT_PAM; + return TRUE; +} + + +picoos_uint8 picokdt_dtPAMclassify(const picokdt_DtPAM this) { + picoos_uint32 iByteNo; + picoos_int8 iBitNo; + picoos_int8 rv; + kdtpam_subobj_t *dtpam; + kdt_subobj_t *dt; + + dtpam = (kdtpam_subobj_t *)this; + dt = &(dtpam->dt); + iByteNo = 0; + iBitNo = 7; + while ((rv = kdtAskTree(dt, dtpam->invec, PICOKDT_NRATT_PAM, + &iByteNo, &iBitNo)) > 0) { + PICODBG_TRACE(("asking tree")); + } + PICODBG_DEBUG(("done: %d", dt->dclass)); + return ((rv == 0) && dt->dset); +} + + +picoos_uint8 picokdt_dtPAMdecomposeOutClass(const picokdt_DtPAM this, + picokdt_classify_result_t *dtres) { + kdtpam_subobj_t *dtpam; + picoos_uint16 val; + + dtpam = (kdtpam_subobj_t *)this; + + if (dtpam->dt.dset && + kdtMapOutFixed(&(dtpam->dt), dtpam->dt.dclass, &val)) { + dtres->set 
= TRUE; + dtres->class = val; + return TRUE; + } else { + dtres->set = FALSE; + return FALSE; + } +} + + + +/* ************************************************************/ +/* decision tree accentuation (ACC) functions */ +/* ************************************************************/ + +picoos_uint8 picokdt_dtACCconstructInVec(const picokdt_DtACC this, + const picoos_uint8 pre2, + const picoos_uint8 pre1, + const picoos_uint8 src, + const picoos_uint8 fol1, + const picoos_uint8 fol2, + const picoos_uint16 hist1, + const picoos_uint16 hist2, + const picoos_uint16 nrwordspre, + const picoos_uint16 nrsyllspre, + const picoos_uint16 nrwordsfol, + const picoos_uint16 nrsyllsfol, + const picoos_uint16 footwordsfol, + const picoos_uint16 footsyllsfol) { + kdtacc_subobj_t *dtacc; + picoos_uint8 i; + picoos_uint16 inval = 0; + picoos_uint16 fallback = 0; + + dtacc = (kdtacc_subobj_t *)this; + PICODBG_DEBUG(("in: [%d,%d,%d,%d,%d|%d,%d|%d,%d,%d,%d|%d,%d]", + pre2, pre1, src, fol1, fol2, hist1, hist2, + nrwordspre, nrsyllspre, nrwordsfol, nrsyllsfol, + footwordsfol, footsyllsfol)); + dtacc->inveclen = 0; + + for (i = 0; i < PICOKDT_NRATT_ACC; i++) { + switch (i) { + case 0: inval = pre2; break; + case 1: inval = pre1; break; + case 2: inval = src; break; + case 3: inval = fol1; break; + case 4: inval = fol2; break; + case 5: inval = hist1; break; + case 6: inval = hist2; break; + case 7: inval = nrwordspre; break; + case 8: inval = nrsyllspre; break; + case 9: inval = nrwordsfol; break; + case 10: inval = nrsyllsfol; break; + case 11: inval = footwordsfol; break; + case 12: inval = footsyllsfol; break; + default: + PICODBG_ERROR(("size mismatch")); + return FALSE; + break; + } + + if (((i == 5) || (i == 6)) && (inval == PICOKDT_HISTORY_ZERO)) { + /* in input to this function the HISTORY_ZERO is used to + mark the no-value-available case. For sparsity reasons + this was not used in the training. 
For + no-value-available cases, instead, do reverse out + mapping of ACC0 to get tree domain for ACC0 */ + if (!kdtReverseMapOutFixed(&(dtacc->dt), PICODATA_ACC0, + &inval, &fallback)) { + if (fallback) { + inval = fallback; + } else { + PICODBG_ERROR(("problem doing reverse output mapping")); + return FALSE; + } + } + } + + /* do the imt mapping for all inval */ + if (!kdtMapInFixed(&(dtacc->dt), i, inval, + &(dtacc->invec[i]), &fallback)) { + if (fallback) { + dtacc->invec[i] = fallback; + } else { + PICODBG_ERROR(("problem doing input mapping")); + return FALSE; + } + } + } + + PICODBG_DEBUG(("out: [%d,%d,%d,%d,%d|%d,%d|%d,%d,%d,%d|%d,%d]", + dtacc->invec[0], dtacc->invec[1], dtacc->invec[2], + dtacc->invec[3], dtacc->invec[4], dtacc->invec[5], + dtacc->invec[6], dtacc->invec[7], dtacc->invec[8], + dtacc->invec[9], dtacc->invec[10], dtacc->invec[11], + dtacc->invec[12])); + dtacc->inveclen = PICOKDT_NRINPMT_ACC; + return TRUE; +} + + +picoos_uint8 picokdt_dtACCclassify(const picokdt_DtACC this, + picoos_uint16 *treeout) { + picoos_uint32 iByteNo; + picoos_int8 iBitNo; + picoos_int8 rv; + kdtacc_subobj_t *dtacc; + kdt_subobj_t *dt; + + dtacc = (kdtacc_subobj_t *)this; + dt = &(dtacc->dt); + iByteNo = 0; + iBitNo = 7; + while ((rv = kdtAskTree(dt, dtacc->invec, PICOKDT_NRATT_ACC, + &iByteNo, &iBitNo)) > 0) { + PICODBG_TRACE(("asking tree")); + } + PICODBG_TRACE(("done: %d", dt->dclass)); + if ((rv == 0) && dt->dset) { + *treeout = dt->dclass; + return TRUE; + } else { + return FALSE; + } +} + + +picoos_uint8 picokdt_dtACCdecomposeOutClass(const picokdt_DtACC this, + picokdt_classify_result_t *dtres) { + kdtacc_subobj_t *dtacc; + picoos_uint16 val; + + dtacc = (kdtacc_subobj_t *)this; + + if (dtacc->dt.dset && + kdtMapOutFixed(&(dtacc->dt), dtacc->dt.dclass, &val)) { + dtres->set = TRUE; + dtres->class = val; + return TRUE; + } else { + dtres->set = FALSE; + return FALSE; + } +} + +#ifdef __cplusplus +} +#endif + + +/* end */ |