summaryrefslogtreecommitdiffstats
path: root/lib/picokdt.h
diff options
context:
space:
mode:
Diffstat (limited to 'lib/picokdt.h')
-rw-r--r--lib/picokdt.h465
1 files changed, 465 insertions, 0 deletions
diff --git a/lib/picokdt.h b/lib/picokdt.h
new file mode 100644
index 0000000..3ef973c
--- /dev/null
+++ b/lib/picokdt.h
@@ -0,0 +1,465 @@
+/*
+ * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file picokdt.h
+ *
+ * knowledge handling for decision trees
+ *
+ * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
+ * All rights reserved.
+ *
+ * History:
+ * - 2009-04-20 -- initial version
+ *
+ */
+
+#ifndef PICOKDT_H_
+#define PICOKDT_H_
+
+#include "picoos.h"
+#include "picoknow.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+#if 0
+}
+#endif
+
+
+/* ************************************************************/
+/*
+ Several specialized decision trees kb are provided by this
+ knowledge handling module:
+
+ - Part of speech prediction decision tree: ...kdt_PosP
+ - Part of speech disambiguation decision tree: ...kdt_PosD
+ - Grapheme-to-phoneme decision tree: ...kdt_G2P
+ - Phrasing decision tree: ...kdt_PHR
+ - Accentuation decision tree: ...kdt_ACC
+ these 5 tree types may be unified in the future to a single type
+
+ - Phono-acoustical model trees: ...kdt_PAM
+ (actually 11 trees, but all have the same characteristics and
+ are instances of the same class)
+*/
+/* ************************************************************/
+
+
+/* ************************************************************/
+/* defines and functions to create specialized kb, */
+/* to be used by picorsrc only */
+/* ************************************************************/
+
+typedef enum {
+ PICOKDT_KDTTYPE_POSP,
+ PICOKDT_KDTTYPE_POSD,
+ PICOKDT_KDTTYPE_G2P,
+ PICOKDT_KDTTYPE_PHR,
+ PICOKDT_KDTTYPE_ACC,
+ PICOKDT_KDTTYPE_PAM
+} picokdt_kdttype_t;
+
+pico_status_t picokdt_specializeDtKnowledgeBase(picoknow_KnowledgeBase this,
+ picoos_Common common,
+ const picokdt_kdttype_t type);
+
+
+/* ************************************************************/
+/* decision tree types (opaque) and get Tree functions */
+/* ************************************************************/
+
+/* decision tree types */
+typedef struct picokdt_dtposp * picokdt_DtPosP;
+typedef struct picokdt_dtposd * picokdt_DtPosD;
+typedef struct picokdt_dtg2p * picokdt_DtG2P;
+typedef struct picokdt_dtphr * picokdt_DtPHR;
+typedef struct picokdt_dtacc * picokdt_DtACC;
+typedef struct picokdt_dtpam * picokdt_DtPAM;
+
+/* return kb decision tree for usage in PU */
+picokdt_DtPosP picokdt_getDtPosP(picoknow_KnowledgeBase this);
+picokdt_DtPosD picokdt_getDtPosD(picoknow_KnowledgeBase this);
+picokdt_DtG2P picokdt_getDtG2P (picoknow_KnowledgeBase this);
+picokdt_DtPHR picokdt_getDtPHR (picoknow_KnowledgeBase this);
+picokdt_DtACC picokdt_getDtACC (picoknow_KnowledgeBase this);
+picokdt_DtPAM picokdt_getDtPAM (picoknow_KnowledgeBase this);
+
+
+/* number of attributes (= input vector size) for each tree type */
+typedef enum {
+ PICOKDT_NRATT_POSP = 12,
+ PICOKDT_NRATT_POSD = 7,
+ PICOKDT_NRATT_G2P = 16,
+ PICOKDT_NRATT_PHR = 8,
+ PICOKDT_NRATT_ACC = 13,
+ PICOKDT_NRATT_PAM = 60
+} kdt_nratt_t;
+
+
+/* ************************************************************/
+/* decision tree classification result type */
+/* ************************************************************/
+
+typedef struct {
+ picoos_uint8 set; /* TRUE if class set, FALSE otherwise */
+ picoos_uint16 class;
+} picokdt_classify_result_t;
+
+
+/* maximum number of output values the tree output is mapped to */
+#define PICOKDT_MAXSIZE_OUTVEC 8
+
+typedef struct {
+ picoos_uint8 nr; /* 0 if no class set, nr of values set otherwise */
+ picoos_uint16 classvec[PICOKDT_MAXSIZE_OUTVEC];
+} picokdt_classify_vecresult_t;
+
+
+/* ************************************************************/
+/* decision tree functions */
+/* ************************************************************/
+
+/* constructInVec:
+ for every tree type there is a constructInVec function to construct
+ the size-optimized input vector for the tree using the input map
+ tables that are part of the decistion tree knowledge base. The
+ constructed input vector is stored in the tree object (this->invec
+ and this->inveclen) and will be used in the following call to the
+ classify function.
+
+ classify:
+ for every tree type there is a classify function to apply the
+ decision tree to the previously constructed input vector. The
+ size-optimized, encoded output is stored in the tree object
+ (this->outval) and will be used in the following call to the
+ decompose function. Where needed (hitory attribute) the direct tree
+ output is returned by the classify function in a variable.
+
+ decomposeOutClass:
+ for every tree type there is a decompose function to decompose the
+ size-optimized, encoded tree output and map it to the outside the
+ tree usable class value.
+*/
+
+
+/* ************************************************************/
+/* decision tree defines */
+/* ************************************************************/
+
+/* to construct the input vectors several hard-coded values are used
+ to handle attributes that, at the given position, are outside the
+ context. */
+
+/* graph attributes: values to be used if the graph attribute is
+ outside the grapheme string (ie. word) */
+#define PICOKDT_OUTSIDEGRAPH_DEFCH (picoos_uint8)'\x30' /* ascii "0" */
+#define PICOKDT_OUTSIDEGRAPH_DEFSTR (picoos_uint8 *)"\x30" /* ascii "0" */
+#define PICOKDT_OUTSIDEGRAPH_DEFLEN 1
+
+/* graph attributes (special case for g2p): values to be used if the
+ graph attribute is directly outside the grapheme string (ie. at the
+ word boundary word). Use PICOKDT_OUTSIDEGRAPH_DEF* if further
+ outside. */
+#define PICOKDT_OUTSIDEGRAPH_EOW_DEFCH (picoos_uint8)'\x31' /* ascii "1" */
+#define PICOKDT_OUTSIDEGRAPH_EOW_DEFSTR (picoos_uint8 *)"\x31" /* ascii "1" */
+#define PICOKDT_OUTSIDEGRAPH_EOW_DEFLEN 1
+
+/* byte and word type attributes: value to be used if a byte or word
+ attribute is outside the context, e.g. for POS */
+#define PICOKDT_EPSILON 7
+
+/* byte and word type attributes: for attribute with history info a
+ 'zero' value is needed when starting the sequence of predictions.
+ Use the following value to initialize history. Note that the direct
+ tree outputs (not mapped with output map table) of previous
+ predictions need to be used when constructing the input vector for
+ a following prediction. This direct tree output will then be mapped
+ together with the rest of the input vector by the input map
+ table. */
+#define PICOKDT_HISTORY_ZERO 30000
+
+
+/* ************************************************************/
+/* decision tree POS prediction (PosP) functions */
+/* ************************************************************/
+
+/* construct a POS prediction input vector
+ tree input vector: 0-3 prefix UTF8 graphemes
+ 4-9 suffex UTF8 graphemes
+ 10 special grapheme existence flag (TRUE/FALSE)
+ 11 number of graphemes
+ graph: the grapheme string of the word for wich POS will be predicted
+ graphlen: length of graph in number of bytes
+ specgraphflag: existence of a special grapheme boolean
+ returns: TRUE if okay, FALSE otherwise
+ note: use PICOKDT_OUTSIDEGRAPH* for att values outside context
+*/
+picoos_uint8 picokdt_dtPosPconstructInVec(const picokdt_DtPosP this,
+ const picoos_uint8 *graph,
+ const picoos_uint16 graphlen,
+ const picoos_uint8 specgraphflag);
+
+
+/* classify a previously constructed input vector using tree 'this'
+ returns: TRUE if okay, FALSE otherwise
+*/
+picoos_uint8 picokdt_dtPosPclassify(const picokdt_DtPosP this);
+
+/* decompose the tree output and return the class in dtres
+ dtres: POS or POSgroup ID classification result
+ returns: TRUE if okay, FALSE otherwise
+*/
+picoos_uint8 picokdt_dtPosPdecomposeOutClass(const picokdt_DtPosP this,
+ picokdt_classify_result_t *dtres);
+
+
+/* ************************************************************/
+/* decision tree POS disambiguation (PosD) functions */
+/* ************************************************************/
+
+/* construct a POS disambiguation input vector (run in left-to-right mode)
+ tree input vector: 0-2 POS or POSgroup for each of the three previous words
+ 3 POSgroup for current word
+ 4-6 POS or POSgroup (can be history) for each of
+ the three following words
+ pre3 - pre1: POSgroup or POS for the previous three words
+ src: POSgroup of current word (if unique POS no posdisa possible)
+ fol1 - fol3: POS or history for the following three words (the more
+ complicated the better... :-( NEEDS TO BE uint16
+ ishist1-ishist3: flag to indicate if fol1-3 are predicted tree
+ output values (history) or the HISTORY_ZERO (TRUE)
+ or an already unambiguous POS (FALSE)
+ returns: TRUE if okay, FALSE otherwise
+ note: use PICOKDT_EPSILON for att values outside context,
+ if POS in fol* unique use this POS instead of real
+ history, use reverse output mapping in these cases
+*/
+picoos_uint8 picokdt_dtPosDconstructInVec(const picokdt_DtPosD this,
+ const picoos_uint16 * input);
+
+
+/* classify a previously constructed input vector using tree 'this'
+ treeout: direct tree output value
+ returns: TRUE if okay, FALSE otherwise
+*/
+picoos_uint8 picokdt_dtPosDclassify(const picokdt_DtPosD this,
+ picoos_uint16 *treeout);
+
+/* decompose the tree output and return the class in dtres
+ dtres: POS classification result
+ returns: TRUE if okay, FALSE otherwise
+*/
+picoos_uint8 picokdt_dtPosDdecomposeOutClass(const picokdt_DtPosD this,
+ picokdt_classify_result_t *dtres);
+
+/* convert (unique) POS index into corresponding tree output index */
+picoos_uint8 picokdt_dtPosDreverseMapOutFixed(const picokdt_DtPosD this,
+ const picoos_uint16 inval,
+ picoos_uint16 *outval,
+ picoos_uint16 *outfallbackval);
+
+/* ************************************************************/
+/* decision tree grapheme-to-phoneme (G2P) functions */
+/* ************************************************************/
+
+/* construct a G2P input vector (run in right-to-left mode)
+ tree input vector: 0-8 the 4 previous, current, and 4 following graphemes
+ 9 POS
+ 10-11 vowel count and vowel ID
+ 12 primary stress flag (TRUE/FALSE)
+ 13-15 the three following phones predicted
+ graph: the grapheme string used to determine invec[0:8]
+ graphlen: length of graph in number of bytes
+ count: the grapheme number for which invec will be constructed [0..]
+ pos: the part of speech of the word
+ nrvow number of vowel-like graphemes in graph if vowel,
+ set to 0 otherwise
+ ordvow order of 'count' vowel in graph if vowel,
+ set to 0 otherwise
+ primstressflag: flag indicating if primary stress was already predicted
+ phonech1-3: the three following phon chunks predicted (right-to-left)
+ returns: TRUE if okay, FALSE otherwise
+*/
+picoos_uint8 picokdt_dtG2PconstructInVec(const picokdt_DtG2P this,
+ const picoos_uint8 *graph,
+ const picoos_uint16 graphlen,
+ const picoos_uint8 count,
+ const picoos_uint8 pos,
+ const picoos_uint8 nrvow,
+ const picoos_uint8 ordvow,
+ picoos_uint8 *primstressflag,
+ const picoos_uint16 phonech1,
+ const picoos_uint16 phonech2,
+ const picoos_uint16 phonech3);
+
+/* classify a previously constructed input vector using tree 'this'
+ treeout: direct tree output value
+ returns: TRUE if okay, FALSE otherwise
+*/
+picoos_uint8 picokdt_dtG2Pclassify(const picokdt_DtG2P this,
+ picoos_uint16 *treeout);
+
+/* decompose the tree output and return the class vector in dtvres
+ dtvres: phones vector classification result
+ returns: TRUE if okay, FALSE otherwise
+*/
+picoos_uint8 picokdt_dtG2PdecomposeOutClass(const picokdt_DtG2P this,
+ picokdt_classify_vecresult_t *dtvres);
+
+
+/* ************************************************************/
+/* decision tree phrasing (PHR) functions */
+/* ************************************************************/
+
+/* construct a PHR input vector (run in right-to-left mode)
+ tree input vector: 0-1 POS for each of the two previous words
+ 2 POS for current word
+ 3-4 POS for each of the two following words
+ 5 nr words left
+ 6 nr words right
+ 7 nr syllables right
+ pre2 - pre1: POS for the previous two words
+ src: POS of current word
+ fol1 - fol2: POS for the following two words
+ nrwordspre: number of words left (previous) of current word
+ nrwordsfol: number of words right (following) of current word,
+ incl. current word, up to next BOUND (also
+ considering previously predicted PHR2/3)
+ nrsyllsfol: number of syllables right (following) of current word,
+ incl. syllables of current word, up to next BOUND
+ (also considering previously predicted PHR2/3)
+ returns: TRUE if okay, FALSE otherwise
+ note: use PICOKDT_EPSILON for att values outside context
+*/
+picoos_uint8 picokdt_dtPHRconstructInVec(const picokdt_DtPHR this,
+ const picoos_uint8 pre2,
+ const picoos_uint8 pre1,
+ const picoos_uint8 src,
+ const picoos_uint8 fol1,
+ const picoos_uint8 fol2,
+ const picoos_uint16 nrwordspre,
+ const picoos_uint16 nrwordsfol,
+ const picoos_uint16 nrsyllsfol);
+
+/* classify a previously constructed input vector using tree 'this'
+ returns: TRUE if okay, FALSE otherwise
+*/
+picoos_uint8 picokdt_dtPHRclassify(const picokdt_DtPHR this);
+
+/* decompose the tree output and return the class vector in dtres
+ dtres: phrasing classification result
+ returns: TRUE if okay, FALSE otherwise
+*/
+picoos_uint8 picokdt_dtPHRdecomposeOutClass(const picokdt_DtPHR this,
+ picokdt_classify_result_t *dtres);
+
+
+/* ************************************************************/
+/* decision tree accentuation (ACC) functions */
+/* ************************************************************/
+
+/* construct an ACC input vector (run in right-to-left mode)
+ tree input vector: 0-1 POS for each of the two previous words
+ 2 POS for current word
+ 3-4 POS for each of the two following words
+ 5-6 history values (already predicted following)
+ 7 nr words left (previous) to any bound
+ 8 nr syllables left to any bound
+ 9 nr words right (following) to any bound
+ 10 nr syllables right to any bound
+ 11 nr words right to predicted "1" prominence (foot)
+ 12 nr syllables right to predicted "1" prominence (foot)
+ pre2 - pre1: POS for the previous two words
+ src: POS of current word
+ fol1 - fol2: POS for the following two words
+ hist1 - hist2: previously predicted ACC values
+ nrwordspre: number of words left (previous) of current word
+ nrsyllspre: number of syllables left (previous) of current word,
+ incl. initial non-prim stress syllables of current word
+ nrwordsfol: number of words right (following) of current word,
+ incl. current word, up to next BOUND (any strength != 0)
+ nrsyllsfol: number of syllables right (following) of current word,
+ incl. syllables of current word starting with prim. stress
+ syllable
+ footwordsfol: nr of words to the following prominence '1'
+ footsyllspre: nr of syllables to the previous prominence '1'
+ returns: TRUE if okay, FALSE otherwise
+ note: use PICOKDT_EPSILON for att 0-4 values outside context
+*/
+picoos_uint8 picokdt_dtACCconstructInVec(const picokdt_DtACC this,
+ const picoos_uint8 pre2,
+ const picoos_uint8 pre1,
+ const picoos_uint8 src,
+ const picoos_uint8 fol1,
+ const picoos_uint8 fol2,
+ const picoos_uint16 hist1,
+ const picoos_uint16 hist2,
+ const picoos_uint16 nrwordspre,
+ const picoos_uint16 nrsyllspre,
+ const picoos_uint16 nrwordsfol,
+ const picoos_uint16 nrsyllsfol,
+ const picoos_uint16 footwordsfol,
+ const picoos_uint16 footsyllsfol);
+
+/* classify a previously constructed input vector using tree 'this'
+ treeout: direct tree output value
+ returns: TRUE if okay, FALSE otherwise
+*/
+picoos_uint8 picokdt_dtACCclassify(const picokdt_DtACC this,
+ picoos_uint16 *treeout);
+
+/* decompose the tree output and return the class vector in dtres
+ dtres: phrasing classification result
+ returns: TRUE if okay, FALSE otherwise
+*/
+picoos_uint8 picokdt_dtACCdecomposeOutClass(const picokdt_DtACC this,
+ picokdt_classify_result_t *dtres);
+
+
+/* ************************************************************/
+/* decision tree phono-acoustical model (PAM) functions */
+/* ************************************************************/
+
+/* construct a Pam input vector and store the tree-specific encoded
+ input vector in the tree object.
+ vec: tree input vector, 60 single-byte-sized attributes
+ veclen: length of vec in number of bytes
+ returns: TRUE if okay, FALSE otherwise
+*/
+picoos_uint8 picokdt_dtPAMconstructInVec(const picokdt_DtPAM this,
+ const picoos_uint8 *vec,
+ const picoos_uint8 veclen);
+
+/* classify a previously constructed input vector using tree 'this'
+ returns: TRUE if okay, FALSE otherwise
+*/
+picoos_uint8 picokdt_dtPAMclassify(const picokdt_DtPAM this);
+
+/* decompose the tree output and return the class in dtres
+ dtres: phones vector classification result
+ returns: TRUE if okay, FALSE otherwise
+*/
+picoos_uint8 picokdt_dtPAMdecomposeOutClass(const picokdt_DtPAM this,
+ picokdt_classify_result_t *dtres);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+
+#endif /*PICOKDT_H_*/