1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
|
/*
* Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @file picoklex.h
*
* knowledge base: lexicon
*
* Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
* All rights reserved.
*
* History:
* - 2009-04-20 -- initial version
*
*/
#ifndef PICOKLEX_H_
#define PICOKLEX_H_
#include "picoos.h"
#include "picoknow.h"
#ifdef __cplusplus
extern "C" {
#endif
/* The "#if 0" section below is never compiled; presumably it only
   balances the open brace of extern "C" for editors and indenters. */
#if 0
}
#endif
/* ************************************************************/
/* function to create specialized kb, */
/* to be used by picorsrc only */
/* ************************************************************/
/**
 * Create the specialized (lexicon) knowledge base from the generic
 * knowledge base `this`. To be used by picorsrc only.
 *
 * @param this   knowledge base to specialize
 * @param common NOTE(review): presumably the shared picoos common
 *               context -- confirm against picoos.h
 * @return a pico_status_t status; the concrete values are not visible in
 *         this header
 *
 * NOTE(review): `this` is a C++ keyword, so this prototype cannot be
 * parsed by a C++ compiler even though the header provides an
 * extern "C" guard -- confirm whether C++ inclusion is intended.
 */
pico_status_t picoklex_specializeLexKnowledgeBase(picoknow_KnowledgeBase this,
picoos_Common common);
/* ************************************************************/
/* lexicon type and getLex function */
/* ************************************************************/
/* lexicon type: handle to a lexicon, declared as a pointer to
   struct picoklex_lex; the struct itself is not defined in this header,
   so users of this header treat the handle as opaque */
typedef struct picoklex_lex * picoklex_Lex;
/**
 * Return the lexicon of knowledge base `this` for usage in a PU.
 *
 * @param this knowledge base to take the lexicon from
 * @return the lexicon handle of the kb
 *         (NOTE(review): the result for a NULL or non-lexicon kb is not
 *         visible in this header -- confirm in the implementation)
 */
picoklex_Lex picoklex_getLex(picoknow_KnowledgeBase this);
/* ************************************************************/
/* lexicon lookup result type */
/* ************************************************************/
/* max nr of results a single lookup can return */
#define PICOKLEX_MAX_NRRES 4
/* nr of bytes used for pos and index of one result, needs to fit in
   uint32, ie. max 4 */
#define PICOKLEX_POSIND_SIZE 4
/* nr of bytes used for index, needs to fit in uint32, ie. max 4 */
#define PICOKLEX_IND_SIZE 3
/* max len (in bytes) of posind: one pos+index group per result.
   Derived from PICOKLEX_MAX_NRRES and PICOKLEX_POSIND_SIZE so that it
   cannot drift out of sync with them (16 with the current values);
   parenthesized so it is safe inside larger expressions */
#define PICOKLEX_POSIND_MAXLEN (PICOKLEX_MAX_NRRES * PICOKLEX_POSIND_SIZE)
/* Lexicon lookup result.
   The lexicon lookup result(s) are stored in field posind, which
   contains a sequence of
   POS1-byte, IND1-bytes, POS2-byte, IND2-bytes, etc.
   The IND-bytes are the byte position(s) in the lexblocks part of the
   lexicon byte stream, starting at picoklex_lex_t.lexblocks.
   For lexentries without phones only the POS (there can be only one)
   is stored in posind, nrres equals one, and phonfound is FALSE.
*/
typedef struct {
picoos_uint8 nrres; /* number of results, 0 if no entry found */
picoos_uint8 posindlen; /* number of posind bytes */
picoos_uint8 phonfound; /* phones found flag, TRUE if found */
picoos_uint8 posind[PICOKLEX_POSIND_MAXLEN]; /* sequence of POS/IND
groups ("multi-ind"), one per result; capacity is
PICOKLEX_POSIND_MAXLEN bytes */
} picoklex_lexl_result_t;
/* ************************************************************/
/* lexicon lookup functions */
/* ************************************************************/
/**
 * Look up the lexicon by graph.
 *
 * The result(s) are placed in lexres, ie. the phones are not returned
 * directly (because they are used later and space can be saved using
 * indices first); lexres contains an index (or several) to the entry
 * for later fast lookup once the phones are needed.
 * PICOKLEX_IND_SIZE bytes are used for the index, these ind bytes are
 * saved in the WORDINDEX items.
 *
 * @param this     lexicon to search
 * @param graph    graph to look up, given as a byte sequence
 * @param graphlen length of graph (presumably in bytes -- TODO confirm)
 * @param lexres   receives the lookup result(s)
 * @return TRUE if at least one entry is found, FALSE otherwise
 */
picoos_uint8 picoklex_lexLookup(const picoklex_Lex this,
const picoos_uint8 *graph,
const picoos_uint16 graphlen,
picoklex_lexl_result_t *lexres);
/**
 * Look up a lexicon entry by index.
 *
 * @param this    lexicon to search
 * @param ind     index as a sequence of bytes; this is the content of a
 *                WORDINDEX item
 * @param indlen  length of ind, must be equal to PICOKLEX_IND_SIZE
 * @param pos     output (NOTE(review): presumably receives the POS of
 *                the entry -- confirm in the implementation)
 * @param phon    output (NOTE(review): presumably set to point at the
 *                phones of the entry; ownership and lifetime of the
 *                pointed-to bytes are not visible here -- confirm)
 * @param phonlen output (NOTE(review): presumably receives the number
 *                of phone bytes at *phon -- confirm)
 * @return TRUE if okay, FALSE otherwise
 */
picoos_uint8 picoklex_lexIndLookup(const picoklex_Lex this,
const picoos_uint8 *ind,
const picoos_uint8 indlen,
picoos_uint8 *pos,
picoos_uint8 **phon,
picoos_uint8 *phonlen);
#ifdef __cplusplus
}
#endif
#endif /*PICOKLEX_H_*/
|