diff options
Diffstat (limited to 'lib/picoklex.h')
-rw-r--r-- | lib/picoklex.h | 129 |
1 files changed, 129 insertions, 0 deletions
diff --git a/lib/picoklex.h b/lib/picoklex.h new file mode 100644 index 0000000..e186393 --- /dev/null +++ b/lib/picoklex.h @@ -0,0 +1,129 @@ +/* + * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @file picoklex.h + * + * knowledge base: lexicon + * + * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland + * All rights reserved. + * + * History: + * - 2009-04-20 -- initial version + * + */ + +#ifndef PICOKLEX_H_ +#define PICOKLEX_H_ + +#include "picoos.h" +#include "picoknow.h" + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} +#endif + + +/* ************************************************************/ +/* function to create specialized kb, */ +/* to be used by picorsrc only */ +/* ************************************************************/ + +pico_status_t picoklex_specializeLexKnowledgeBase(picoknow_KnowledgeBase this, + picoos_Common common); + + +/* ************************************************************/ +/* lexicon type and getLex function */ +/* ************************************************************/ + +/* lexicon type */ +typedef struct picoklex_lex * picoklex_Lex; + +/* return kb lex for usage in PU */ +picoklex_Lex picoklex_getLex(picoknow_KnowledgeBase this); + + +/* ************************************************************/ +/* lexicon lookup result type */ +/* ************************************************************/ + +/* max nr of results */ +#define PICOKLEX_MAX_NRRES 4 + +/* nr of bytes used for pos and index, needs to fit in uint32, ie. max 4 */ +#define PICOKLEX_POSIND_SIZE 4 +/* nr of bytes used for index, needs to fit in uint32, ie. max 4 */ +#define PICOKLEX_IND_SIZE 3 +/* max len (in bytes) of ind, (PICOKLEX_MAX_NRRES * PICOKLEX_POSIND_SIZE) */ +#define PICOKLEX_POSIND_MAXLEN 16 + + +/* the lexicon lookup result(s) are stored in field posind, which + contains a sequence of + POS1-byte, IND1-bytes, POS2-byte, IND2-bytes, etc. + + the IND-bytes are the byte position(s) in the lexblocks part of the + lexicon byte stream, starting at picoklex_lex_t.lexblocks. + + for lexentries without phones only the POS (there can be only one) + is stored in posind, nrres equals one, and phonfound is FALSE. +*/ + +typedef struct { + picoos_uint8 nrres; /* number of results, 0 of no entry found */ + picoos_uint8 posindlen; /* number of posind bytes */ + picoos_uint8 phonfound; /* phones found flag, TRUE if found */ + picoos_uint8 posind[PICOKLEX_POSIND_MAXLEN]; /* sequence of multi-ind, + one per result */ +} picoklex_lexl_result_t; + + +/* ************************************************************/ +/* lexicon lookup functions */ +/* ************************************************************/ + +/** lookup lex by graph; result(s) are in lexres, ie. the phones are + not returned directly (because they are used later and space can be + saved using indices first), lexres contains an index (or several) + to the entry for later fast lookup once the phones are needed. + PICOKLEX_IND_SIZE bytes are used for the index, these ind bytes are + saved in the WORDINDEX items. If at least one entry is found TRUE + is returned, FALSE otherwise */ +picoos_uint8 picoklex_lexLookup(const picoklex_Lex this, + const picoos_uint8 *graph, + const picoos_uint16 graphlen, + picoklex_lexl_result_t *lexres); + +/** lookup lex entry by index ind; ind is a sequence of bytes with + length indlen (must be equal PICOKLEX_IND_SIZE) that is the content + of a WORDINDEX item. Returns TRUE if okay, FALSE otherwise */ +picoos_uint8 picoklex_lexIndLookup(const picoklex_Lex this, + const picoos_uint8 *ind, + const picoos_uint8 indlen, + picoos_uint8 *pos, + picoos_uint8 **phon, + picoos_uint8 *phonlen); + +#ifdef __cplusplus +} +#endif + + +#endif /*PICOKLEX_H_*/ |