1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
|
/*
* Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @file picotrns.h
*
* fst processing
*
* Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
* All rights reserved.
*
* History:
* - 2009-04-20 -- initial version
*
*/
/** @addtogroup picotrns
*
* Conventions:
*
* - The input to the transducer is a list of pos/sym pairs, where pos are arbitrary position markers
* - All positions are allowed on input (in particular all those coming as an output of a previous transduction)
* - A phone sequence to be transduced has to begin with PICOKNOW_PHON_START_ID and end with PICOKNOW_PHON_TERM_ID
* These special symbols are kept in the transduction output (as first and last symbol)
* - Symbols inserted by the transduction process always get their position marker pos=PICOTRNS_POS_INSERT
* - The order of positions on output must be the same as that on input, i.e. apart from inserted pairs, the
* output position sequence must be a sub-sequence of the input position sequence.
* - Inserted symbols are always preceded by a positioned pos/sym pair, e.g.
* if the sequence pos1/sym1, pos2/sym2 should be transduced to x/sym3, y/sym4, z/sym5, then x must be pos1 or pos2
* and not PICOTRNS_POS_INSERT
*
* For lingware developers: Insertions are always interpreted "to the right"
* - E.g.: The original sequence is phon1 , command , phon2
* - The input to the transducer is then pos1/phon1 , pos2/phon2
* - The output is pos1/phon1' -1/phon_ins pos2/phon2' [assuming -1 is the special insertion pos]
* - Then the new sequence will be recomposed as phon1' , phon_ins , command , phon2' [note position of command!]
* - To overwrite this behaviour, rules must be formulated such that the transduction output is
* pos1/phon1' pos2/phon_ins -1/phon2'
*/
#ifndef PICOTRNS_H_
#define PICOTRNS_H_
#include "picoos.h"
#include "picokfst.h"
#include "picoktab.h"
#ifdef __cplusplus
extern "C" {
#endif
#if 0
}
#endif
/* Upper bound on the number of pos/sym pairs in a sequence
 * (picotrns_trivial_syllabify sizes its input/output buffers by this value). */
#define PICOTRNS_MAX_NUM_POSSYM 255

/* Special position markers.
 * Each expansion is fully parenthesized so that it always behaves as a single
 * operand; without the outer parentheses an expression such as
 * 'sizeof PICOTRNS_POS_INSERT' would parse as 'sizeof(picoos_int16) - 1'. */
#define PICOTRNS_POS_INSERT  ((picoos_int16) -1) /* position returned by transducer to mark symbols inserted by the transducer */
#define PICOTRNS_POS_INVALID ((picoos_int16) -2) /* value to mark an invalid (e.g. uninitiated) position */
#define PICOTRNS_POS_IGNORE  ((picoos_int16) -3) /* value to mark a pos/sym pair to be ignored (e.g. start/term symbols only used by the transducer) */
/**
 * One element of a transducer input/output sequence: a symbol together with
 * the position marker it is attached to.
 */
typedef struct picotrns_possym {
picoos_int16 pos; /* position marker; may also hold one of the special PICOTRNS_POS_* values */
picoos_int16 sym; /* symbol id */
} picotrns_possym_t;
/* Decompose the transducer symbol 'symIn'.
 * NOTE(review): presumably stores the plane of 'symIn' in '*plane' and returns
 * the 8-bit symbol id within that plane -- confirm against the implementation
 * in picotrns.c, which is not visible from this header. */
picoos_uint8 picotrns_unplane(picoos_int16 symIn, picoos_uint8 * plane);
/* Debug printing helpers.
 * With PICO_DEBUG defined these are real functions; otherwise the two
 * PICOTRNS_PRINT* names become macros that expand to nothing and
 * picotrns_printSolution becomes NULL. */
#if defined(PICO_DEBUG)
/* NOTE(review): presumably prints a single symbol, using 'kbdbg' to resolve
 * its name -- confirm against the implementation. */
void PICOTRNS_PRINTSYM(picoknow_KnowledgeBase kbdbg, picoos_int16 insym);
/* NOTE(review): presumably prints the 'seqLen' elements of 'seq' -- confirm. */
void PICOTRNS_PRINTSYMSEQ(picoknow_KnowledgeBase kbdbg, const picotrns_possym_t seq[], const picoos_uint16 seqLen);
/* Has the signature of picotrns_printSolutionFct, so it can be passed as the
 * 'printSolution' argument of picotrns_transduce. */
void picotrns_printSolution(const picotrns_possym_t outSeq[], const picoos_uint16 outSeqLen);
#else
/* Non-debug builds: calls to the print macros compile to nothing. */
#define PICOTRNS_PRINTSYM(x,y)
#define PICOTRNS_PRINTSYMSEQ(x,y,z)
/* NULL is a valid 'printSolution' argument for picotrns_transduce
 * (solutions are only displayed when the function is not NULL). */
#define picotrns_printSolution NULL
#endif
/* Opaque handle to the alternative-description working storage that
 * picotrns_transduce takes as its 'altDescBuf' argument; the struct itself is
 * not defined in this header. */
typedef struct picotrns_altDesc * picotrns_AltDesc;
/* Obtain a working buffer of alternative descriptions from 'mm'.
 * NOTE(review): presumably 'maxByteSize' limits the buffer size in bytes and
 * '*numAltDescs' receives the number of descriptors that fit (the value to
 * pass as 'maxAltDescLen') -- confirm against the implementation. */
picotrns_AltDesc picotrns_allocate_alt_desc_buf(picoos_MemoryManager mm, picoos_uint32 maxByteSize, picoos_uint16 * numAltDescs);
/* Release a buffer obtained from picotrns_allocate_alt_desc_buf.
 * Takes the address of the handle.
 * NOTE(review): presumably the handle is reset to NULL -- confirm. */
void picotrns_deallocate_alt_desc_buf(picoos_MemoryManager mm, picotrns_AltDesc * altDescBuf);
/* Type of function for printing transduction solutions;
 * only for testing purposes in transduction mode where all solutions
 * are produced.
 * This is a function type, not a pointer type: when used as a parameter type
 * (as in picotrns_transduce) it is adjusted to a pointer to function.
 * 'outSeq' is the solution to print and 'outSeqLen' its number of elements. */
typedef void picotrns_printSolutionFct(const picotrns_possym_t outSeq[], const picoos_uint16 outSeqLen);
/**
 * Overall transduction: transduces 'inSeq' with 'inSeqLen' elements
 * to '*outSeqLen' elements in 'outSeq'.
 *
 * @param fst            the finite-state transducer used for transduction
 * @param firstSolOnly   determines whether only the first solution (the usual case)
 *                       or all solutions should be produced (for testing); only the
 *                       last found solution is returned in 'outSeq'
 * @param printSolution  if not NULL, every found solution is displayed using
 *                       the given function
 * @param inSeq          the input sequence
 * @param inSeqLen       the input sequence length
 * @param[out] outSeq    the output sequence
 * @param[out] outSeqLen the output sequence length
 * @param maxOutSeqLen   must provide the maximum length of 'outSeq'
 * @param altDescBuf     must provide a working array of length 'maxAltDescLen'
 * @param maxAltDescLen  should be chosen at least 'maxOutSeqLen' + 1
 * @param[out] nrSteps   returns the overall internal number of iterative steps done
 * @return status of the transduction: PICO_OK, if transduction successful
 * @note if 'outSeq' or 'altDescBuf' are too small to hold a solution,
 *       an error occurs and the input is simply transferred to the output
 *       (up to maximum possible length)
 */
extern pico_status_t picotrns_transduce (picokfst_FST fst, picoos_bool firstSolOnly,
picotrns_printSolutionFct printSolution,
const picotrns_possym_t inSeq[], picoos_uint16 inSeqLen,
picotrns_possym_t outSeq[], picoos_uint16 * outSeqLen, picoos_uint16 maxOutSeqLen,
picotrns_AltDesc altDescBuf, picoos_uint16 maxAltDescLen,
picoos_uint32 *nrSteps);
/* (disabled prototype, kept for reference) transduce 'inSeq' into 'outSeq'; 'inSeq' has to be terminated with the id for symbol '#', and 'outSeq' is terminated in the same way. */
/*
pico_status_t picotrns_transduce_sequence(picokfst_FST fst, const picotrns_possym_t inSeq[], picoos_uint16 inSeqLen,
picotrns_possym_t outSeq[], picoos_uint16 * outSeqLen);
*/
/* Copy elements from 'inSeq' (of length 'inSeqLen') to 'outSeq', ignoring
 * elements with epsilon symbol.
 * NOTE(review): presumably '*outSeqLen' receives the number of elements
 * written and 'maxOutSeqLen' is the capacity of 'outSeq', by analogy with
 * picotrns_transduce -- confirm against the implementation. */
pico_status_t picotrns_eliminate_epsilons(const picotrns_possym_t inSeq[], picoos_uint16 inSeqLen,
picotrns_possym_t outSeq[], picoos_uint16 * outSeqLen, picoos_uint16 maxOutSeqLen);
/* Copy elements from 'inSeq' to 'outSeq', inserting syllable separators in
 * some trivial way.
 * 'inSeq' is assumed to hold at most, and 'outSeq' at least,
 * PICOTRNS_MAX_NUM_POSSYM elements.
 * NOTE(review): presumably 'phones' supplies the phone properties used to
 * place the separators, and '*outSeqLen' receives the number of elements
 * written -- confirm against the implementation. */
pico_status_t picotrns_trivial_syllabify(picoktab_Phones phones,
const picotrns_possym_t inSeq[], const picoos_uint16 inSeqLen,
picotrns_possym_t outSeq[], picoos_uint16 * outSeqLen, picoos_uint16 maxOutSeqLen);
/** object : SimpleTransducer
 * shortcut : st
 *
 * Opaque handle; the underlying struct is not defined in this header.
 *
 * The object parameter of every function below is named 'transducer'. It must
 * not be named 'this': that is a C++ keyword, and this header is wrapped in
 * extern "C" precisely so that C++ translation units can include it.
 */
typedef struct picotrns_simple_transducer * picotrns_SimpleTransducer;

/* Create a SimpleTransducer using memory manager 'mm'.
 * NOTE(review): presumably 'maxAltDescLen' sizes the internal alternative
 * description buffer and NULL is returned on allocation failure -- confirm
 * against the implementation. */
picotrns_SimpleTransducer picotrns_newSimpleTransducer(picoos_MemoryManager mm,
                                                       picoos_Common common,
                                                       picoos_uint16 maxAltDescLen);

/* Dispose of a SimpleTransducer; takes the address of the handle.
 * NOTE(review): presumably resets '*transducer' to NULL -- confirm. */
pico_status_t picotrns_disposeSimpleTransducer(picotrns_SimpleTransducer * transducer,
                                               picoos_MemoryManager mm);

/* NOTE(review): presumably resets the transducer's internal sequence so a new
 * input can be added -- confirm. */
pico_status_t picotrns_stInitialize(picotrns_SimpleTransducer transducer);

/* NOTE(review): presumably appends the symbols of 'inStr', placed in symbol
 * plane 'plane', to the transducer's input sequence -- confirm. */
pico_status_t picotrns_stAddWithPlane(picotrns_SimpleTransducer transducer,
                                      picoos_char * inStr,
                                      picoos_uint8 plane);

/* NOTE(review): presumably runs 'fst' over the transducer's current
 * sequence -- confirm. */
pico_status_t picotrns_stTransduce(picotrns_SimpleTransducer transducer,
                                   picokfst_FST fst);

/* NOTE(review): presumably writes the current symbol sequence to
 * 'outputSymIds', whose capacity is 'maxOutputSymIds' -- confirm. */
pico_status_t picotrns_stGetSymSequence(picotrns_SimpleTransducer transducer,
                                        picoos_uint8 * outputSymIds,
                                        picoos_uint32 maxOutputSymIds);
#ifdef __cplusplus
}
#endif
#endif /*PICOTRNS_H_*/
|