summary refs log tree commit diff stats
path: root/lib/picokfst.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/picokfst.c')
-rw-r--r-- lib/picokfst.c 438
1 files changed, 438 insertions, 0 deletions
diff --git a/lib/picokfst.c b/lib/picokfst.c
new file mode 100644
index 0000000..560709c
--- /dev/null
+++ b/lib/picokfst.c
@@ -0,0 +1,438 @@
+/*
+ * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file picokfst.c
+ *
+ * FST knowledge loading and access
+ *
+ * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
+ * All rights reserved.
+ *
+ * History:
+ * - 2009-04-20 -- initial version
+ *
+ */
+#include "picoos.h"
+#include "picodbg.h"
+#include "picoknow.h"
+#include "picokfst.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+#if 0
+}
+#endif
+
+
+#define FileHdrSize 4 /* size of FST file header; stored as hdrLen, it is the stream position where header fields start and the base added to the table offsets */
+
+
+
+/* ************************************************************/
+/* function to create specialized kb, */
+/* to be used by picorsrc only */
+/* ************************************************************/
+
+/** object : FSTKnowledgeBase
+ * shortcut : kfst
+ * derived from : picoknow_KnowledgeBase
+ */
+
+/* pointer alias the access functions cast an opaque picokfst_FST to */
+typedef struct kfst_subobj * kfst_SubObj;
+
+/* Values decoded once from the FST byte stream by kfstInitialize.
+   All "...Pos" members are positions inside fstStream, computed as
+   hdrLen plus an offset stored in the stream. */
+typedef struct kfst_subobj {
+    picoos_uint8 * fstStream;        /* base address of the byte stream */
+    picoos_int32 hdrLen;             /* length of the file header */
+    picoos_int32 transductionMode;   /* transduction mode to be used for the FST */
+    picoos_int32 nrClasses;          /* number of pair/transition classes; a class is in [1..nrClasses] */
+    picoos_int32 nrStates;           /* number of states; a state is in [1..nrStates] */
+    picoos_int32 termClass;          /* pair class of the terminator symbol pair; probably obsolete */
+    picoos_int32 alphaHashTabSize;   /* size of the pair alphabet hash table */
+    picoos_int32 alphaHashTabPos;    /* position of the start of the pair alphabet */
+    picoos_int32 transTabEntrySize;  /* size in bytes of one transition table entry */
+    picoos_int32 transTabPos;        /* position of the start of the transition table */
+    picoos_int32 inEpsStateTabPos;   /* position of the start of the input epsilon transition table */
+    picoos_int32 accStateTabPos;     /* position of the table of accepting states */
+} kfst_subobj_t;
+
+
+
+/* ************************************************************/
+/* primitives for reading from byte stream */
+/* ************************************************************/
+
+/* Reads 'nrBytes' bytes from 'stream' beginning at index '*pos' and combines them,
+   most significant byte first, into the unsigned number '*num'.
+   On return '*pos' is the index right after the last byte read.
+   The stream length is not known here, so no bounds check is done. */
+static void FixedBytesToUnsignedNum (picoos_uint8 * stream, picoos_uint8 nrBytes, picoos_uint32 * pos, picoos_uint32 * num)
+{
+    picoos_uint32 acc = 0;
+    picoos_uint8 remaining;
+
+    for (remaining = nrBytes; remaining > 0; remaining--) {
+        /* make room for the next byte; the low 8 bits are zero after the shift */
+        acc = (acc << 8) | (picoos_uint32)stream[*pos];
+        (*pos)++;
+    }
+    (*num) = acc;
+}
+
+
+/* Reads 'nrBytes' bytes from 'stream' beginning at index '*pos', most significant byte
+   first, and decodes them into the signed number '*num'.
+   The lowest bit of the raw value selects the sign: an even raw value v yields v/2,
+   an odd raw value v yields -((v-1)/2) - 1.
+   On return '*pos' is the index right after the last byte read. */
+static void FixedBytesToSignedNum (picoos_uint8 * stream, picoos_uint8 nrBytes, picoos_uint32 * pos, picoos_int32 * num)
+{
+    picoos_uint32 raw = 0;
+    picoos_uint32 half;
+    picoos_uint8 remaining;
+
+    for (remaining = nrBytes; remaining > 0; remaining--) {
+        raw = (raw << 8) | (picoos_uint32)stream[*pos];
+        (*pos)++;
+    }
+
+    /* for an unsigned value, >> 1 equals both v/2 (v even) and (v-1)/2 (v odd) */
+    half = raw >> 1;
+    if ((raw & 1) != 0) {
+        /* negative number */
+        (*num) = -((picoos_int32)half) - 1;
+    } else {
+        /* positive number */
+        (*num) = half;
+    }
+}
+
+
+/* Decodes a variable-length number from 'stream' beginning at index '*pos'.
+   Every byte below 128 contributes 7 bits and continues the sequence; the first byte
+   that is 128 or above ends it and contributes its value minus 128.
+   The accumulated raw value is then sign-decoded: even v yields v/2,
+   odd v yields -((v-1)/2) - 1. The result is stored in '*num' and '*pos' is
+   moved to the index right after the terminating byte. */
+static void BytesToNum (picoos_uint8 * stream, picoos_uint32 * pos, picoos_int32 * num)
+{
+    picoos_uint32 raw = 0;
+    picoos_uint32 cur;
+    picoos_uint32 half;
+
+    for (;;) {
+        cur = (picoos_uint32)stream[*pos];
+        (*pos)++;
+        if (cur >= 128) {
+            /* terminating byte */
+            break;
+        }
+        raw = (raw << 7) + cur;
+    }
+    raw = (raw << 7) + (cur - 128);
+
+    half = raw >> 1;
+    if ((raw & 1) != 0) {
+        /* negative number */
+        (*num) = -((picoos_int32)half) - 1;
+    } else {
+        /* positive number */
+        (*num) = half;
+    }
+}
+
+
+/* ************************************************************/
+/* setting up FST from byte stream */
+/* ************************************************************/
+
+/* Fills the kfst sub-object of 'this' from the byte stream at this->base.
+   The header fields are read sequentially, starting right after the file header
+   (FileHdrSize); table offsets found in the stream are turned into stream positions
+   by adding the header length.
+   Raises PICO_EXC_KB_MISSING if 'this' or its sub-object is NULL, otherwise
+   returns PICO_OK. */
+static pico_status_t kfstInitialize(register picoknow_KnowledgeBase this,
+                                    picoos_Common common)
+{
+    kfst_subobj_t * fst;
+    picoos_uint32 cursor;    /* read position inside the stream */
+    picoos_int32 relOffs;    /* table offset as stored in the stream */
+
+    PICODBG_DEBUG(("kfstInitialize -- start\n"));
+
+    if ((NULL == this) || (NULL == this->subObj)) {
+        return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING, NULL,
+                                       NULL);
+    }
+    fst = (kfst_subobj_t *) this->subObj;
+
+    fst->fstStream = this->base;
+    /* NOTE(review): "%d" receives a pointer argument here -- confirm against the
+       PICODBG formatter whether this is intended */
+    PICODBG_TRACE(("base: %d\n",this->base));
+    fst->hdrLen = FileHdrSize;
+    cursor = fst->hdrLen;
+
+    /* the read order below is the field order in the stream and must not change */
+    BytesToNum(fst->fstStream, &cursor, &fst->transductionMode);
+    BytesToNum(fst->fstStream, &cursor, &fst->nrClasses);
+    BytesToNum(fst->fstStream, &cursor, &fst->nrStates);
+    BytesToNum(fst->fstStream, &cursor, &fst->termClass);
+    BytesToNum(fst->fstStream, &cursor, &fst->alphaHashTabSize);
+
+    BytesToNum(fst->fstStream, &cursor, &relOffs);
+    fst->alphaHashTabPos = fst->hdrLen + relOffs;
+
+    BytesToNum(fst->fstStream, &cursor, &fst->transTabEntrySize);
+
+    BytesToNum(fst->fstStream, &cursor, &relOffs);
+    fst->transTabPos = fst->hdrLen + relOffs;
+
+    BytesToNum(fst->fstStream, &cursor, &relOffs);
+    fst->inEpsStateTabPos = fst->hdrLen + relOffs;
+
+    BytesToNum(fst->fstStream, &cursor, &relOffs);
+    fst->accStateTabPos = fst->hdrLen + relOffs;
+
+    return PICO_OK;
+}
+
+
+/* Sub-object deallocation hook installed as this->subDeallocate.
+   Passes the address of this->subObj to picoos_deallocate using memory manager 'mm'.
+   A NULL 'this' is ignored. Always returns PICO_OK. */
+static pico_status_t kfstSubObjDeallocate(register picoknow_KnowledgeBase this,
+                                          picoos_MemoryManager mm)
+{
+    if (NULL != this) {
+        /* pointer-to-pointer cast, as in the other picoos_deallocate call of this file */
+        picoos_deallocate(mm, (void **) &this->subObj);
+    }
+    return PICO_OK;
+}
+
+
+/* Calculates a small number of data (e.g. addresses) from kb for fast access.
+ * This data is encapsulated in a picokfst_FST that can later be retrieved
+ * with picokfst_getFST.
+ *
+ * A kb with size 0 (dummy kb) is left untouched and PICO_OK is returned.
+ * Otherwise the deallocation hook is installed, the sub-object is allocated
+ * from common->mm and initialized from the kb byte stream.
+ *
+ * Returns PICO_OK on success, the raised exception status if 'this' is NULL
+ * (PICO_EXC_KB_MISSING) or allocation fails (PICO_EXC_OUT_OF_MEM), or the
+ * status reported by kfstInitialize if initialization fails; in that last
+ * case the sub-object is released again before returning. */
+pico_status_t picokfst_specializeFSTKnowledgeBase(picoknow_KnowledgeBase this,
+                                                  picoos_Common common)
+{
+    pico_status_t status = PICO_OK;
+
+    if (NULL == this) {
+        return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING, NULL, NULL);
+    }
+    if (0 < this->size) {
+        /* not a dummy kb */
+        this->subDeallocate = kfstSubObjDeallocate;
+
+        this->subObj = picoos_allocate(common->mm, sizeof(kfst_subobj_t));
+
+        if (NULL == this->subObj) {
+            return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, NULL, NULL);
+        }
+        status = kfstInitialize(this, common);
+        if (PICO_OK != status) {
+            picoos_deallocate(common->mm,(void **)&this->subObj);
+        }
+    }
+    /* report an initialization failure to the caller instead of masking it */
+    return status;
+}
+
+
+/* ************************************************************/
+/* FST type and getFST function */
+/* ************************************************************/
+
+
+
+/* Returns the FST sub-object of knowledge base 'this' for usage in a PU,
+   or NULL if 'this' is NULL. */
+picokfst_FST picokfst_getFST(picoknow_KnowledgeBase this)
+{
+    if (NULL != this) {
+        return (picokfst_FST) this->subObj;
+    }
+    return NULL;
+}
+
+
+
+/* ************************************************************/
+/* FST access methods */
+/* ************************************************************/
+
+
+/* see description in header file;
+   yields the stored transduction mode, or 0 when 'this' is NULL */
+extern picoos_uint8 picokfst_kfstGetTransductionMode(picokfst_FST this)
+{
+    kfst_SubObj fst = (kfst_SubObj) this;
+
+    if (NULL == fst) {
+        return 0;
+    }
+    return fst->transductionMode;
+}
+
+
+/* see description in header file;
+   stores the number of states and classes of the FST in '*nrStates' and
+   '*nrClasses'; both are set to 0 when 'this' is NULL */
+extern void picokfst_kfstGetFSTSizes (picokfst_FST this, picoos_int32 *nrStates, picoos_int32 *nrClasses)
+{
+    kfst_SubObj fst = (kfst_SubObj) this;
+    picoos_int32 states = 0;
+    picoos_int32 classes = 0;
+
+    if (NULL != fst) {
+        states = fst->nrStates;
+        classes = fst->nrClasses;
+    }
+    *nrStates = states;
+    *nrClasses = classes;
+}
+
+/* see description in header file
+ *
+ * Looks up input symbol 'inSym' in the pair alphabet hash table.
+ * The bucket for (inSym modulo alphaHashTabSize) holds a 4-byte signed offset
+ * relative to alphaHashTabPos; an offset > 0 leads to the first symbol cell of
+ * that bucket. Each cell starts with a symbol id followed by the offset
+ * (relative to the current cell) of the next cell in the same bucket; an
+ * offset <= 0 ends the chain.
+ *
+ * On success '*inSymFound' is set to 1 and '*searchState' to the stream
+ * position right after these two numbers of the matching cell. Otherwise
+ * '*inSymFound' is 0 and '*searchState' is -1. This is also the result when
+ * 'this' is NULL, when the hash table size is not positive, or when the
+ * hash value comes out negative. */
+extern void picokfst_kfstStartPairSearch (picokfst_FST this, picokfst_symid_t inSym,
+                                          picoos_bool * inSymFound, picoos_int32 * searchState)
+{
+    picoos_uint32 pos;
+    picoos_int32 offs;
+    picoos_int32 h;
+    picoos_int32 inSymCellPos;
+    picoos_int32 inSymX;
+    picoos_int32 nextSameHashInSymOffs;
+
+    kfst_SubObj fst = (kfst_SubObj) this;
+    (*searchState) = -1;
+    (*inSymFound) = 0;
+    if ((fst == NULL) || (fst->alphaHashTabSize <= 0)) {
+        /* no FST, or no usable hash table: the modulo below would be undefined for size 0 */
+        return;
+    }
+    h = inSym % fst->alphaHashTabSize;
+    if (h < 0) {
+        /* a negative bucket index would address bytes in front of the hash table */
+        return;
+    }
+    pos = fst->alphaHashTabPos + (h * 4);
+    FixedBytesToSignedNum(fst->fstStream,4,& pos,& offs);
+    if (offs > 0) {
+        inSymCellPos = fst->alphaHashTabPos + offs;
+        pos = inSymCellPos;
+        BytesToNum(fst->fstStream,& pos,& inSymX);
+        BytesToNum(fst->fstStream,& pos,& nextSameHashInSymOffs);
+        /* follow the chain of cells sharing this hash value */
+        while ((inSymX != inSym) && (nextSameHashInSymOffs > 0)) {
+            inSymCellPos = inSymCellPos + nextSameHashInSymOffs;
+            pos = inSymCellPos;
+            BytesToNum(fst->fstStream,& pos,& inSymX);
+            BytesToNum(fst->fstStream,& pos,& nextSameHashInSymOffs);
+        }
+        if (inSymX == inSym) {
+            /* input symbol found; state is set to position after symbol cell */
+            (*searchState) = pos;
+            (*inSymFound) = 1;
+        }
+    }
+}
+
+
+/* see description in header file
+ *
+ * Reads the next (output symbol, pair class) entry at stream position
+ * '*searchState'. If an entry is present, '*pairFound' is 1, '*outSym' and
+ * '*pairClass' hold the entry and '*searchState' is advanced behind it.
+ * An output symbol equal to PICOKFST_SYMID_ILLEG ends the list: '*pairFound'
+ * is 0, '*outSym' is PICOKFST_SYMID_ILLEG, '*pairClass' is -1 and
+ * '*searchState' becomes -1. A negative '*searchState' on entry gives the
+ * same "not found" outputs and is left unchanged. */
+extern void picokfst_kfstGetNextPair (picokfst_FST this, picoos_int32 * searchState,
+                                      picoos_bool * pairFound,
+                                      picokfst_symid_t * outSym, picokfst_class_t * pairClass)
+{
+    kfst_SubObj fst = (kfst_SubObj) this;
+    picoos_uint32 cursor;
+    picoos_int32 decoded;
+
+    if ((*searchState) >= 0) {
+        cursor = (*searchState);
+        BytesToNum(fst->fstStream, &cursor, &decoded);
+        *outSym = (picokfst_symid_t)decoded;
+        if ((*outSym) != PICOKFST_SYMID_ILLEG) {
+            BytesToNum(fst->fstStream, &cursor, &decoded);
+            *pairClass = (picokfst_class_t)decoded;
+            (*pairFound) = 1;
+            (*searchState) = cursor;
+            return;
+        }
+        /* end marker reached: invalidate the search state */
+        (*searchState) = -1;
+    }
+
+    /* common "no pair" result */
+    (*pairFound) = 0;
+    (*outSym) = PICOKFST_SYMID_ILLEG;
+    (*pairClass) = -1;
+}
+
+
+
+/* see description in header file
+ *
+ * Looks up the transition table entry for ('startState', 'transClass') and
+ * stores it in '*endState'. The table is laid out row by row: one row per
+ * state with nrClasses entries of transTabEntrySize bytes each.
+ * '*endState' is set to 0 when 'this' is NULL, when 'startState' is outside
+ * [1..nrStates] or when 'transClass' is outside [1..nrClasses]. */
+extern void picokfst_kfstGetTrans (picokfst_FST this, picokfst_state_t startState, picokfst_class_t transClass,
+                                   picokfst_state_t * endState)
+{
+
+    picoos_uint32 pos;
+    picoos_int32 index;
+    picoos_uint32 endStateX;
+
+    kfst_SubObj fst = (kfst_SubObj) this;
+    if ((fst == NULL)
+        || (startState < 1) || (startState > fst->nrStates)
+        || (transClass < 1) || (transClass > fst->nrClasses)) {
+        (*endState) = 0;
+    } else {
+        /* states and classes are 1-based, table entries are 0-based */
+        index = (startState - 1) * fst->nrClasses + transClass - 1;
+        pos = fst->transTabPos + (index * fst->transTabEntrySize);
+        FixedBytesToUnsignedNum(fst->fstStream,fst->transTabEntrySize,& pos,& endStateX);
+        (*endState) = endStateX;
+    }
+}
+
+
+/* see description in header file
+ *
+ * Prepares the enumeration of input epsilon transitions leaving 'startState'.
+ * The input epsilon state table holds one 4-byte signed offset per state,
+ * relative to inEpsStateTabPos; an offset > 0 means transitions exist.
+ * In that case '*inEpsTransFound' is 1 and '*searchState' is the stream
+ * position of the first transition. Otherwise '*inEpsTransFound' is 0 and
+ * '*searchState' is -1; this includes a NULL 'this' and a 'startState'
+ * outside [1..nrStates]. */
+extern void picokfst_kfstStartInEpsTransSearch (picokfst_FST this, picokfst_state_t startState,
+                                                picoos_bool * inEpsTransFound, picoos_int32 * searchState)
+{
+
+    picoos_int32 offs;
+    picoos_uint32 pos;
+
+    kfst_SubObj fst = (kfst_SubObj) this;
+    (*searchState) = -1;
+    (*inEpsTransFound) = 0;
+    if ((fst != NULL) && (startState > 0) && (startState <= fst->nrStates)) {
+        pos = fst->inEpsStateTabPos + (startState - 1) * 4;
+        FixedBytesToSignedNum(fst->fstStream,4,& pos,& offs);
+        if (offs > 0) {
+            (*searchState) = fst->inEpsStateTabPos + offs;
+            (*inEpsTransFound) = 1;
+        }
+    }
+}
+
+
+
+/* see description in header file
+ *
+ * Reads the next (output symbol, end state) entry at stream position
+ * '*searchState'. If an entry is present, '*inEpsTransFound' is 1, '*outSym'
+ * and '*endState' hold the entry and '*searchState' is advanced behind it.
+ * An output symbol equal to PICOKFST_SYMID_ILLEG ends the list:
+ * '*inEpsTransFound' is 0, '*outSym' is PICOKFST_SYMID_ILLEG, '*endState' is 0
+ * and '*searchState' becomes -1. A negative '*searchState' on entry gives the
+ * same "not found" outputs and is left unchanged. */
+extern void picokfst_kfstGetNextInEpsTrans (picokfst_FST this, picoos_int32 * searchState,
+                                            picoos_bool * inEpsTransFound,
+                                            picokfst_symid_t * outSym, picokfst_state_t * endState)
+{
+    kfst_SubObj fst = (kfst_SubObj) this;
+    picoos_uint32 cursor;
+    picoos_int32 decoded;
+
+    if ((*searchState) >= 0) {
+        cursor = (*searchState);
+        BytesToNum(fst->fstStream, &cursor, &decoded);
+        *outSym = (picokfst_symid_t)decoded;
+        if ((*outSym) != PICOKFST_SYMID_ILLEG) {
+            BytesToNum(fst->fstStream, &cursor, &decoded);
+            *endState = (picokfst_state_t)decoded;
+            (*inEpsTransFound) = 1;
+            (*searchState) = cursor;
+            return;
+        }
+        /* end marker reached: invalidate the search state */
+        (*searchState) = -1;
+    }
+
+    /* common "no transition" result */
+    (*inEpsTransFound) = 0;
+    (*outSym) = PICOKFST_SYMID_ILLEG;
+    (*endState) = 0;
+}
+
+
+/* see description in header file
+ *
+ * The table of accepting states holds one byte per state; a state is
+ * accepting when its byte equals 1.
+ * Returns 0 when 'this' is NULL or 'state' is outside [1..nrStates]. */
+extern picoos_bool picokfst_kfstIsAcceptingState (picokfst_FST this, picokfst_state_t state)
+{
+
+    picoos_uint32 pos;
+    picoos_uint32 val;
+
+    kfst_SubObj fst = (kfst_SubObj) this;
+    if ((fst != NULL) && (state > 0) && (state <= fst->nrStates)) {
+        /* states are 1-based, table bytes are 0-based */
+        pos = fst->accStateTabPos + (state - 1);
+        FixedBytesToUnsignedNum(fst->fstStream,1,& pos,& val);
+        return (val == 1);
+    } else {
+        return 0;
+    }
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+/* End picokfst.c */