diff options
Diffstat (limited to 'lib/picokpdf.c')
-rw-r--r-- | lib/picokpdf.c | 381 |
1 files changed, 381 insertions, 0 deletions
diff --git a/lib/picokpdf.c b/lib/picokpdf.c new file mode 100644 index 0000000..bbdbec4 --- /dev/null +++ b/lib/picokpdf.c @@ -0,0 +1,381 @@ +/* + * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @file picokpdf.c + * + * knowledge handling for pdf + * + * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland + * All rights reserved. + * + * History: + * - 2009-04-20 -- initial version + * + */ + +#include "picoos.h" +#include "picodbg.h" +#include "picoknow.h" +#include "picokpdf.h" + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} +#endif + + +/* ************************************************************/ +/* pdf */ +/* ************************************************************/ + +/* + * @addtogroup picokpdf + * + overview: format of knowledge base pdf file + + This is the format for the dur pdf file: + - Numframes: 1 uint16 + - Vecsize: 1 uint8 + - sampperframe: 1 uint8 + - Phonquantlen: 1 uint8 + - Phonquant: Phonquantlen uint8 + - Statequantlen: 1 uint8 + - Statequantlen: Statequantlen uint8 + - And then numframes x vecsize uint8 + + This is the format for mul (mgc and lfz) pdf files: + - numframes: 1 uint16 + - vecsize: 1 uint8 + - numstates: 1 uint8 + - numframesperstate: numstates uint16 + - ceporder: 1 uint8 + - numvuv 1 uint8 + - numdeltas: 1 uint8 + - scmeanpow: 1 uint8 + - maxbigpow: 1 uint8 + - scmeanpowum KPDF_NUMSTREAMS * ceporder uint8 + - scivarpow KPDF_NUMSTREAMS * ceporder uint8 + + And then numframes x vecsize uint8 + +*/ + + +/* ************************************************************/ +/* pdf data defines */ +/* may not be changed with current implementation */ +/* ************************************************************/ + + +#define KPDF_NUMSTREAMS 3 /* coeff, delta, deltadelta */ + + +/* ************************************************************/ +/* pdf loading */ +/* ************************************************************/ + +static pico_status_t kpdfDURInitialize(register picoknow_KnowledgeBase this, + picoos_Common common) { + picokpdf_pdfdur_t *pdfdur; + picoos_uint16 pos; + + if (NULL == this || NULL == this->subObj) { + return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING, + NULL, NULL); + } + pdfdur = (picokpdf_pdfdur_t *)this->subObj; + + pos = 0; + + pdfdur->numframes = ((picoos_uint16)(this->base[pos+1])) << 8 | + this->base[pos]; + pos += 2; + pdfdur->vecsize = this->base[pos++]; + pdfdur->sampperframe = this->base[pos++]; + pdfdur->phonquantlen = this->base[pos++]; + pdfdur->phonquant = &(this->base[pos]); + pos += pdfdur->phonquantlen; + pdfdur->statequantlen = this->base[pos++]; + pdfdur->statequant = &(this->base[pos]); + pos += pdfdur->statequantlen; + pdfdur->content = &(this->base[pos]); + PICODBG_DEBUG(("numframes %d, vecsize %d, phonquantlen %d, " + "statequantlen %d", pdfdur->numframes, pdfdur->vecsize, + pdfdur->phonquantlen, pdfdur->statequantlen)); + if ((picoos_uint32)(pos + (pdfdur->numframes * pdfdur->vecsize)) != this->size) { + PICODBG_DEBUG(("header-spec size %d, kb-size %d", + pos + (pdfdur->numframes * pdfdur->vecsize), + this->size)); + return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT, + NULL, NULL); + } + PICODBG_DEBUG(("dur pdf initialized")); + return PICO_OK; +} + +static picoos_uint8 convScaleFactorToBig(picoos_uint8 pow, picoos_uint8 bigpow) +{ + if (pow > 0x0F) { + pow = bigpow + (0xFF - pow + 1); /* take 2's complement of negative pow */ + } else if (bigpow >= pow) { + pow = bigpow - pow; + } else { + /* error: bigpow is smaller than input pow */ + return 0; + } + return pow; +} + +static pico_status_t kpdfMULInitialize(register picoknow_KnowledgeBase this, + picoos_Common common) { + picokpdf_pdfmul_t *pdfmul; + picoos_uint16 pos; + picoos_uint8 scmeanpow, maxbigpow, nummean; + picoos_uint8 i; + + if (NULL == this || NULL == this->subObj) { + return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING, + NULL, NULL); + } + pdfmul = (picokpdf_pdfmul_t *)this->subObj; + + pos = 0; + + pdfmul->numframes = ((picoos_uint16)(this->base[pos+1])) << 8 | + this->base[pos]; + pos += 2; + pdfmul->vecsize = this->base[pos++]; + pdfmul->numstates = this->base[pos++]; + { + pdfmul->stateoffset[0] = (picoos_uint16) 0; + for (i=1; i<pdfmul->numstates; i++) { + pdfmul->stateoffset[i] = pdfmul->stateoffset[i-1] + (this->base[pos] | ((picoos_uint16) this->base[pos+1] << 8)); + pos += 2; + } + pos += 2; /* we don't need the last number if we only need the offset (i.e. how to get to the vector start) */ + } + + pdfmul->ceporder = this->base[pos++]; + pdfmul->numvuv = this->base[pos++]; + pdfmul->numdeltas = this->base[pos++]; + scmeanpow = this->base[pos++]; + maxbigpow = this->base[pos++]; + if (maxbigpow < PICOKPDF_BIG_POW) { + PICODBG_ERROR(("bigpow %i is larger than maxbigpow %i defined in pdf lingware", PICOKPDF_BIG_POW, maxbigpow)); + return picoos_emRaiseException(common->em, PICO_EXC_MAX_NUM_EXCEED,NULL,NULL); + } + pdfmul->bigpow = PICOKPDF_BIG_POW; /* what we have to use is the smaller number! */ + + pdfmul->amplif = this->base[pos++]; + + /* bigpow corrected by scmeanpow, multiply means by 2^meanpow to obtain fixed point representation */ + pdfmul->meanpow = convScaleFactorToBig(scmeanpow, pdfmul->bigpow); + if (0 == pdfmul->meanpow) { + PICODBG_ERROR(("error in convScaleFactorToBig")); + return picoos_emRaiseException(common->em, PICO_EXC_MAX_NUM_EXCEED,NULL,NULL); + } + nummean = 3*pdfmul->ceporder; + + pdfmul->meanpowUm = picoos_allocate(common->mm,nummean*sizeof(picoos_uint8)); + pdfmul->ivarpow = picoos_allocate(common->mm,nummean*sizeof(picoos_uint8)); + if ((NULL == pdfmul->meanpowUm) || (NULL == pdfmul->ivarpow)) { + picoos_deallocate(common->mm,(void *) &(pdfmul->meanpowUm)); + picoos_deallocate(common->mm,(void *) &(pdfmul->ivarpow)); + return picoos_emRaiseException(common->em,PICO_EXC_OUT_OF_MEM,NULL,NULL); + } + + /* read meanpowUm and convert on the fly */ + /* meaning of meanpowUm becomes: multiply means from pdf stream by 2^meanpowUm + * to achieve fixed point scaling by big + */ + for (i=0; i<nummean; i++) { + pdfmul->meanpowUm[i] = convScaleFactorToBig(this->base[pos++], pdfmul->bigpow); + } + + /*read ivarpow and convert on the fly */ + for (i=0; i<nummean; i++) { + pdfmul->ivarpow[i] = convScaleFactorToBig(this->base[pos++], pdfmul->bigpow); + } + + /* check numdeltas */ + if ((pdfmul->numdeltas == 0xFF) && (pdfmul->vecsize != (pdfmul->numvuv + pdfmul->ceporder * 3 * (2+1)))) { + PICODBG_ERROR(("header has inconsistent values for vecsize, ceporder, numvuv, and numdeltas")); + return picoos_emRaiseException(common->em,PICO_EXC_FILE_CORRUPT,NULL,NULL); + } + +/* vecsize: 1 uint8 for vuv + + ceporder short for static means + + numdeltas uint8 and short for sparse delta means + + ceporder*3 uint8 for static and delta inverse variances +*/ + if ((pdfmul->numdeltas != 0xFF) && (pdfmul->vecsize != pdfmul->numvuv+pdfmul->ceporder*2+pdfmul->numdeltas*3+pdfmul->ceporder*3)) { + PICODBG_ERROR(("header has inconsistent values for vecsize, ceporder, numvuv, and numdeltas\n" + "vecsize = %i while numvuv+ceporder*2 + numdeltas*3 + ceporder*3 = %i", + pdfmul->vecsize, pdfmul->numvuv + pdfmul->ceporder*2 + pdfmul->numdeltas * 3 + pdfmul->ceporder * 3)); + return picoos_emRaiseException(common->em,PICO_EXC_FILE_CORRUPT,NULL,NULL); + } + pdfmul->content = &(this->base[pos]); + PICODBG_DEBUG(("numframes %d, vecsize %d, numstates %d, ceporder %d, " + "numvuv %d, numdeltas %d, meanpow %d, bigpow %d", + pdfmul->numframes, pdfmul->vecsize, pdfmul->numstates, + pdfmul->ceporder, pdfmul->numvuv, pdfmul->numdeltas, + pdfmul->meanpow, pdfmul->bigpow)); + if ((picoos_uint32)(pos + (pdfmul->numframes * pdfmul->vecsize)) != this->size) { + PICODBG_DEBUG(("header-spec size %d, kb-size %d", + pos + (pdfmul->numframes * pdfmul->vecsize), + this->size)); + return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT, + NULL, NULL); + } + PICODBG_DEBUG(("mul pdf initialized")); + return PICO_OK; +} + +static pico_status_t kpdfPHSInitialize(register picoknow_KnowledgeBase this, + picoos_Common common) { + picokpdf_pdfphs_t *pdfphs; + picoos_uint16 pos; + + if (NULL == this || NULL == this->subObj) { + return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING, + NULL, NULL); + } + pdfphs = (picokpdf_pdfphs_t *)this->subObj; + + pos = 0; + + pdfphs->numvectors = ((picoos_uint16)(this->base[pos+1])) << 8 | + this->base[pos]; + pos += 2; + pdfphs->indexBase = &(this->base[pos]); + pdfphs->contentBase = pdfphs->indexBase + pdfphs->numvectors * sizeof(picoos_uint32); + PICODBG_DEBUG(("phs pdf initialized")); + return PICO_OK; +} + + + +static pico_status_t kpdfMULSubObjDeallocate(register picoknow_KnowledgeBase this, + picoos_MemoryManager mm) { + + + picokpdf_pdfmul_t *pdfmul; + + if ((NULL != this) && (NULL != this->subObj)) { + pdfmul = (picokpdf_pdfmul_t *)this->subObj; + picoos_deallocate(mm,(void *) &(pdfmul->meanpowUm)); + picoos_deallocate(mm,(void *) &(pdfmul->ivarpow)); + picoos_deallocate(mm, (void *) &(this->subObj)); + } + return PICO_OK; +} + +static pico_status_t kpdfDURSubObjDeallocate(register picoknow_KnowledgeBase this, + picoos_MemoryManager mm) { + if (NULL != this) { + picoos_deallocate(mm, (void *) &this->subObj); + } + return PICO_OK; +} + +static pico_status_t kpdfPHSSubObjDeallocate(register picoknow_KnowledgeBase this, + picoos_MemoryManager mm) { + if (NULL != this) { + picoos_deallocate(mm, (void *) &this->subObj); + } + return PICO_OK; +} + +/* we don't offer a specialized constructor for a *KnowledgeBase but + * instead a "specializer" of an allready existing generic + * picoknow_KnowledgeBase */ + +pico_status_t picokpdf_specializePdfKnowledgeBase(picoknow_KnowledgeBase this, + picoos_Common common, + const picokpdf_kpdftype_t kpdftype) { + pico_status_t status; + + if (NULL == this) { + return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING, + NULL, NULL); + } + switch (kpdftype) { + case PICOKPDF_KPDFTYPE_DUR: + this->subDeallocate = kpdfDURSubObjDeallocate; + this->subObj = picoos_allocate(common->mm,sizeof(picokpdf_pdfdur_t)); + if (NULL == this->subObj) { + return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, + NULL, NULL); + } + status = kpdfDURInitialize(this, common); + break; + case PICOKPDF_KPDFTYPE_MUL: + this->subDeallocate = kpdfMULSubObjDeallocate; + this->subObj = picoos_allocate(common->mm,sizeof(picokpdf_pdfmul_t)); + if (NULL == this->subObj) { + return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, + NULL, NULL); + } + status = kpdfMULInitialize(this, common); + break; + case PICOKPDF_KPDFTYPE_PHS: + this->subDeallocate = kpdfPHSSubObjDeallocate; + this->subObj = picoos_allocate(common->mm,sizeof(picokpdf_pdfphs_t)); + if (NULL == this->subObj) { + return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, + NULL, NULL); + } + status = kpdfPHSInitialize(this, common); + break; + + default: + return picoos_emRaiseException(common->em, PICO_ERR_OTHER, + NULL, NULL); + } + + if (status != PICO_OK) { + picoos_deallocate(common->mm, (void *) &this->subObj); + return picoos_emRaiseException(common->em, status, NULL, NULL); + } + return PICO_OK; +} + + +/* ************************************************************/ +/* pdf getPdf* */ +/* ************************************************************/ + +picokpdf_PdfDUR picokpdf_getPdfDUR(picoknow_KnowledgeBase this) { + return ((NULL == this) ? NULL : ((picokpdf_PdfDUR) this->subObj)); +} + +picokpdf_PdfMUL picokpdf_getPdfMUL(picoknow_KnowledgeBase this) { + return ((NULL == this) ? NULL : ((picokpdf_PdfMUL) this->subObj)); +} + +picokpdf_PdfPHS picokpdf_getPdfPHS(picoknow_KnowledgeBase this) { + return ((NULL == this) ? NULL : ((picokpdf_PdfPHS) this->subObj)); +} + + +#ifdef __cplusplus +} +#endif + + +/* end */ |