/* * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /** * @file picobase.h * * base functionality * * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland * All rights reserved. * * History: * - 2009-04-20 -- initial version * */ #ifndef PICOBASE_H_ #define PICOBASE_H_ #include "picoos.h" #ifdef __cplusplus extern "C" { #endif #if 0 } #endif /* maximum number of bytes of an UTF8 character */ #define PICOBASE_UTF8_MAXLEN 4 typedef picoos_uint8 picobase_utf8char[PICOBASE_UTF8_MAXLEN+1]; /* always zero terminated */ typedef picoos_uint8 picobase_utf8; typedef picoos_uint16 picobase_utf16; typedef picoos_uint32 picobase_utf32; /* ***************************************************************/ /* Unicode UTF8 functions */ /* ***************************************************************/ /** * Determines the number of UTF8 characters contained in * the UTF8 string 'utf8str' of maximum length maxlen (in bytes) * @param utf8str : a string encoded in UTF8 * @param maxlen : max length (in bytes) accessible in utf8str * @return >=0 : length of the UTF8 string in number of UTF8 characters * up to the first '\0' or maxlen * @return <0 : not starting with a valid UTF8 character * @remarks strict implementation, not allowing invalid utf8 */ picoos_int32 picobase_utf8_length(const picoos_uint8 *utf8str, const picoos_uint16 maxlen); /** * Determines the number of bytes an UTF8 character used based * on the first byte of the UTF8 character * @param firstchar: the first (and maybe only) byte of an UTF8 character * @return positive value in {1,4} : number of bytes of the UTF8 character * @return 0 :if not a valid UTF8 character start * @remarks strict implementation, not allowing invalid utf8 */ /* picoos_uint8 picobase_det_utf8_length(const picoos_uint8 firstchar); */ #define picobase_det_utf8_length(x) ( ((x)<(picoos_uint8)'\200')?1:(((x)>=(picoos_uint8)'\370')?0:(((x)>=(picoos_uint8)'\360')?4:(((x)>=(picoos_uint8)'\340')?3:(((x)>=(picoos_uint8)'\300')?2:0)))) ) /** * Converts the content of 'utf8str' to lowercase and stores it on 'lowercase' * on the first byte of the UTF8 character * @param utf8str : utf8 string * @param lowercaseMaxLen : maximal number of bytes available in 'lowercase' * @param lowercase : string converted to lowercase (output) * @param done : flag to report success/failure of the operation (output) * @return TRUE if successful, FALSE otherwise */ picoos_int32 picobase_lowercase_utf8_str (picoos_uchar utf8str[], picoos_char lowercase[], picoos_int32 lowercaseMaxLen, picoos_uint8 * done); /** * Converts the content of 'utf8str' to upperrcase and stores it on 'uppercase' * @param utf8str : utf8 string * @param uppercase : string converted to uppercase (output) * @param uppercaseMaxLen : maximal number of bytes available in 'uppercase' * @param done : flag to report success/failure of the operation (output) * @return TRUE if successful, FALSE otherwise */ picoos_int32 picobase_uppercase_utf8_str (picoos_uchar utf8str[], picoos_char uppercase[], int uppercaseMaxLen, picoos_uint8 * done); /** * Gets next UTF8 character 'utf8char' from the UTF8 string * 'utf8s' starting at position 'pos' * @param utf8s : UTF8 string * @param utf8slenmax : max length accessible in utf8s * @param pos : position from where the UTF8 character is checked and copied * (set also as output to the position directly following the UTF8 char) * @param utf8char : zero terminated UTF8 character containing 1 to 4 bytes (output) * @return TRUE if okay * @return FALSE if there is no valid UTF8 char or no more UTF8 char available within utf8len */ picoos_uint8 picobase_get_next_utf8char(const picoos_uint8 *utf8s, const picoos_uint32 utf8slenmax, picoos_uint32 *pos, picobase_utf8char utf8char); /** * Same as picobase_get_next_utf8char * without copying the char to utf8char */ picoos_uint8 picobase_get_next_utf8charpos(const picoos_uint8 *utf8s, const picoos_uint32 utf8slenmax, picoos_uint32 *pos); /** * Gets previous UTF8 character 'utf8char' from the UTF8 string * 'utf8s' starting the backward search at position 'pos-1' * @param utf8s : UTF8 string * @param utf8slenmin : min length accessible in utf8s * @param pos : the search for the prev UTF8 char starts at [pos-1] * (set also as output to the start position of the prev UTF8 character) * @param utf8char : zero terminated UTF8 character containing 1 to 4 bytes (output) * @return TRUE if okay * @return FALSE if there is no valid UTF8 char preceeding pos or no more UTF8 char available within utf8len */ picoos_uint8 picobase_get_prev_utf8char(const picoos_uint8 *utf8s, const picoos_uint32 utf8slenmin, picoos_uint32 *pos, picobase_utf8char utf8char); /** * Same as picobase_get_prev_utf8char * without copying the char to utf8char */ picoos_uint8 picobase_get_prev_utf8charpos(const picoos_uint8 *utf8s, const picoos_uint32 utf8slenmin, picoos_uint32 *pos); /** * returns TRUE if the input string is UTF8 and uppercase * @param str : UTF8 string * @param strmaxlen : max length for the input string * @return TRUE if string is UTF8 and uppercase * @return FALSE otherwise */ extern picoos_bool picobase_is_utf8_uppercase (picoos_uchar str[], picoos_int32 strmaxlen); /** * returns TRUE if the input string is UTF8 and lowercase * @param str : UTF8 string * @param strmaxlen : max length for the input string * @return TRUE if string is UTF8 and lowercase * @return FALSE otherwise */ extern picoos_bool picobase_is_utf8_lowercase (picoos_uchar str[], picoos_int32 strmaxlen); #ifdef __cplusplus } #endif #endif /*PICOBASE_H_*/