diff options
Diffstat (limited to 'stack/xml/xml_parse.c')
-rw-r--r-- | stack/xml/xml_parse.c | 1502 |
1 files changed, 1502 insertions, 0 deletions
diff --git a/stack/xml/xml_parse.c b/stack/xml/xml_parse.c new file mode 100644 index 0000000..3f9e2e1 --- /dev/null +++ b/stack/xml/xml_parse.c @@ -0,0 +1,1502 @@ +/***************************************************************************** +** +** Name: xml_parse.c +** +** File: XML Parser +** +** Copyright (c) 2000-2011, Broadcom Corp., All Rights Reserved. +** Broadcom Bluetooth Core. Proprietary and confidential. +** +*****************************************************************************/ +#include "bt_target.h" +#include "xml_pars_api.h" +#include "data_types.h" +#include "bt_types.h" +/* The XML Parser is dependent on the Object Store. At present +** the object store resides in GOEP and hence the parser is +** dependent on GOEP. The parser only uses the Object Store +** in GOEP, so if the Object Store is separated from GOEP in the +** future, the parser will not be dependent on GOEP. +*/ + +#include <stdlib.h> +#include <string.h> + +#ifndef BIP_TRACE_XML +#define BIP_TRACE_XML FALSE +#endif + +#if (defined(BIP_TRACE_XML) && BIP_TRACE_XML == TRUE) +#define XML_TRACE_DEBUG0(m) {BT_TRACE_0(TRACE_LAYER_GOEP, TRACE_TYPE_DEBUG, m);} +#define XML_TRACE_DEBUG1(m,p1) {BT_TRACE_1(TRACE_LAYER_GOEP, TRACE_TYPE_DEBUG, m,p1);} +#define XML_TRACE_DEBUG2(m,p1,p2) {BT_TRACE_2(TRACE_LAYER_GOEP, TRACE_TYPE_DEBUG, m,p1,p2);} +#define XML_TRACE_DEBUG3(m,p1,p2,p3) {BT_TRACE_3(TRACE_LAYER_GOEP, TRACE_TYPE_DEBUG, m,p1,p2,p3);} +#define XML_TRACE_DEBUG4(m,p1,p2,p3,p4) {BT_TRACE_4(TRACE_LAYER_GOEP, TRACE_TYPE_DEBUG, m,p1,p2,p3,p4);} +#define XML_TRACE_DEBUG5(m,p1,p2,p3,p4,p5) {BT_TRACE_5(TRACE_LAYER_GOEP, TRACE_TYPE_DEBUG, m,p1,p2,p3,p4,p5);} +#define XML_TRACE_DEBUG6(m,p1,p2,p3,p4,p5,p6) {BT_TRACE_6(TRACE_LAYER_GOEP, TRACE_TYPE_DEBUG, m,p1,p2,p3,p4,p5,p6);} +#else +#define XML_TRACE_DEBUG0(m) +#define XML_TRACE_DEBUG1(m,p1) +#define XML_TRACE_DEBUG2(m,p1,p2) +#define XML_TRACE_DEBUG3(m,p1,p2,p3) +#define XML_TRACE_DEBUG4(m,p1,p2,p3,p4) +#define XML_TRACE_DEBUG5(m,p1,p2,p3,p4,p5) +#define XML_TRACE_DEBUG6(m,p1,p2,p3,p4,p5,p6) +#endif + +/***************************************************************************** +** Constants +*****************************************************************************/ + +#define XML_ST '<' +#define XML_GT '>' +#define XML_QM '?' +#define XML_EX '!' +#define XML_EM '/' /* End Mark */ +#define XML_CO ':' +#define XML_EQ '=' +#define XML_SQ '\'' +#define XML_DQ '"' +#define XML_AM '&' +#define XML_SC ';' +#define XML_PD '#' +#define XML_HX 'x' +#define XML_HY '-' +#define XML_LB '[' + +#define XML_LT_STR "lt" +#define XML_GT_STR "gt" +#define XML_AMP_STR "amp" +#define XML_APOS_STR "apos" +#define XML_QUOT_STR "quot" + +#define XML_QTAG_END_STR "?>" +#define XML_COMM_STR "--" +#define XML_COMM_END_STR "-->" +#define XML_CDS_STR "[CDATA[" +#define XML_CDS_END_STR "]]>" +#define XML_DOCT_STR "<'\"" + +static const UINT8 xml_name_srch[] = ":=/> \t\n\r"; + + +/***************************************************************************** +** Type Definitions +*****************************************************************************/ + +enum +{ + XML_PASS_WS, + XML_SKIP_WS, + XML_NORM_WS +}; +typedef UINT16 tXML_WS_OP; + + + +/***************************************************************************** +** Globals +** +** The global below is used as the buffer set (tXML_BFR_SET) in a local +** variable (of type tXML_MUL_STATE) in XML_Parse. The buffer set memory, is +** separated from the rest of tXML_MUL_STATE to make it easy to change the +** allocation of its memory if found necessary. See xml_alloc_bfr_set. +*****************************************************************************/ + +/***************************************************************************** +** Macro Functions +*****************************************************************************/ + +#define XML_EOS(p_st) ((p_st)->curr_res <= 0) /* End Of Store */ +/* white space: " ", \t, \r, \n */ +#define XML_IS_WS(c) (((c) == 0x20) || ((c) == 0x9) || \ + ((c) == 0xD) || ((c) == 0xA) || \ + ((c) == 0x00) ) + + +/***************************************************************************** +** Function Prototypes +*****************************************************************************/ + +static BOOLEAN xml_get_next(tXML_MUL_STATE *, tXML_WS_OP); + +static BOOLEAN xml_find_ch(tXML_MUL_STATE *, UINT8, tXML_WS_OP); + +static void xml_incr_pars_res(tXML_MUL_STATE *, tXML_RESULT); + +static void xml_set_bfr(tXML_MUL_STATE *, UINT8); + +/* parsing static functions */ + +static BOOLEAN xml_elems(tXML_MUL_STATE *, BOOLEAN); + +static BOOLEAN xml_qm_elem(tXML_MUL_STATE *); + +static BOOLEAN xml_ex_elem(tXML_MUL_STATE *, BOOLEAN); + +static BOOLEAN xml_tag_elem(tXML_MUL_STATE *); + +static BOOLEAN xml_etag_elem(tXML_MUL_STATE *); + +#define XML_SET_CLEAR 0 +#define XML_SET_NAME 1 +#define XML_SET_VALUE 2 + + + + +/***************************************************************************** +** API Functions +*****************************************************************************/ + +void XML_InitPars(tXML_MUL_STATE *p_st, tXML_CBACK xml_cback, void *p_usr_data) +{ + memset(p_st, 0, sizeof(tXML_MUL_STATE)); + p_st->cback = xml_cback; + p_st->p_usr_data = p_usr_data; + + /* by memset() + p_st->p_data_bfr = NULL; + p_st->next_token = 0; + p_st->curr_res = 0; + p_st->pars_res = XML_SUCCESS; + p_st->skip_next_nl = FALSE; + + p_st->prefix.p = NULL; + p_st->name.p = NULL; + p_st->value.p = NULL; + p_st->prefix.len= 0; + p_st->name.len = 0; + p_st->value.len = 0; + + p_st->status = XML_STS_INIT; + */ +} + + + +/***************************************************************************** +** +** Function XML_MulParse +** +** Description +** The current implementation of the xml_pars_api supports only those +** XML-contructs needed in BPP SOAP-messages. The parser must have a +** small footprint and is therefore small and simple. +** +** According to SOAP a message must not contain the doctypedecl construct +** (production) and it must not contain Processing Instructions (PI +** production), i.e. these constructs are not supported. In addition, +** CDATA sections, any external or internal entities and the XML +** Declaration are not supported (not used in BPP). Should any of these +** be included in a message being parsed, they will be reported returning +** a warning code. The parser will then try to find the next tag. +** When the parser reports an XML-event using the callback it will always +** continue, even if the callback returns false. All strings in event +** data passed with the callback are limited to 64 bytes in size, except +** the prefix string which has 32 as max size. Consequtive XML_CHARDATA +** events are not supported. Leading and trailing white space is removed +** from the value string before sending the XML_CHARDATA event. +** +** This function and also all other helping static parsing functions use +** more than one return statement in a function. The reason is that +** a parse error has been found and to exit as soon as possible. +** If one had used only one return in each function, the path +** representing a correct xml syntax had been expressed with very deeply +** nested if-statements. +** +** Parameters +** see h-file +** Returns +** see h-file +*****************************************************************************/ + +tXML_RESULT XML_MulParse(tXML_MUL_STATE *p_st, tXML_OS *p_os) +{ + BOOLEAN found; + BOOLEAN query, partial = FALSE; + BOOLEAN parse_ok = TRUE; + int keep_size; + tXML_RESULT res = XML_SUCCESS; + tXML_RESULT old_pars_res; + + p_st->curr_res = 1; /* not EOS */ + memcpy(&p_st->xml_os, p_os, sizeof(tXML_OS)); + old_pars_res = p_st->pars_res; + p_st->pars_res = XML_SUCCESS; + p_st->prefix.len = 0; + p_st->name.len = 0; + p_st->value.len = 0; + p_st->p_last_stm = 0; + p_st->p_copy = 0; + +#if ((defined (BIP_TRACE_XML) && BIP_TRACE_XML == TRUE) || (defined FOLDER_DEBUG_XML && FOLDER_DEBUG_XML== TRUE)) + XML_TRACE_DEBUG4("XML_MulParse status:%d, pars_res: %d, begin:%x, end:x%x", + p_st->status, old_pars_res, p_os->p_begin, p_os->p_end); +#endif + + /* this do-while(0) loop is to avoid too many return statements in this routine. + * it's easier to "cleanup" with only one return statement */ + if(p_st->status == XML_STS_INIT) + { + + p_st->p_cur = p_os->p_begin; +#if ((defined (BIP_TRACE_XML) && BIP_TRACE_XML == TRUE) || (defined FOLDER_DEBUG_XML && FOLDER_DEBUG_XML== TRUE)) + XML_TRACE_DEBUG1("p_cur:x%x", p_st->p_cur); +#endif + do + { + if (!xml_get_next(p_st, XML_PASS_WS)) /* obj store empty or err */ + { + res = XML_OBJ_ST_EMPTY; + break; + } + + found = FALSE; + while (!XML_EOS(p_st) && !found) + { /* skip all but top element */ + if (!xml_find_ch(p_st, XML_ST, XML_PASS_WS) || + !xml_get_next(p_st, XML_PASS_WS)) + { + res = XML_ERR; + break; + } + + if (p_st->next_token == XML_QM) + { + parse_ok = xml_qm_elem(p_st); + } + else if (p_st->next_token == XML_EX) + { + parse_ok = xml_ex_elem(p_st, TRUE); + } + else if (p_st->next_token == XML_EM) + { + parse_ok = FALSE; + if (!xml_get_next(p_st, XML_PASS_WS)) + { + res = XML_ERR; + break; + } + } + else + { + found = TRUE; + parse_ok = TRUE; + } + + if (!parse_ok) + xml_incr_pars_res(p_st, XML_ERR); + } + } while (0); + p_st->status = XML_STS_1STM; + } + else if(old_pars_res == XML_NO_PROP) + { + } + else + { +#if ((defined (BIP_TRACE_XML) && BIP_TRACE_XML == TRUE) || (defined FOLDER_DEBUG_XML && FOLDER_DEBUG_XML== TRUE)) + XML_TRACE_DEBUG2("p_st->last_bfr.p:x%x, p_st->used_last_bfr:%d", + p_st->last_bfr.p, p_st->used_last_bfr); +#endif + +/* if there was some data left, read it here. */ + if(p_st->partial_st.last_bfr.p && p_st->partial_st.used_last_bfr ) + { + memcpy(p_st->last_bfr.p, p_st->partial_st.last_bfr.p, p_st->partial_st.used_last_bfr); + p_st->used_last_bfr = p_st->partial_st.used_last_bfr; + p_st->last_bfr.p[p_st->partial_st.used_last_bfr] = 0; + p_st->event_data.part.parse = p_st->partial_st.event_data.part.parse; + + /* set length to 0 */ + p_st->partial_st.used_last_bfr = 0; + XML_TRACE_DEBUG1("retrieved PARTIAL data = [%s]\n", p_st->last_bfr.p); + + p_st->p_cur = p_st->last_bfr.p; + /* continuation packet */ + /* read a ch, setup xml_set_bfr */ + xml_get_next(p_st, XML_PASS_WS); + p_st->event_data.copy.p_begin = p_st->xml_os.p_begin; + p_st->event_data.copy.last.p = p_st->last_bfr.p; + p_st->event_data.copy.last.len = p_st->used_last_bfr; + p_st->cback(XML_COPY, &(p_st->event_data), p_st->p_usr_data); + } + else + { + if(p_st->used_last_bfr == 0) + { + p_st->p_cur = p_os->p_begin; + xml_get_next(p_st, XML_PASS_WS); + } + else + return XML_NO_MEM; + } +#if ((defined (BIP_TRACE_XML) && BIP_TRACE_XML == TRUE) || (defined FOLDER_DEBUG_XML && FOLDER_DEBUG_XML== TRUE)) + XML_TRACE_DEBUG1("p_st->p_cur:x%x", p_st->p_cur); +#endif + } + + XML_TRACE_DEBUG0("XML_MulParse end while"); + + if(res == XML_SUCCESS) + { + /* here we found "<(a-z)" */ + if (!XML_EOS(p_st)) + { + if(p_st->status == XML_STS_1STM) + { + /* remeber the beginning position right after '<' in the first line */ + /* if the first line can't be parsed at first round, save it to the second parse */ + p_st->p_copy = p_st->p_cur - 1; + parse_ok = xml_tag_elem(p_st); + } + + /* parsed the first line */ + XML_TRACE_DEBUG0("XML_MulParse exit xml_tag_elem"); + + if (!parse_ok) + { + query = p_st->cback(XML_QUERY, &(p_st->event_data), p_st->p_usr_data); + + /* if first line parsing is not completed while reach the end of stack, ERROR occurs */ + if (query == TRUE) + xml_incr_pars_res(p_st, XML_ERR); + else /* first line parsing to be continued, copy partial data at later point*/ + partial = TRUE; + } + else /* first line is parsed ok, change parsing status */ + p_st->status = XML_STS_1TAG; + + + + if (!XML_EOS(p_st) && parse_ok) + { + parse_ok = xml_elems(p_st, parse_ok); + query = p_st->cback(XML_QUERY, &(p_st->event_data), p_st->p_usr_data); + if (parse_ok == FALSE || query == FALSE) + { + partial = TRUE; + + } + else + p_st->status = XML_STS_DONE; + } + + /* copy partial data if any */ + if (partial) + { + if(p_st->pars_res == XML_NO_PROP) + { + p_st->p_cur = p_st->p_copy; + p_st->event_data.part.parse = p_st->pars_res; + p_st->event_data.part.p_keep = p_st->p_cur; + XML_TRACE_DEBUG1("p_st->p_cur:x%x (last_stm)", p_st->p_cur); + p_st->cback(XML_PARTIAL, &(p_st->event_data), p_st->p_usr_data); + xml_incr_pars_res(p_st, XML_NO_END); + } + else + { + if( p_st->last_bfr.p && + (p_st->p_copy > p_st->xml_os.p_begin) && + (p_st->p_copy < p_st->xml_os.p_end) ) + { + keep_size = p_st->xml_os.p_end - p_st->p_copy; + if(keep_size < p_st->last_bfr.len) + { + /* store the partial data to a temporary buffer, + NOT to the queue of buffers as it would overwrite current ones! */ + if(p_st->partial_st.last_bfr.p ) + { + XML_TRACE_DEBUG0("Store partial data\n"); + BCM_STRNCPY_S((char *)p_st->partial_st.last_bfr.p, 512, (char *)p_st->p_copy, keep_size); + p_st->partial_st.used_last_bfr= keep_size; + p_st->partial_st.last_bfr.p[keep_size] = 0; + p_st->partial_st.event_data.part.parse = p_st->pars_res; + p_st->partial_st.event_data.part.p_keep= p_st->last_bfr.p; + } + else + XML_TRACE_DEBUG0("ERROR to store partial data"); + + p_st->cback(XML_PARTIAL, &(p_st->event_data), p_st->p_usr_data); + xml_incr_pars_res(p_st, XML_NO_END); + } + } + }/* else NO_PROP */ + } /* end of partial */ + } /* end of !XML_EOS(p_st) */ + } /* end of res == XML_SUCCESS */ + + + return p_st->pars_res; +} + + +/***************************************************************************** +** Static Functions +*****************************************************************************/ + + + +/***************************************************************************** +** +** Function xml_set_bfr +** +** Description +** Sets the buffer that is going to be used when tokens are pushed from +** p_st->next_token into some buffer in the buffer set. +** +** Parameters +** p_st (in/out) : the parser state +** p_bfr (in) : the buffer that will get all tokens (characters) +** NULL is allowed in which case no buffer is used. +** bfr_max_ind (in) : the max index into the buffer in which a non-null +** char may be stored +** +** Returns +** - +*****************************************************************************/ +static void xml_set_bfr(tXML_MUL_STATE *p_st, UINT8 set) +{ + switch(set) + { + case XML_SET_NAME: + p_st->name.p = p_st->p_cur - 1; + p_st->p_data_bfr = p_st->name.p; + p_st->name.len = 0; + break; + case XML_SET_VALUE: + p_st->value.p = p_st->p_cur - 1; + p_st->p_data_bfr = p_st->value.p; + p_st->value.len = 0; + break; + default: + p_st->p_data_bfr = NULL; + } +} + + +/***************************************************************************** +** +** Function xml_write_bfr +** +** Description +** Pushes (copies) the character from p_st->next_token to the buffer, if +** any, that has been set calling xml_set_bfr. +** +** Parameters +** p_st (in/out) : the parser state +** +** Returns +** - +*****************************************************************************/ + +static void xml_write_bfr(tXML_MUL_STATE *p_st) +{ + if (p_st->p_data_bfr) + { + if(p_st->p_data_bfr == p_st->name.p) + p_st->name.len++; + else + p_st->value.len++; + } +} + + +/***************************************************************************** +** +** Function xml_incr_pars_res +** +** Description +** Sets the final parsing result if the new_res provided has +** higher rank than the current parsing result. +** +** Parameters +** p_st (in/out) : the parser state +** new_res (in) : the new parsing result +** +** Returns +** - +*****************************************************************************/ + +static void xml_incr_pars_res(tXML_MUL_STATE *p_st, tXML_RESULT new_res) +{ + if (new_res > p_st->pars_res) + { + switch(p_st->pars_res) + { + /* preserve these error messages */ + case XML_OBJ_ST_EMPTY: + case XML_NO_MEM: /* no last_bfr.p, and the tXML_MUL_STATE is not in init */ + case XML_NO_PROP: /* run out of tXML_PROP */ + break; + + default: + /* + case XML_SUCCESS: + case XML_WARNING: + case XML_ERR: + */ + p_st->pars_res = new_res; + break; + } + } +} + + +/***************************************************************************** +** +** Function xml_read_char +** +** Description +*****************************************************************************/ +static void xml_read_char(tXML_MUL_STATE *p_st) +{ + BOOLEAN get_new = FALSE; + + if (p_st->p_cur && p_st->p_cur >= p_st->last_bfr.p && p_st->p_cur < (p_st->last_bfr.p + p_st->used_last_bfr)) + { + /* left over from previous parse */ + p_st->next_token = *p_st->p_cur; + if(p_st->next_token == 0) + { + /* leftover is done, use the new one */ + p_st->p_cur = p_st->xml_os.p_begin; + p_st->last_bfr.p[0] = 0; + p_st->used_last_bfr = 0; + get_new = TRUE; + } + else + { + p_st->p_cur++; + p_st->curr_res = 1; + } + } + else + { + if(p_st->p_cur == (p_st->last_bfr.p + p_st->used_last_bfr)) + { + p_st->used_last_bfr = 0; + p_st->p_cur = p_st->xml_os.p_begin; + } + get_new = TRUE; + } + + if(get_new) + { + if(p_st->p_cur && p_st->p_cur < p_st->xml_os.p_end) + { + /* use buffer given to XML_Parse */ + p_st->next_token = *p_st->p_cur; + p_st->p_cur++; + p_st->curr_res = 1; + } + else + p_st->curr_res = 0; + } + + +/* + XML_TRACE_DEBUG4("xml_read_char p_cur: x%x, curr_res:%d, get_new:%d, token:%c", + p_st->p_cur, p_st->curr_res, get_new, p_st->next_token); +*/ +} + +/***************************************************************************** +** +** Function xml_get_next +** +** Description +** Writes the character in p_st->next_token to the current buffer if set. +** Then the next character is read from the Object Store into +** p_st->next_token. The first time get_next is called, the current +** buffer must be NULL, i.e p_st->data_bfr must be NULL. +** +** xml_get_next handles end-of-line as specified in the xml spec. It +** passes, skips or normalises (p.29 in XML spec) white spaces (ws) +** as specified in the ws_op param. Note, the ws_op applies when +** getting one (or many characters) from Object Store into the +** p_st->next_token. It does not apply when pushing the (initial) +** p_st->next_token to the current buffer. +** +** The characters are read one by one from the Object Store. +** Presently this is not anticipated to cause any problems +** regarding reading speed. Should it become a problem in the +** future, a new buffer could be introduced into which a chunk +** of characters could be put, using one Object Store read call. +** The get_next function would then get the next character from +** the new buffer. +** +** Parameters +** p_st (in/out) : the parser state +** ws_op (in) : the requested white space handling. +** +** Returns +** True if a character was successfully read into p_st->next_token. +** False otherwise. +*****************************************************************************/ + +static BOOLEAN xml_get_next(tXML_MUL_STATE *p_st, tXML_WS_OP ws_op) +{ + xml_write_bfr(p_st); + do + { + xml_read_char(p_st); + } while ((ws_op == XML_SKIP_WS) && XML_IS_WS(p_st->next_token) && + !XML_EOS(p_st)); + + + /* handle end-of-line if any after the do-while above */ + + if (!XML_EOS(p_st) && (p_st->next_token == 0xA) && p_st->skip_next_nl) + { /* we have previously found 0xD (cr) and have set the state var + ** p_st->skip_next_nl,see below + */ + xml_read_char(p_st); + } + p_st->skip_next_nl = FALSE; + + if (XML_EOS(p_st)) + { + p_st->next_token = 0; + return FALSE; + } + + if (p_st->next_token == 0xD) + { + p_st->next_token = 0xA; + p_st->skip_next_nl = TRUE; + } + + if ((ws_op == XML_NORM_WS) && + ((p_st->next_token == 0xA) || (p_st->next_token == 0x9))) + { + p_st->next_token = 0x20; + } + + return TRUE; +} + + +/***************************************************************************** +** +** Function xml_find_ch +** +** Description +** Searches for the character given in ch. It starts searching in +** p_st->next_token and if not found it gets characters from the Object +** Store until ch is in p_st->next_token. +** +** Parameters +** p_st (in/out) : the parser state +** ch (in) : the character to search for +** ws_op (in) : the requested white space handling when getting chars +** +** Returns +** True if the character was found. +** False otherwise. +*****************************************************************************/ + +static BOOLEAN xml_find_ch(tXML_MUL_STATE *p_st, UINT8 ch, tXML_WS_OP ws_op) +{ + while (!XML_EOS(p_st) && (p_st->next_token != ch)) + xml_get_next(p_st, ws_op); + return (BOOLEAN) !XML_EOS(p_st); +} + + +/***************************************************************************** +** +** Function xml_find_ch_n +** +** Description +** Same function as xml_find_ch, except that any character in p_str +** that is found will stop the search. +** +** Parameters +** p_st (in/out) : the parser state +** p_str (in) : the string containing the characters searched for. +** Must not be NULL or an empty string. +** +** Returns +** True if any of the characters in p_str was found. +** Fase otherwise. +*****************************************************************************/ + +static BOOLEAN xml_find_ch_n(tXML_MUL_STATE *p_st, const UINT8 *p_str) +{ + const UINT8 *p_tmp; + + while (!XML_EOS(p_st)) + { + for (p_tmp = p_str; *p_tmp; p_tmp++) + { + if (p_st->next_token == *p_tmp) + return TRUE; + } + xml_get_next(p_st, XML_PASS_WS); + } + return FALSE; +} + + +/***************************************************************************** +** +** Function xml_find_str +** +** Description +** Searches for p_str (i.e the exact sequence of characters in p_str) in +** the input from Object Store. The function ends with the character +** succeeding p_str in the input, (i.e that char is in p_st->next_token +** upon return) or with XML_EOS. +** +** Parameters +** p_st (in/out) : the parser state +** p_str (in) : the string to search for and pass by. +** Must not be NULL or an empty string. +** +** Returns +** True if the string was found. +** False otherwise. +*****************************************************************************/ + +static BOOLEAN xml_find_str(tXML_MUL_STATE *p_st, const UINT8 *p_str) +{ + const UINT8 *p_tmp; + + p_tmp = p_str; + while (*p_tmp && !XML_EOS(p_st)) + { + for (p_tmp = p_str; *p_tmp && !XML_EOS(p_st); p_tmp++) + { + if (p_st->next_token != *p_tmp) + break; + xml_get_next(p_st, XML_PASS_WS); + } + + if ((p_tmp == p_str) && !XML_EOS(p_st)) + { + xml_get_next(p_st, XML_PASS_WS); + } + } + + return (BOOLEAN) (*p_tmp == 0); +} + + +/***************************************************************************** +** +** Function xml_consume_str +** +** Description +** Checks for p_str i.e that the first character from p_str is in +** p_st->next_token and that the successors immediately follows in the +** Object Store. The p_str must not be last in the Object Store. +** +** Parameters +** p_st (in/out) : the parser state +** p_str (in) : the string to check if present next and to pass by +** Must not be NULL. +** +** Returns +** True if the string was found and was not last in the Object Store. +** False otherwise. +*****************************************************************************/ + +static BOOLEAN xml_consume_str(tXML_MUL_STATE *p_st, const UINT8 *p_str) +{ + do + { + if (p_st->next_token != *p_str) + return FALSE; + p_str++; + if (!xml_get_next(p_st, XML_PASS_WS)) + return FALSE; + } while (*p_str); + return TRUE; +} + + +/***************************************************************************** +** +** Function xml_resolve_refs +** +** Description +** Resolves predefined entity references (sect. 4.6 in the XML spec) +** and character references (sect 4.1) that may be found in +** AttValue and content. (According to the XML spec it may also +** be in an EntityValue. However EntityValues are in the +** doctypedecl part which is not supported). +** +** The AttValue and content not beginning with a tag, must be +** stored in the p_st->p_bfr_set->value buffer. +** +** Parameters +** p_st (in/out) : the parser state +** +** Returns +** - +*****************************************************************************/ + +static void xml_resolve_refs(tXML_MUL_STATE *p_st) +{ + UINT8 *p_srch; /* where next search for & starts */ + UINT8 *p_am; /* points to found & */ + UINT8 *p_sc; /* points to found ; and succeeding chars */ + UINT8 *p_start; + UINT8 *p_tmp; + UINT32 ch_code; + UINT32 tmp_code; + INT8 i; + BOOLEAN resolved; + UINT16 len_left; + + p_srch = p_st->value.p; + len_left = p_st->value.len; + do + { + p_start = p_srch; + p_am = (UINT8*) strchr((char*) p_srch, XML_AM); + p_sc = p_am ? (UINT8*) strchr((char*) p_am, XML_SC) : NULL; + /* make sure the ptr does not exceed the end of the value str */ + if(p_sc > (len_left + p_start)) + p_sc = NULL; + + if (p_am && p_sc) + { + resolved = FALSE; + p_tmp = p_am + 1; + *p_sc = 0; /* terminate the ref by replacing ; with 0 */ + if (*p_tmp == XML_PD) /* character ref */ + { + if (p_tmp[1] == XML_HX) + *p_tmp = '0'; + else + { + for(p_tmp++; *p_tmp == '0'; p_tmp++) + { + ; + } + } + + ch_code = strtoul((char*) p_tmp, NULL, 0); + /* skip leading zero bytes */ + for (i = 3; (i >= 0) && !(ch_code >> i * 8); i--) + { + ; + } + p_tmp = p_am; + while (i >= 0) + { + /* mask out one byte and shift it rightmost */ + /* preceding bytes must be zero so shift left first */ + tmp_code = ch_code << ((3-i) * 8); + *p_tmp = (UINT8) (tmp_code >> 24); + p_tmp++; + i--; + } + resolved = TRUE; + } + else if (p_tmp < p_sc) /* check if predefined ref */ + { + resolved = TRUE; + if (strcmp((char*) p_tmp, XML_LT_STR) == 0) + { + *p_am = XML_ST; + p_st->value.len = p_st->value.len - 3; /* remove the length for lt; */ + p_st->p_cur = p_st->p_cur - 3; + } + else if (strcmp((char*) p_tmp, XML_GT_STR) == 0) + { + *p_am = XML_GT; + p_st->value.len = p_st->value.len - 3; /* remove the length for gt; */ + p_st->p_cur = p_st->p_cur - 3; + } + else if (strcmp((char*) p_tmp, XML_AMP_STR) == 0) + { + *p_am = XML_AM; + p_st->value.len = p_st->value.len - 4; /* remove the length for amp; */ + p_st->p_cur = p_st->p_cur - 4; + } + else if (strcmp((char*) p_tmp, XML_APOS_STR) == 0) + { + *p_am = XML_SQ; + p_st->value.len = p_st->value.len - 5; /* remove the length for apos; */ + p_st->p_cur = p_st->p_cur - 5; + } + else if (strcmp((char*) p_tmp, XML_QUOT_STR) == 0) + { + *p_am = XML_DQ; + p_st->value.len = p_st->value.len - 5; /* remove the length for quot; */ + p_st->p_cur = p_st->p_cur - 5; + } + else + resolved = FALSE; + } + + if (resolved) + { + p_srch = p_tmp; /* will contain char after ; */ + p_sc++; + while(*p_sc) + { + *p_tmp++ = *p_sc++; + } + } + else + { + *p_sc = XML_SC; /* restore the ref end */ + p_srch = p_sc + 1; + } + + } /* end if */ + } while (*p_srch && p_am && p_sc); +} + + +/***************************************************************************** +** +** Function xml_remove_trail_ws +** +** Description +** Removes trailing white space from the p_st->p_data_bfr buffer. +** +** Parameters +** p_st (in/out) : the parser state +** +** Returns +** - +*****************************************************************************/ + +static void xml_remove_trail_ws(tXML_MUL_STATE *p_st) +{ + UINT16 xx; + + if(p_st->value.p) + { + xx = p_st->value.len; + while(xx && XML_IS_WS(p_st->value.p[xx-1])) + xx--; + p_st->value.len = xx; + } + +} + + +/***************************************************************************** +** Parsing Static Functions +*****************************************************************************/ + + +/***************************************************************************** +** +** Function xml_name +** +** Description +** Parses a name and its prefix if any. The prefix and name buffers +** are set. +** The functions ends with either white space, +** XML_EQ, XML_EM or XML_GT in p_st->next_token or with XML_EOS. +** +** Parameters +** p_st (in/out) : the parser state +** +** Returns +** True if no error was found. +** False otherwise. +*****************************************************************************/ + +static BOOLEAN xml_name(tXML_MUL_STATE *p_st) +{ + BOOLEAN found = FALSE; + + p_st->prefix.p = NULL; + p_st->prefix.len = 0; + xml_set_bfr(p_st, XML_SET_NAME); + xml_find_ch_n(p_st, xml_name_srch); + if (!XML_EOS(p_st) && (p_st->next_token == XML_CO)) + { + if (p_st->name.len) + { + found = TRUE; + /* p_st->name.len is string size in name buffer, \0 excl. + */ + p_st->prefix.p = p_st->name.p; + p_st->prefix.len = p_st->name.len; + } + xml_get_next(p_st, XML_PASS_WS); + xml_set_bfr(p_st, XML_SET_NAME); + if (!XML_EOS(p_st)) + { + xml_find_ch_n(p_st, xml_name_srch + 1); + } + } + + found = (BOOLEAN) (found || p_st->name.len); + if(found) + xml_set_bfr(p_st, XML_SET_CLEAR); + return found; +} + + +/***************************************************************************** +** +** Function xml_attributes +** +** Description +** Parses an attribute list. +** The functions ends with the XML_GT or XML_EM char or XML_EOS. +** Error is reported if the attribute list is last in the Object +** Store. +** Sends a XML_ATTRIBUTE event in the user callback for each +** attribute found. +** +** Parameters +** p_st (in/out) : the parser state +** +** Returns +** True if no error was found. +** False otherwise. +*****************************************************************************/ + +static BOOLEAN xml_attributes(tXML_MUL_STATE *p_st) +{ + BOOLEAN cb_ret = TRUE; + UINT8 q_ch; + + XML_TRACE_DEBUG1("[xml_parse] xml_attributes: res= %d", p_st->pars_res); + + while ( cb_ret) + { + /* if this is a white space, then the next character is read from the + Object Store into p_st->next_token */ + if( XML_IS_WS(p_st->next_token) ) + { + if (!xml_get_next(p_st, XML_SKIP_WS)) + return FALSE; + } + + if (p_st->next_token == XML_EQ) + return FALSE; + + if ((p_st->next_token == XML_GT) || (p_st->next_token == XML_EM)) + return TRUE; + if (!xml_name(p_st) || XML_EOS(p_st)) + { + return FALSE; + } + if(XML_IS_WS(p_st->next_token)) + { + if (!xml_get_next(p_st, XML_SKIP_WS)) + return FALSE; + } + + if (p_st->next_token != XML_EQ) + return FALSE; + + if (!xml_get_next(p_st, XML_SKIP_WS)) + return FALSE; + + if ((p_st->next_token != XML_SQ) && (p_st->next_token != XML_DQ)) + return FALSE; + + q_ch = p_st->next_token; + if (!xml_get_next(p_st, XML_PASS_WS)) + return FALSE; + + + xml_set_bfr(p_st, XML_SET_VALUE); + if (!xml_find_ch(p_st, q_ch, XML_NORM_WS)) + { + return FALSE; + } + + xml_set_bfr(p_st, XML_SET_CLEAR); + xml_resolve_refs(p_st); + + p_st->event_data.attr.prefix.p = p_st->prefix.p; + p_st->event_data.attr.prefix.len = p_st->prefix.len; + p_st->event_data.attr.name.p = p_st->name.p; + p_st->event_data.attr.name.len = p_st->name.len; + p_st->event_data.attr.value.p = p_st->value.p; + p_st->event_data.attr.value.len = p_st->value.len; + p_st->value.len = 0; + cb_ret = p_st->cback(XML_ATTRIBUTE, &(p_st->event_data), p_st->p_usr_data); + /* chk cback return */ + if(cb_ret == FALSE) + { + xml_incr_pars_res(p_st, XML_NO_PROP); + return FALSE; + } + + if (!xml_get_next(p_st, XML_PASS_WS)) + return FALSE; + } + + return (BOOLEAN) + ((p_st->next_token == XML_GT) || (p_st->next_token == XML_EM)); +} + + +/***************************************************************************** +** +** Function xml_elems +** +** Description +** Parses all elements with all their content.This function is not a +** one-to-one mapped implementation of one production from the XML spec. +** Instead it uses a simplified iterative (as opposed to recursive) +** approach when parsing both the element and content productions. +** +** When a parsing error is found, this function tries to recover by +** searching for the next element (tag). +** +** When char data is found, the function sends the XML_CHARDATA event in +** the user callback. +** +** Other static functions with production names, start their parsing +** from the first character in their production. They might check +** that the first character (token) in the production matches +** p_st->next_token, alternatively they can just get rid of the first +** char in the production by calling get_next_ch. The exceptions to this +** are the xml_qm_elem, xml_ex_elem, xml_etag_elem and the xml_tag_elem +** functions which starts with the XML_QM, XML_EX, XML_EM and the first +** char in the tag name, respectively. +** +** Parameters +** p_st (in/out) : the parser state +** prev_ok (in) : if parsing done before calling this function was +** ok. If not, the functions starts with recovering. +** +** Returns +** True if parsing was successful possibly with successful recoveries. +** False if an error was found from which recovery failed (XML_EOS). +*****************************************************************************/ + +static BOOLEAN xml_elems(tXML_MUL_STATE *p_st, BOOLEAN prev_ok) +{ + BOOLEAN tag_found; + BOOLEAN cb_ret = TRUE; + + while (!XML_EOS(p_st) && prev_ok) + { + /* remove leading ws even if char data */ + if (XML_IS_WS(p_st->next_token)) + { + if (!xml_get_next(p_st, XML_SKIP_WS)) + return TRUE; + } + + tag_found = (BOOLEAN) (p_st->next_token == XML_ST); + if (!tag_found) + { + xml_set_bfr(p_st, XML_SET_VALUE); + tag_found = xml_find_ch(p_st, XML_ST, XML_PASS_WS); + + xml_remove_trail_ws(p_st); + if (p_st->value.len > 0) + { + xml_resolve_refs(p_st); + p_st->event_data.ch_data.value.p = p_st->value.p; + p_st->event_data.ch_data.value.len = p_st->value.len; + p_st->event_data.ch_data.last = TRUE; + p_st->value.len = 0; + cb_ret = p_st->cback(XML_CHARDATA, &(p_st->event_data), p_st->p_usr_data); + /* chk cback return */ + if(cb_ret == FALSE) + { + xml_incr_pars_res(p_st, XML_NO_PROP); + return FALSE; + } + + } + xml_set_bfr(p_st, XML_SET_CLEAR); + + if (!tag_found) + return prev_ok; + } + else + { + p_st->p_last_stm = p_st->p_cur - 1; + + if (p_st->p_cur) + p_st->p_copy = p_st->p_last_stm; + + p_st->cback(XML_TOP, &(p_st->event_data), p_st->p_usr_data); + } + + /* tag was found */ + if (!xml_get_next(p_st, XML_PASS_WS)) + return FALSE; + + + if (p_st->next_token == XML_QM) + prev_ok = xml_qm_elem(p_st); + else if (p_st->next_token == XML_EX) + { + prev_ok = xml_ex_elem(p_st, FALSE); + } + else if (p_st->next_token == XML_EM) + { + prev_ok = xml_etag_elem(p_st); + } + else + prev_ok = xml_tag_elem(p_st); + + + + if (!prev_ok) + xml_incr_pars_res(p_st, XML_ERR); + } + + XML_TRACE_DEBUG1("xml_elems prev_ok:%d", prev_ok); + return prev_ok; +} + + +/***************************************************************************** +** +** Function xml_qm_elem +** +** Description +** Recognises all productions starting with "<?". That is PI and XML decl. +** These productions are skipped and XML_WARNING is set. +** The function starts with the XML_QM as the first char (is in +** p_st->next_token).It ends with the XML_GT successor (is in +** p_st->next_token) or XML_EOS. +** +** Parameters +** p_st (in/out) : the parser state +** +** Returns +** True if no error was found trying to recognise the start and end of +** the productions. False otherwise. +*****************************************************************************/ + +static BOOLEAN xml_qm_elem(tXML_MUL_STATE *p_st) +{ + if (!xml_get_next(p_st, XML_PASS_WS)) + return FALSE; + if (!xml_find_str(p_st, (UINT8*) XML_QTAG_END_STR)) + return FALSE; + xml_incr_pars_res(p_st, XML_WARNING); + return TRUE; +} + + +/***************************************************************************** +** +** Function xml_ex_elem +** +** Description +** Handles all productions starting with "<!". They are Comments, CDSect +** doctypedecl and markupdecl. All are skipped. However, the inpar +** prolog must be set for the function to try to detect the doctypedecl +** and markupdecl beginning. +** +** The function starts with the XML_EX as the first char. +** The function ends with XML_EOS or the char succeeding XML_GT, +** except for doctypedecl and marcupdecl which ends with the next XM_TAG. +** +** Parameters +** p_st (in/out) : the parser state +** prolog (in) : should be set if in prolog in which case the function +** tries to detect (allows) the beginning of doctypedecl +** and markupdecl. +** Returns +** True if no error was found trying to recognise the start and end of +** the productions. False otherwise. +*****************************************************************************/ + +static BOOLEAN xml_ex_elem(tXML_MUL_STATE *p_st, BOOLEAN prolog) +{ + UINT8 q_ch; + + if (!xml_get_next(p_st, XML_PASS_WS)) + return FALSE; + + if (p_st->next_token == XML_HY) /* comment */ + { + if (!xml_consume_str(p_st, (UINT8*) XML_COMM_STR)) + return FALSE; + + if (!xml_find_str(p_st, (UINT8*) XML_COMM_END_STR)) + return FALSE; + } + else if (p_st->next_token == XML_LB) /* CDSect */ + { + if (!xml_consume_str(p_st, (UINT8*) XML_CDS_STR)) + return FALSE; + + if (!xml_find_str(p_st, (UINT8*) XML_CDS_END_STR)) + return FALSE; + + xml_incr_pars_res(p_st, XML_WARNING); + } + else if (prolog) /* doctypedecl or markupdecl */ + { + do + { + if (!xml_find_ch_n(p_st, (UINT8*) XML_DOCT_STR)) + return FALSE; + + if ((p_st->next_token == XML_SQ) || (p_st->next_token == XML_DQ)) + { + q_ch = p_st->next_token; + if (!xml_get_next(p_st, XML_PASS_WS)) + return FALSE; + + if (!xml_find_ch(p_st, q_ch, XML_PASS_WS)) + return FALSE; + + xml_get_next(p_st, XML_PASS_WS); + } + } while (!XML_EOS(p_st) && (p_st->next_token != XML_ST)); + + xml_incr_pars_res(p_st, XML_WARNING); + } + else /* error */ + { + return FALSE; + } + + return TRUE; +} + + +/***************************************************************************** +** +** Function xml_tag_elem +** +** Description +** Parses a tag element. The function starts with the char succeeding the +** XML_ST char. +** The functions ends with the char succeeding the XML_GT char or +** with XML_EOS. +** Sends the XML_TAG and the XML_TAG_END events in a callback each. +** +** Parameters +** p_st (in/out) : the parser state +** +** Returns +** True if no error was found. +** False otherwise. +*****************************************************************************/ + +static BOOLEAN xml_tag_elem(tXML_MUL_STATE *p_st) +{ + BOOLEAN cb_ret = TRUE; + + if (!xml_name(p_st)) + return FALSE; + + p_st->event_data.tag.prefix.p = p_st->prefix.p; + p_st->event_data.tag.name.p = p_st->name.p; + p_st->event_data.tag.prefix.len = p_st->prefix.len; + p_st->event_data.tag.name.len = p_st->name.len; + p_st->event_data.tag.p_last_stm = p_st->p_last_stm; + cb_ret = p_st->cback(XML_TAG, &(p_st->event_data), p_st->p_usr_data); + if(cb_ret == FALSE) + { + xml_incr_pars_res(p_st, XML_NO_PROP); + return FALSE; + } + + /* chk cback return */ + + if (XML_EOS(p_st)) + return FALSE; + + if (XML_IS_WS(p_st->next_token)) + { + if (!xml_attributes(p_st)) + return FALSE; + } + + p_st->event_data.empty_elem.end = (BOOLEAN) (p_st->next_token == XML_EM); + if (p_st->event_data.empty_elem.end) + { + if (!xml_get_next(p_st, XML_PASS_WS)) + return FALSE; + } + + if (p_st->next_token != XML_GT) + return FALSE; + + xml_get_next(p_st, XML_PASS_WS); + + cb_ret = p_st->cback(XML_TAG_END, &(p_st->event_data), p_st->p_usr_data); + + + if(cb_ret == FALSE) + { + xml_incr_pars_res(p_st, XML_NO_PROP); + return FALSE; + } + + p_st->p_copy = p_st->p_cur - 1; + p_st->cback(XML_TOP, &(p_st->event_data), p_st->p_usr_data); + /* chk cback return */ + + return TRUE; +} + + +/***************************************************************************** +** +** Function xml_etag_elem +** +** Description +** Parses an end tag element. The function starts with the XML_EM char. +** The functions ends with the char succeeding the XML_GT char or +** with XML_EOS. Sends the XML_ETAG event in the user callback. +** +** Parameters +** p_st (in/out) : the parser state +** +** Returns +** True if no error was found. +** False otherwise. +*****************************************************************************/ + +static BOOLEAN xml_etag_elem(tXML_MUL_STATE *p_st) +{ + BOOLEAN cb_ret = TRUE; + + if (!xml_get_next(p_st, XML_PASS_WS)) + return FALSE; + + if (!xml_name(p_st)) + return FALSE; + + p_st->event_data.etag.prefix.p = p_st->prefix.p; + p_st->event_data.etag.name.p = p_st->name.p; + p_st->event_data.etag.name.len = p_st->name.len; + p_st->event_data.etag.prefix.len = p_st->prefix.len; + cb_ret = p_st->cback(XML_ETAG, &(p_st->event_data), p_st->p_usr_data); + if(cb_ret == FALSE) + { + xml_incr_pars_res(p_st, XML_NO_PROP); + return FALSE; + } + + p_st->p_copy = (p_st->prefix.p) ? p_st->prefix.p - 2: p_st->name.p - 2; + p_st->cback(XML_TOP, &(p_st->event_data), p_st->p_usr_data); + + /* chk cback return */ + + if (XML_EOS(p_st)) + return FALSE; + + if (XML_IS_WS(p_st->next_token)) + if (!xml_get_next(p_st, XML_SKIP_WS)) + return FALSE; + + if (p_st->next_token != XML_GT) + return FALSE; + + xml_get_next(p_st, XML_PASS_WS); + + return TRUE; +} + |