diff options
Diffstat (limited to 'tools/data-binding/xmlGrammar/XMLLexer.g4')
-rw-r--r-- | tools/data-binding/xmlGrammar/XMLLexer.g4 | 93 |
1 files changed, 93 insertions, 0 deletions
diff --git a/tools/data-binding/xmlGrammar/XMLLexer.g4 b/tools/data-binding/xmlGrammar/XMLLexer.g4
new file mode 100644
index 0000000..ea7a23c
--- /dev/null
+++ b/tools/data-binding/xmlGrammar/XMLLexer.g4
@@ -0,0 +1,93 @@
+/*
+ [The "BSD licence"]
+ Copyright (c) 2013 Terence Parr
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ 1. Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+ 2. Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in the
+    documentation and/or other materials provided with the distribution.
+ 3. The name of the author may not be used to endorse or promote products
+    derived from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/** XML lexer derived from ANTLR v4 ref guide book example. Modes: default (outside tags), INSIDE (within a tag), PROC_INSTR (<? ... ?> body). */
+lexer grammar XMLLexer;
+
+// Default "mode": Everything OUTSIDE of a tag
+COMMENT     :   '<!--' .*? '-->' ;              // non-greedy: ends at the first '-->'
+CDATA       :   '<![CDATA[' .*? ']]>' ;         // non-greedy: ends at the first ']]>'
+/** Scarf all DTD stuff, Entity Declarations like <!ENTITY ...>,
+ *  and Notation Declarations <!NOTATION ...>
+ */
+DTD         :   '<!' .*? '>'            -> skip ;
+EntityRef   :   '&' Name ';' ;                  // named reference; Name is defined in mode INSIDE
+CharRef     :   '&#' DIGIT+ ';'                 // decimal character reference
+            |   '&#x' HEXDIGIT+ ';'             // hexadecimal character reference
+            ;
+SEA_WS      :   (' '|'\t'|'\r'? '\n')+ ;        // run of spaces, tabs and LF / CRLF line ends
+
+OPEN        :   '<'                     -> pushMode(INSIDE) ;
+XMLDeclOpen :   '<?xml' S               -> pushMode(INSIDE) ;   // '<?xml' plus one whitespace character
+SPECIAL_OPEN:   '<?' Name               -> more, pushMode(PROC_INSTR) ; // 'more': text is carried into the PI token
+
+TEXT        :   ~[<&]+ ;        // match any 16 bit char other than < and &
+
+// ----------------- Everything INSIDE of a tag ---------------------
+mode INSIDE;
+
+CLOSE       :   '>'                     -> popMode ;    // end of a start or end tag
+SPECIAL_CLOSE:  '?>'                    -> popMode ;    // close <?xml...?>
+SLASH_CLOSE :   '/>'                    -> popMode ;    // end of a self-closing tag
+SLASH       :   '/' ;
+EQUALS      :   '=' ;
+STRING      :   '"' ~[<"]* '"'                  // double-quoted value; '<' and '"' may not appear inside
+            |   '\'' ~[<']* '\''                // single-quoted value; '<' and '\'' may not appear inside
+            ;
+Name        :   NameStartChar NameChar* ;
+S           :   [ \t\r\n]               -> skip ;       // whitespace inside a tag is dropped
+
+fragment
+HEXDIGIT    :   [a-fA-F0-9] ;
+
+fragment
+DIGIT       :   [0-9] ;
+
+fragment
+NameChar    :   NameStartChar
+            |   '-' | '_' | '.' | DIGIT
+            |   '\u00B7'
+            |   '\u0300'..'\u036F'
+            |   '\u203F'..'\u2040'
+            ;
+
+fragment
+NameStartChar   // NOTE(review): some XML-spec start ranges (e.g. U+00C0..U+02FF) are still not covered here
+            :   [_:a-zA-Z]      // '_' is a legal first character of an XML name, so it must be accepted here
+            |   '\u2070'..'\u218F'
+            |   '\u2C00'..'\u2FEF'
+            |   '\u3001'..'\uD7FF'
+            |   '\uF900'..'\uFDCF'
+            |   '\uFDF0'..'\uFFFD'
+            ;
+
+// ----------------- Handle <? ... ?> ---------------------
+mode PROC_INSTR;
+
+PI          :   '?>'                    -> popMode ;    // close <?...?>
+IGNORE      :   .                       -> more ;       // accumulate every other character into the pending token